Compare commits

...

2 Commits

Author SHA1 Message Date
cel 🌸 aa940a8eac create element builder and refactor api 2024-11-28 18:00:30 +00:00
cel 🌸 381af38a09 automatically declare default namespaces unless overriden 2024-11-24 14:59:41 +00:00
5 changed files with 624 additions and 34 deletions

View File

@ -1,25 +1,33 @@
// elements resemble a final tree, including inherited namespace information
#![feature(drain_filter)]
use std::{
collections::{HashMap, HashSet},
collections::{HashMap, HashSet, VecDeque},
convert::Infallible,
str::FromStr,
};
use crate::{
error::Error,
error::{DeserializeError, Error},
xml::{self, parsers_complete::Parser, Attribute},
Result,
};
pub type DeserializeResult<T> = std::result::Result<T, DeserializeError>;
pub trait FromElement: Sized {
fn from_element(element: Element) -> Result<Self>;
fn from_element(element: Element) -> DeserializeResult<Self>;
}
pub trait IntoElement {
fn into_element(&self) -> Element;
fn builder(&self) -> ElementBuilder;
fn get_content(&self) -> Vec<Content> {
fn into_element(&self) -> Element {
self.builder().build().unwrap()
}
fn get_content(&self) -> VecDeque<Content> {
let element = self.into_element();
element.content
}
@ -58,7 +66,8 @@ pub struct Element {
// namespace: String,
// hashmap of explicit namespace declarations on the element itself only
// possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader.
pub namespace_declarations: HashSet<NamespaceDeclaration>,
// change this to custom namespace declarations only, so you can override the definition of namespaces if you wish
pub namespace_declaration_overrides: HashSet<NamespaceDeclaration>,
// attributes can be in a different namespace than the element. how to make sure they are valid?
// maybe include the namespace instead of or with the prefix
// you can calculate the prefix from the namespaced name and the current writer context
@ -66,7 +75,524 @@ pub struct Element {
// this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.
pub attributes: HashMap<Name, String>,
// TODO: make a hashmap maybe? to be able to address parts of the content individually
pub content: Vec<Content>,
pub content: VecDeque<Content>,
}
impl Element {
/// Returns the element's namespace (if it has one) together with its local name.
pub fn identify(&self) -> (Option<&str>, &str) {
    let namespace = self.name.namespace.as_deref();
    let local_name = self.name.local_name.as_str();
    (namespace, local_name)
}
/// Verifies that the element's local name equals `name`.
///
/// # Errors
///
/// Returns [`DeserializeError::IncorrectName`] carrying the element's actual
/// local name when it differs from `name`.
pub fn check_name(&self, name: &str) -> DeserializeResult<()> {
    if self.name.local_name != name {
        return Err(DeserializeError::IncorrectName(
            self.name.local_name.clone(),
        ));
    }
    Ok(())
}
/// Verifies that the element is qualified with exactly `namespace`.
///
/// # Errors
///
/// * [`DeserializeError::IncorrectNamespace`] (carrying the actual namespace)
///   when the element has a different namespace.
/// * [`DeserializeError::Unqualified`] when the element has no namespace.
pub fn check_namespace(&self, namespace: &str) -> DeserializeResult<()> {
    match self.name.namespace.as_deref() {
        Some(actual) if actual == namespace => Ok(()),
        Some(actual) => Err(DeserializeError::IncorrectNamespace(actual.to_string())),
        None => Err(DeserializeError::Unqualified),
    }
}
pub fn attribute_opt<V: FromStr>(&mut self, att_name: &str) -> DeserializeResult<Option<V>> {
if let Some(att_value) = self.attributes.remove(&Name {
namespace: None,
local_name: att_name.to_string(),
}) {
let value = <V as FromStr>::from_str(&att_value)
.map_err(|_| DeserializeError::FromStr(att_value))?;
return Ok(Some(value));
} else {
return Ok(None);
}
}
pub fn attribute_opt_namespaced<V: FromStr>(
&mut self,
att_name: &str,
att_namespace: &str,
) -> DeserializeResult<Option<V>> {
if let Some(att_value) = self.attributes.remove(&Name {
namespace: Some(att_namespace.to_string()),
local_name: att_name.to_string(),
}) {
let value = <V as FromStr>::from_str(&att_value)
.map_err(|_| DeserializeError::FromStr(att_value))?;
return Ok(Some(value));
} else {
return Ok(None);
}
}
pub fn attribute<V: FromStr>(&mut self, att_name: &str) -> DeserializeResult<V> {
let name = Name {
namespace: None,
local_name: att_name.to_string(),
};
if let Some(att_value) = self.attributes.remove(&name) {
let value = <V as FromStr>::from_str(&att_value)
.map_err(|_| DeserializeError::FromStr(att_value))?;
return Ok(value);
} else {
return Err(DeserializeError::MissingAttribute(name));
}
}
pub fn attribute_namespaced<V: FromStr>(
&mut self,
att_name: &str,
att_namespace: &str,
) -> DeserializeResult<V> {
let name = Name {
namespace: Some(att_namespace.to_string()),
local_name: att_name.to_string(),
};
if let Some(att_value) = self.attributes.remove(&name) {
let value = <V as FromStr>::from_str(&att_value)
.map_err(|_| DeserializeError::FromStr(att_value))?;
return Ok(value);
} else {
return Err(DeserializeError::MissingAttribute(name));
}
}
/// Passes the element through unchanged if every attribute has been consumed.
///
/// # Errors
///
/// Returns [`DeserializeError::UnexpectedAttributes`] carrying the leftover
/// attributes when any remain.
pub fn no_more_attributes(self) -> DeserializeResult<Self> {
    if !self.attributes.is_empty() {
        return Err(DeserializeError::UnexpectedAttributes(self.attributes));
    }
    Ok(self)
}
// for xs:any
/// Removes and returns the first child element that deserializes as `T`.
///
/// Children are tried in document order; text, processing instructions and
/// comments are ignored. Content that does not match is left in place.
///
/// # Errors
///
/// Returns [`DeserializeError::MissingChild`] when no child element
/// deserializes as `T`.
pub fn child_one<T: FromElement>(&mut self) -> DeserializeResult<T> {
    // Keep the successfully deserialized value from the search so the matching
    // element does not have to be deserialized a second time after removal.
    let found = self
        .content
        .iter()
        .enumerate()
        .find_map(|(index, content)| match content {
            Content::Element(element) => T::from_element(element.clone())
                .ok()
                .map(|child| (index, child)),
            Content::Text(_) | Content::PI | Content::Comment(_) => None,
        });
    match found {
        Some((index, child)) => {
            self.content.remove(index);
            Ok(child)
        }
        None => Err(DeserializeError::MissingChild),
    }
}
/// Removes and returns the first child element that deserializes as `T`,
/// or `Ok(None)` when there is none.
///
/// Children are tried in document order; text, processing instructions and
/// comments are ignored. Content that does not match is left in place.
pub fn child_opt<T: FromElement>(&mut self) -> DeserializeResult<Option<T>> {
    // Keep the successfully deserialized value from the search so the matching
    // element does not have to be deserialized a second time after removal.
    let found = self
        .content
        .iter()
        .enumerate()
        .find_map(|(index, content)| match content {
            Content::Element(element) => T::from_element(element.clone())
                .ok()
                .map(|child| (index, child)),
            Content::Text(_) | Content::PI | Content::Comment(_) => None,
        });
    Ok(found.map(|(index, child)| {
        self.content.remove(index);
        child
    }))
}
/// Removes and returns every child element that deserializes as `T`.
///
/// The returned children keep their document order. All other content
/// (non-matching elements, text, processing instructions, comments) stays in
/// the element, also in its original relative order.
pub fn children<T: FromElement>(&mut self) -> DeserializeResult<Vec<T>> {
    let mut children = Vec::new();
    let mut rest = VecDeque::with_capacity(self.content.len());
    // Take ownership of the content instead of cloning the whole deque, and
    // deserialize each element exactly once.
    for content in std::mem::take(&mut self.content) {
        match content {
            Content::Element(element) => {
                // `from_element` consumes its argument, so a clone is needed to be
                // able to put the element back when it does not match.
                match T::from_element(element.clone()) {
                    Ok(child) => children.push(child),
                    Err(_) => rest.push_back(Content::Element(element)),
                }
            }
            other @ (Content::Text(_) | Content::PI | Content::Comment(_)) => {
                rest.push_back(other)
            }
        }
    }
    self.content = rest;
    Ok(children)
}
/// Removes the first text node that parses as `V` and returns the parsed value.
///
/// Text nodes that fail to parse are skipped and left in place, as is all
/// other content.
///
/// # Errors
///
/// Returns [`DeserializeError::MissingValue`] when no text node parses as `V`.
pub fn value<V: FromStr>(&mut self) -> DeserializeResult<V> {
    // Carry the parsed value out of the search so the text is parsed only once
    // and no unwrap/panic is needed afterwards.
    let found = self
        .content
        .iter()
        .enumerate()
        .find_map(|(index, content)| match content {
            Content::Text(text) => text.parse::<V>().ok().map(|value| (index, value)),
            Content::Element(_) | Content::PI | Content::Comment(_) => None,
        });
    match found {
        Some((index, value)) => {
            self.content.remove(index);
            Ok(value)
        }
        None => Err(DeserializeError::MissingValue),
    }
}
/// Removes the first text node that parses as `V` and returns the parsed
/// value, or `Ok(None)` when no text node parses as `V`.
///
/// Text nodes that fail to parse are skipped and left in place, as is all
/// other content.
pub fn value_opt<V: FromStr>(&mut self) -> DeserializeResult<Option<V>> {
    // Carry the parsed value out of the search so the text is parsed only once
    // and no panic path is needed afterwards.
    let found = self
        .content
        .iter()
        .enumerate()
        .find_map(|(index, content)| match content {
            Content::Text(text) => text.parse::<V>().ok().map(|value| (index, value)),
            Content::Element(_) | Content::PI | Content::Comment(_) => None,
        });
    Ok(found.map(|(index, value)| {
        self.content.remove(index);
        value
    }))
}
// for xs:sequence
/// Pops content from the front until an element is found and deserializes
/// that element as `T`.
///
/// Processing instructions and comments in front of the element are
/// discarded.
///
/// # Errors
///
/// * [`DeserializeError::MissingChild`] when the content runs out first.
/// * [`DeserializeError::UnexpectedContent`] when a text node is reached
///   first; the text has already been popped, so the error carries the
///   content that remains after it.
/// * Any error returned by `T::from_element`.
pub fn pop_child_one<T: FromElement>(&mut self) -> DeserializeResult<T> {
    while let Some(item) = self.content.pop_front() {
        match item {
            Content::Element(element) => return T::from_element(element),
            Content::Text(_) => {
                return Err(DeserializeError::UnexpectedContent(self.content.clone()));
            }
            Content::PI | Content::Comment(_) => continue,
        }
    }
    Err(DeserializeError::MissingChild)
}
/// Pops content from the front until an element is found and deserializes
/// that element as `T`; returns `Ok(None)` when the content runs out first.
///
/// Processing instructions and comments in front of the element are
/// discarded.
///
/// # Errors
///
/// * [`DeserializeError::UnexpectedContent`] when a text node is reached
///   first; the text has already been popped, so the error carries the
///   content that remains after it.
/// * Any error returned by `T::from_element` (the element is consumed).
pub fn pop_child_opt<T: FromElement>(&mut self) -> DeserializeResult<Option<T>> {
    while let Some(item) = self.content.pop_front() {
        match item {
            Content::Element(element) => return T::from_element(element).map(Some),
            Content::Text(_) => {
                return Err(DeserializeError::UnexpectedContent(self.content.clone()));
            }
            Content::PI | Content::Comment(_) => continue,
        }
    }
    Ok(None)
}
/// Pops the leading run of child elements that deserialize as `T`.
///
/// Processing instructions and comments encountered along the way are
/// discarded, matching the other `pop_*` methods. Collection stops, leaving
/// the offending item at the front of the content, at the first text node or
/// the first element that does not deserialize as `T`, or when the content is
/// exhausted.
pub fn pop_children<T: FromElement>(&mut self) -> DeserializeResult<Vec<T>> {
    let mut children = Vec::new();
    loop {
        // Inspect the front without removing it, so that an item which ends the
        // run remains available to the caller.
        let parsed = match self.content.front() {
            Some(Content::Element(element)) => Some(T::from_element(element.clone())),
            Some(Content::PI) | Some(Content::Comment(_)) => None,
            Some(Content::Text(_)) | None => return Ok(children),
        };
        match parsed {
            Some(Ok(child)) => {
                children.push(child);
                self.content.pop_front();
            }
            // A non-matching element ends the run. Previously nothing was popped
            // in this case and the loop never terminated.
            Some(Err(_)) => return Ok(children),
            // Skip the PI/comment. Previously it was never popped, which also
            // made the loop spin forever.
            None => {
                self.content.pop_front();
            }
        }
    }
}
/// Pops content from the front until a text node is found and parses that
/// text as `V`.
///
/// Processing instructions and comments in front of the text are discarded.
///
/// # Errors
///
/// * [`DeserializeError::MissingValue`] when the content runs out first
///   (the same variant `value` reports for a missing text value).
/// * [`DeserializeError::UnexpectedContent`] when an element is reached
///   first; the element has already been popped, so the error carries the
///   content that remains after it.
/// * [`DeserializeError::FromStr`] carrying the text when it cannot be
///   parsed as `V`.
pub fn pop_value<V: FromStr>(&mut self) -> DeserializeResult<V> {
    while let Some(item) = self.content.pop_front() {
        match item {
            Content::Element(_) => {
                return Err(DeserializeError::UnexpectedContent(self.content.clone()));
            }
            Content::Text(text) => {
                return text
                    .parse::<V>()
                    .map_err(|_| DeserializeError::FromStr(text));
            }
            Content::PI | Content::Comment(_) => {}
        }
    }
    // A missing text value is reported as MissingValue rather than MissingChild.
    Err(DeserializeError::MissingValue)
}
/// Pops content from the front until a text node is found and parses that
/// text as `V`; returns `Ok(None)` when the content runs out first.
///
/// Processing instructions and comments in front of the text are discarded.
///
/// # Errors
///
/// * [`DeserializeError::UnexpectedContent`] when an element is reached
///   first; the element has already been popped, so the error carries the
///   content that remains after it.
/// * [`DeserializeError::FromStr`] carrying the text when it cannot be
///   parsed as `V`.
pub fn pop_value_opt<V: FromStr>(&mut self) -> DeserializeResult<Option<V>> {
    while let Some(item) = self.content.pop_front() {
        match item {
            Content::Element(_) => {
                return Err(DeserializeError::UnexpectedContent(self.content.clone()));
            }
            Content::Text(text) => {
                return match text.parse::<V>() {
                    Ok(value) => Ok(Some(value)),
                    Err(_) => Err(DeserializeError::FromStr(text)),
                };
            }
            Content::PI | Content::Comment(_) => continue,
        }
    }
    Ok(None)
}
/// Passes the element through unchanged if no element or text content remains.
///
/// Leftover processing instructions and comments are tolerated.
///
/// # Errors
///
/// Returns [`DeserializeError::UnexpectedContent`] carrying all remaining
/// content when any element or text node is left.
pub fn no_more_content(self) -> DeserializeResult<Self> {
    // `any` short-circuits and avoids building a temporary Vec just to test
    // whether it is empty.
    let has_unconsumed = self.content.iter().any(|content| match content {
        Content::Element(_) | Content::Text(_) => true,
        Content::PI | Content::Comment(_) => false,
    });
    if has_unconsumed {
        Err(DeserializeError::UnexpectedContent(self.content))
    } else {
        Ok(self)
    }
}
/// Starts an [`ElementBuilder`] for an element with the given local `name`
/// and optional `namespace`; shorthand for [`ElementBuilder::new`].
pub fn builder(name: impl ToString, namespace: Option<impl ToString>) -> ElementBuilder {
ElementBuilder::new(name, namespace)
}
}
/// Accumulates the parts of an [`Element`] before it is validated and
/// assembled by `build`.
///
/// Parts are stored in insertion order in `Vec`s; duplicate detection happens
/// in `build`, not when pushing.
pub struct ElementBuilder {
// qualified name (optional namespace + local name) of the element being built
name: Name,
// explicit namespace declarations requested for this element
namespace_declaration_overrides: Vec<NamespaceDeclaration>,
// (name, value) attribute pairs in the order they were pushed
attributes: Vec<(Name, String)>,
// child elements and text in the order they were pushed
content: Vec<ContentBuilder>,
}
impl ElementBuilder {
/// Creates an empty builder for an element with the given local `name` and
/// optional `namespace`.
pub fn new(name: impl ToString, namespace: Option<impl ToString>) -> Self {
    let qualified_name = Name {
        namespace: namespace.map(|ns| ns.to_string()),
        local_name: name.to_string(),
    };
    Self {
        name: qualified_name,
        namespace_declaration_overrides: Vec::new(),
        attributes: Vec::new(),
        content: Vec::new(),
    }
}
/// Adds an explicit namespace declaration binding `prefix` (or the default
/// namespace when `prefix` is `None`) to `namespace`.
///
/// Duplicates are not rejected here; they are detected by `build`.
pub fn push_namespace_declaration_override(
    mut self,
    prefix: Option<impl ToString>,
    namespace: impl ToString,
) -> Self {
    let declaration = NamespaceDeclaration {
        prefix: prefix.map(|p| p.to_string()),
        namespace: namespace.to_string(),
    };
    self.namespace_declaration_overrides.push(declaration);
    self
}
/// Adds an un-namespaced attribute `name` with the given `value`.
///
/// Duplicates are not rejected here; they are detected by `build`.
pub fn push_attribute<N: ToString, V: ToString>(mut self, name: N, value: V) -> Self {
    // TODO: make sure name is a valid name, same for prefixes
    let attribute_name = Name {
        namespace: None,
        local_name: name.to_string(),
    };
    let attribute_value = value.to_string();
    self.attributes.push((attribute_name, attribute_value));
    self
}
/// Adds an attribute `name` qualified by `namespace` with the given `value`.
///
/// Duplicates are not rejected here; they are detected by `build`.
pub fn push_attribute_namespaced(
    mut self,
    namespace: impl ToString,
    name: impl ToString,
    value: impl ToString,
) -> Self {
    let attribute_name = Name {
        namespace: Some(namespace.to_string()),
        local_name: name.to_string(),
    };
    let attribute_value = value.to_string();
    self.attributes.push((attribute_name, attribute_value));
    self
}
/// Appends a child element (given as its builder) to the content.
pub fn push_child(self, child: ElementBuilder) -> Self {
    self.push_content(ContentBuilder::Element(child))
}
/// Appends a text node, produced with `text.to_string()`, to the content.
pub fn push_text(self, text: impl ToString) -> Self {
    self.push_content(ContentBuilder::Text(text.to_string()))
}
/// Adds the un-namespaced attribute `name` only when `value` is `Some`;
/// otherwise returns the builder unchanged.
pub fn push_attribute_opt(self, name: impl ToString, value: Option<impl ToString>) -> Self {
    match value {
        Some(value) => self.push_attribute(name, value),
        None => self,
    }
}
/// Adds the attribute `name` qualified by `namespace` only when `value` is
/// `Some`; otherwise returns the builder unchanged.
pub fn push_attribute_opt_namespaced(
    self,
    namespace: impl ToString,
    name: impl ToString,
    value: Option<impl ToString>,
) -> Self {
    match value {
        Some(value) => self.push_attribute_namespaced(namespace, name, value),
        None => self,
    }
}
/// Appends `child` to the content only when it is `Some`; otherwise returns
/// the builder unchanged.
pub fn push_child_opt(self, child: Option<ElementBuilder>) -> Self {
    match child {
        Some(child) => self.push_child(child),
        None => self,
    }
}
/// Appends a text node only when `text` is `Some`; otherwise returns the
/// builder unchanged.
pub fn push_text_opt(self, text: Option<impl ToString>) -> Self {
    match text {
        Some(text) => self.push_text(text),
        None => self,
    }
}
/// Appends an already-constructed piece of content (element or text) to the
/// end of the content list and returns the builder for chaining.
pub fn push_content(mut self, content: ContentBuilder) -> Self {
self.content.push(content);
self
}
/// Appends the content builder of every item in `children`, in order.
pub fn push_children(self, children: Vec<impl IntoContent>) -> Self {
    children
        .into_iter()
        .fold(self, |builder, child| builder.push_content(child.builder()))
}
pub fn build(&self) -> Result<Element> {
let mut namespace_declaration_overrides = HashSet::new();
for namespace_declaration in &self.namespace_declaration_overrides {
if !namespace_declaration_overrides.insert(namespace_declaration.clone()) {
return Err(Error::DuplicateNameSpaceDeclaration(
namespace_declaration.clone(),
));
}
}
let mut attributes = HashMap::new();
for (att_name, att_value) in &self.attributes {
if attributes
.insert(att_name.clone(), att_value.to_string())
.is_some()
{
// TODO: better error
return Err(Error::DuplicateAttribute(att_name.local_name.to_string()));
}
}
let content: Result<VecDeque<Content>> = self
.content
.iter()
.map(|content_builder| -> Result<Content> { Ok(content_builder.build()?) })
.collect();
let content = content?;
Ok(Element {
name: self.name.clone(),
namespace_declaration_overrides,
attributes,
content,
})
}
}
/// Types that can describe themselves as a piece of element content.
pub trait IntoContent {
/// Returns a [`ContentBuilder`] describing `self`.
fn builder(&self) -> ContentBuilder;
}
impl<T> IntoContent for T
where
T: IntoElement,
{
fn builder(&self) -> ContentBuilder {
ContentBuilder::Element(self.builder())
}
}
/// A not-yet-built piece of element content held by an [`ElementBuilder`].
pub enum ContentBuilder {
/// A child element, still in builder form.
Element(ElementBuilder),
/// A text node.
Text(String),
}
impl ContentBuilder {
    /// Builds this piece of content.
    ///
    /// An element builder is built recursively and any error it produces is
    /// propagated; text is copied as-is.
    pub fn build(&self) -> Result<Content> {
        let content = match self {
            Self::Element(element_builder) => Content::Element(element_builder.build()?),
            Self::Text(text) => Content::Text(text.clone()),
        };
        Ok(content)
    }
}
pub fn escape_str(s: &str) -> String {

View File

@ -1,6 +1,26 @@
use std::{num::ParseIntError, str::Utf8Error};
use std::{
collections::{HashMap, VecDeque},
num::ParseIntError,
str::{FromStr, Utf8Error},
};
use crate::element::{Content, Name, NamespaceDeclaration};
use crate::{
element::{Content, Name, NamespaceDeclaration},
Element,
};
/// Errors produced while turning an `Element` into a typed value.
#[derive(Debug)]
pub enum DeserializeError {
/// A string could not be parsed via `FromStr`; holds the offending string.
FromStr(String),
/// Attributes were left over after deserialization; holds those attributes.
UnexpectedAttributes(HashMap<Name, String>),
/// Content was found where it was not expected; holds the remaining content.
UnexpectedContent(VecDeque<Content>),
/// A required attribute was absent; holds the name that was looked up.
MissingAttribute(Name),
/// The element's local name did not match; holds the actual local name.
IncorrectName(String),
/// The element's namespace did not match; holds the actual namespace.
IncorrectNamespace(String),
/// The element has no namespace although one was required.
Unqualified,
/// A required child element was not found.
MissingChild,
/// A required text value was not found.
MissingValue,
}
#[derive(Debug)]
pub enum Error {
@ -23,6 +43,13 @@ pub enum Error {
UnexpectedNumberOfContents(usize),
UnexpectedContent(Content),
UnexpectedElement(Name),
Deserialize(DeserializeError),
}
impl From<DeserializeError> for Error {
fn from(e: DeserializeError) -> Self {
Self::Deserialize(e)
}
}
impl From<std::io::Error> for Error {

View File

@ -11,6 +11,7 @@ pub const XML_NS: &str = "http://www.w3.org/XML/1998/namespace";
pub const XMLNS_NS: &str = "http://www.w3.org/2000/xmlns/";
pub use element::Element;
pub use error::DeserializeError;
pub use error::Error;
pub use reader::Reader;
pub use writer::Writer;

View File

@ -2,7 +2,7 @@ use circular::Buffer;
use futures::{FutureExt, Stream};
use nom::Err;
use std::{
collections::{hash_set, BTreeMap, HashMap, HashSet},
collections::{hash_set, BTreeMap, HashMap, HashSet, VecDeque},
future::Future,
path::Prefix,
pin::{pin, Pin},
@ -102,12 +102,12 @@ where
pub async fn read_start<'s, T: FromElement>(&'s mut self) -> Result<T> {
let element = self.read_start_tag().await?;
FromElement::from_element(element)
Ok(FromElement::from_element(element)?)
}
pub async fn read<'s, T: FromElement>(&'s mut self) -> Result<T> {
let element = self.read_element().await?;
FromElement::from_element(element)
Ok(FromElement::from_element(element)?)
}
pub async fn read_start_tag<'s>(&'s mut self) -> Result<Element> {
@ -436,9 +436,9 @@ impl<R> Reader<R> {
return Ok(Element {
name: element_name,
namespace_declarations: element_namespace_declarations,
namespace_declaration_overrides: element_namespace_declarations,
attributes,
content: Vec::new(),
content: VecDeque::new(),
});
}
@ -652,12 +652,12 @@ impl<R> Reader<R> {
namespace_declarations.pop();
} else {
content = Vec::new();
content = VecDeque::new();
}
return Ok(Element {
name: element_name,
namespace_declarations: element_namespace_declarations,
namespace_declaration_overrides: element_namespace_declarations,
attributes,
content,
});
@ -666,18 +666,18 @@ impl<R> Reader<R> {
fn content_from_xml(
namespaces: &mut Vec<HashSet<NamespaceDeclaration>>,
xml_content: xml::Content,
) -> Result<Vec<Content>> {
let mut content = Vec::new();
) -> Result<VecDeque<Content>> {
let mut content = VecDeque::new();
let mut text = xml_content.char_data.map(|str| String::from(*str));
for (content_item, char_data) in xml_content.content {
match content_item {
xml::ContentItem::Element(element) => {
text.map(|text| {
if !text.is_empty() {
content.push(Content::Text(text))
content.push_back(Content::Text(text))
}
});
content.push(Content::Element(Self::element_from_xml(
content.push_back(Content::Element(Self::element_from_xml(
namespaces, element,
)?));
text = char_data.map(|str| String::from(*str));
@ -711,7 +711,7 @@ impl<R> Reader<R> {
}
text.map(|text| {
if !text.is_empty() {
content.push(Content::Text(text))
content.push_back(Content::Text(text))
}
});
Ok(content)

View File

@ -93,20 +93,38 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
}
pub async fn write_empty(&mut self, element: &Element) -> Result<()> {
let namespace_declarations_stack: Vec<_> = self
let mut namespace_declarations_stack: Vec<_> = self
.namespace_declarations
.iter()
.flatten()
.chain(&element.namespace_declarations)
.chain(&element.namespace_declaration_overrides)
.collect();
let mut namespace_declarations = element.namespace_declaration_overrides.clone();
let default_namespace_declaration;
let prefix;
if let Some(namespace) = &element.name.namespace {
let name_namespace_declaration = namespace_declarations_stack
if let Some(name_namespace_declaration) = namespace_declarations_stack
.iter()
.rfind(|namespace_declaration| namespace_declaration.namespace == *namespace)
.ok_or(Error::UndeclaredNamespace(namespace.clone()))?;
prefix = name_namespace_declaration.prefix.as_ref();
{
prefix = name_namespace_declaration.prefix.as_ref();
} else {
default_namespace_declaration = NamespaceDeclaration {
prefix: None,
namespace: namespace.clone(),
};
if namespace_declarations.insert(default_namespace_declaration.clone()) {
namespace_declarations_stack.push(&default_namespace_declaration);
prefix = None
} else {
return Err(Error::DuplicateNameSpaceDeclaration(NamespaceDeclaration {
prefix: None,
namespace: namespace.clone(),
}));
}
}
} else {
prefix = None
}
@ -125,7 +143,7 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
let mut attributes = Vec::new();
for namespace_declaration in &element.namespace_declarations {
for namespace_declaration in namespace_declarations.iter() {
let ns_name = namespace_declaration
.prefix
.as_ref()
@ -180,20 +198,38 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
}
pub async fn write_element_start(&mut self, element: &Element) -> Result<()> {
let namespace_declarations_stack: Vec<_> = self
let mut namespace_declarations_stack: Vec<_> = self
.namespace_declarations
.iter()
.flatten()
.chain(&element.namespace_declarations)
.chain(&element.namespace_declaration_overrides)
.collect();
let mut namespace_declarations = element.namespace_declaration_overrides.clone();
let default_namespace_declaration;
let prefix;
if let Some(namespace) = &element.name.namespace {
let name_namespace_declaration = namespace_declarations_stack
if let Some(name_namespace_declaration) = namespace_declarations_stack
.iter()
.rfind(|namespace_declaration| namespace_declaration.namespace == *namespace)
.ok_or(Error::UndeclaredNamespace(namespace.clone()))?;
prefix = name_namespace_declaration.prefix.as_ref();
{
prefix = name_namespace_declaration.prefix.as_ref();
} else {
default_namespace_declaration = NamespaceDeclaration {
prefix: None,
namespace: namespace.clone(),
};
if namespace_declarations.insert(default_namespace_declaration.clone()) {
namespace_declarations_stack.push(&default_namespace_declaration);
prefix = None
} else {
return Err(Error::DuplicateNameSpaceDeclaration(NamespaceDeclaration {
prefix: None,
namespace: namespace.clone(),
}));
}
}
} else {
prefix = None
}
@ -212,7 +248,7 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
let mut attributes = Vec::new();
for namespace_declaration in &element.namespace_declarations {
for namespace_declaration in namespace_declarations.iter() {
let ns_name = namespace_declaration
.prefix
.as_ref()
@ -265,7 +301,7 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
self.depth.push(element.name.clone());
self.namespace_declarations
.push(element.namespace_declarations.clone());
.push(namespace_declarations.clone());
Ok(())
}