From aa940a8eac74aca8cd3c202a05092538d1140dda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?cel=20=F0=9F=8C=B8?= Date: Thu, 28 Nov 2024 18:00:30 +0000 Subject: [PATCH] create element builder and refactor api --- src/element.rs | 537 ++++++++++++++++++++++++++++++++++++++++++++++++- src/error.rs | 26 ++- src/lib.rs | 1 + src/reader.rs | 20 +- 4 files changed, 567 insertions(+), 17 deletions(-) diff --git a/src/element.rs b/src/element.rs index 1c04c98..98a3315 100644 --- a/src/element.rs +++ b/src/element.rs @@ -1,25 +1,33 @@ // elements resemble a final tree, including inherited namespace information +#![feature(drain_filter)] + use std::{ - collections::{HashMap, HashSet}, + collections::{HashMap, HashSet, VecDeque}, convert::Infallible, str::FromStr, }; use crate::{ - error::Error, + error::{DeserializeError, Error}, xml::{self, parsers_complete::Parser, Attribute}, Result, }; +pub type DeserializeResult = std::result::Result; + pub trait FromElement: Sized { - fn from_element(element: Element) -> Result; + fn from_element(element: Element) -> DeserializeResult; } pub trait IntoElement { - fn into_element(&self) -> Element; + fn builder(&self) -> ElementBuilder; - fn get_content(&self) -> Vec { + fn into_element(&self) -> Element { + self.builder().build().unwrap() + } + + fn get_content(&self) -> VecDeque { let element = self.into_element(); element.content } @@ -67,7 +75,524 @@ pub struct Element { // this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified. pub attributes: HashMap, // TODO: make a hashmap maybe? to be able to address parts of the content individually - pub content: Vec, + pub content: VecDeque, +} + +impl Element { + pub fn identify(&self) -> (Option<&str>, &str) { + (self.name.namespace.as_deref(), &self.name.local_name) + } + + pub fn check_name(&self, name: &str) -> DeserializeResult<()> { + if self.name.local_name == name { + Ok(()) + } else { + return Err(DeserializeError::IncorrectName( + self.name.local_name.clone(), + )); + } + } + + pub fn check_namespace(&self, namespace: &str) -> DeserializeResult<()> { + if self.name.namespace.as_deref() == Some(namespace) { + return Ok(()); + } else { + if let Some(namespace) = &self.name.namespace { + return Err(DeserializeError::IncorrectNamespace(namespace.clone())); + } else { + return Err(DeserializeError::Unqualified); + } + } + } + + pub fn attribute_opt(&mut self, att_name: &str) -> DeserializeResult> { + if let Some(att_value) = self.attributes.remove(&Name { + namespace: None, + local_name: att_name.to_string(), + }) { + let value = ::from_str(&att_value) + .map_err(|_| DeserializeError::FromStr(att_value))?; + return Ok(Some(value)); + } else { + return Ok(None); + } + } + + pub fn attribute_opt_namespaced( + &mut self, + att_name: &str, + att_namespace: &str, + ) -> DeserializeResult> { + if let Some(att_value) = self.attributes.remove(&Name { + namespace: Some(att_namespace.to_string()), + local_name: att_name.to_string(), + }) { + let value = ::from_str(&att_value) + .map_err(|_| DeserializeError::FromStr(att_value))?; + return Ok(Some(value)); + } else { + return Ok(None); + } + } + + pub fn attribute(&mut self, att_name: &str) -> DeserializeResult { + let name = Name { + namespace: None, + local_name: att_name.to_string(), + }; + if let Some(att_value) = self.attributes.remove(&name) { + let value = ::from_str(&att_value) + .map_err(|_| DeserializeError::FromStr(att_value))?; + return Ok(value); + } else { + return Err(DeserializeError::MissingAttribute(name)); + } + } + + pub fn attribute_namespaced( + &mut self, + att_name: &str, + att_namespace: &str, + ) -> DeserializeResult { + let name = Name { + namespace: Some(att_namespace.to_string()), + local_name: att_name.to_string(), + }; + if let Some(att_value) = self.attributes.remove(&name) { + let value = ::from_str(&att_value) + .map_err(|_| DeserializeError::FromStr(att_value))?; + return Ok(value); + } else { + return Err(DeserializeError::MissingAttribute(name)); + } + } + + pub fn no_more_attributes(self) -> DeserializeResult { + if self.attributes.is_empty() { + Ok(self) + } else { + Err(DeserializeError::UnexpectedAttributes(self.attributes)) + } + } + + // for xs:any + + pub fn child_one(&mut self) -> DeserializeResult { + if let Some(position) = self.content.iter().position(|content| match content { + Content::Element(element) => ::from_element(element.clone()).is_ok(), + Content::Text(_) => false, + Content::PI => false, + Content::Comment(_) => false, + }) { + let element = self.content.remove(position).unwrap(); + if let Content::Element(e) = element { + return ::from_element(e); + } else { + return Err(DeserializeError::MissingChild); + } + } else { + return Err(DeserializeError::MissingChild); + } + } + + pub fn child_opt(&mut self) -> DeserializeResult> { + if let Some(position) = self.content.iter().position(|content| match content { + Content::Element(element) => ::from_element(element.clone()).is_ok(), + Content::Text(_) => false, + Content::PI => false, + Content::Comment(_) => false, + }) { + let element = self.content.remove(position).unwrap(); + if let Content::Element(e) = element { + return Ok(Some(::from_element(e)?)); + } else { + return Err(DeserializeError::MissingChild); + } + } else { + return Ok(None); + } + } + + pub fn children(&mut self) -> DeserializeResult> { + let (children, rest): (VecDeque<_>, VecDeque<_>) = self + .content + .clone() + .into_iter() + .partition(|content| match content { + Content::Element(element) => { + ::from_element(element.clone()).is_ok() + } + Content::Text(_) => false, + Content::PI => false, + Content::Comment(_) => false, + }); + self.content = rest; + let children: Vec = children + .into_iter() + .map(|content| { + let child = match content { + Content::Element(element) => ::from_element(element).ok(), + Content::Text(_) => None, + Content::PI => None, + Content::Comment(_) => None, + } + .unwrap(); + child + }) + .collect(); + Ok(children) + } + + pub fn value(&mut self) -> DeserializeResult { + if let Some(position) = self.content.iter().position(|content| match content { + Content::Element(_) => false, + Content::Text(s) => ::from_str(s).is_ok(), + Content::PI => false, + Content::Comment(_) => false, + }) { + let element = self.content.remove(position).unwrap(); + if let Content::Text(v) = element { + return Ok(::from_str(&v).ok().unwrap()); + } else { + panic!("infallible") + } + } else { + return Err(DeserializeError::MissingValue); + } + } + + pub fn value_opt(&mut self) -> DeserializeResult> { + if let Some(position) = self.content.iter().position(|content| match content { + Content::Element(_) => false, + Content::Text(s) => ::from_str(s).is_ok(), + Content::PI => false, + Content::Comment(_) => false, + }) { + let element = self.content.remove(position).unwrap(); + if let Content::Text(v) = element { + return Ok(::from_str(&v).ok()); + } else { + panic!("infallible") + } + } else { + return Ok(None); + } + } + + // for xs:sequence + + pub fn pop_child_one(&mut self) -> DeserializeResult { + loop { + let child = self + .content + .pop_front() + .ok_or(DeserializeError::MissingChild)?; + match child { + Content::Element(element) => return Ok(::from_element(element)?), + Content::Text(_) => { + return Err(DeserializeError::UnexpectedContent(self.content.clone())) + } + Content::PI => {} + Content::Comment(_) => {} + } + } + } + + pub fn pop_child_opt(&mut self) -> DeserializeResult> { + loop { + let child = self.content.pop_front(); + if let Some(child) = child { + match child { + Content::Element(element) => { + return Ok(Some(::from_element(element)?)) + } + Content::Text(_) => { + return Err(DeserializeError::UnexpectedContent(self.content.clone())) + } + Content::PI => {} + Content::Comment(_) => {} + } + } else { + return Ok(None); + } + } + } + + pub fn pop_children(&mut self) -> DeserializeResult> { + let mut children = Vec::new(); + loop { + let child = self.content.front(); + if let Some(child) = child { + match child { + Content::Element(element) => { + if let Ok(child) = ::from_element(element.clone()) { + children.push(child); + self.content.pop_front(); + } + } + Content::Text(_) => return Ok(children), + Content::PI => {} + Content::Comment(_) => {} + } + } else { + return Ok(children); + } + } + } + + pub fn pop_value(&mut self) -> DeserializeResult { + loop { + let child = self + .content + .pop_front() + .ok_or(DeserializeError::MissingChild)?; + match child { + Content::Element(_) => { + return Err(DeserializeError::UnexpectedContent(self.content.clone())) + } + Content::Text(t) => { + return Ok( + ::from_str(&t).map_err(|_| DeserializeError::FromStr(t))? + ) + } + Content::PI => {} + Content::Comment(_) => {} + } + } + } + + pub fn pop_value_opt(&mut self) -> DeserializeResult> { + loop { + let child = self.content.pop_front(); + if let Some(child) = child { + match child { + Content::Element(_) => { + return Err(DeserializeError::UnexpectedContent(self.content.clone())) + } + Content::Text(t) => { + return Ok(Some( + ::from_str(&t) + .map_err(|_| DeserializeError::FromStr(t))?, + )) + } + Content::PI => {} + Content::Comment(_) => {} + } + } else { + return Ok(None); + } + } + } + + pub fn no_more_content(self) -> DeserializeResult { + if self + .content + .iter() + .filter(|content| match content { + Content::Element(_) => true, + Content::Text(_) => true, + Content::PI => false, + Content::Comment(_) => false, + }) + .collect::>() + .is_empty() + { + Ok(self) + } else { + Err(DeserializeError::UnexpectedContent(self.content)) + } + } + + pub fn builder(name: impl ToString, namespace: Option) -> ElementBuilder { + ElementBuilder::new(name, namespace) + } +} + +pub struct ElementBuilder { + name: Name, + namespace_declaration_overrides: Vec, + attributes: Vec<(Name, String)>, + content: Vec, +} + +impl ElementBuilder { + pub fn new(name: impl ToString, namespace: Option) -> Self { + Self { + name: Name { + namespace: namespace.map(|namespace| namespace.to_string()), + local_name: name.to_string(), + }, + namespace_declaration_overrides: Vec::new(), + attributes: Vec::new(), + content: Vec::new(), + } + } + + pub fn push_namespace_declaration_override( + mut self, + prefix: Option, + namespace: impl ToString, + ) -> Self { + self.namespace_declaration_overrides + .push(NamespaceDeclaration { + prefix: prefix.map(|prefix| prefix.to_string()), + namespace: namespace.to_string(), + }); + self + } + + pub fn push_attribute(mut self, name: N, value: V) -> Self { + self.attributes.push(( + // TODO: make sure name is a valid name, same for prefixes + Name { + namespace: None, + local_name: name.to_string(), + }, + value.to_string(), + )); + self + } + + pub fn push_attribute_namespaced( + mut self, + namespace: impl ToString, + name: impl ToString, + value: impl ToString, + ) -> Self { + self.attributes.push(( + Name { + namespace: Some(namespace.to_string()), + local_name: name.to_string(), + }, + value.to_string(), + )); + self + } + + pub fn push_child(mut self, child: ElementBuilder) -> Self { + self.content.push(ContentBuilder::Element(child)); + self + } + + pub fn push_text(mut self, text: impl ToString) -> Self { + self.content.push(ContentBuilder::Text(text.to_string())); + self + } + + pub fn push_attribute_opt(self, name: impl ToString, value: Option) -> Self { + if let Some(value) = value { + self.push_attribute(name, value) + } else { + self + } + } + + pub fn push_attribute_opt_namespaced( + self, + namespace: impl ToString, + name: impl ToString, + value: Option, + ) -> Self { + if let Some(value) = value { + self.push_attribute_namespaced(namespace, name, value) + } else { + self + } + } + + pub fn push_child_opt(self, child: Option) -> Self { + if let Some(child) = child { + self.push_child(child) + } else { + self + } + } + + pub fn push_text_opt(self, text: Option) -> Self { + if let Some(text) = text { + self.push_text(text) + } else { + self + } + } + + pub fn push_content(mut self, content: ContentBuilder) -> Self { + self.content.push(content); + self + } + + pub fn push_children(self, children: Vec) -> Self { + let mut element_builder = self; + for child in children { + element_builder = element_builder.push_content(child.builder()) + } + element_builder + } + + pub fn build(&self) -> Result { + let mut namespace_declaration_overrides = HashSet::new(); + for namespace_declaration in &self.namespace_declaration_overrides { + if !namespace_declaration_overrides.insert(namespace_declaration.clone()) { + return Err(Error::DuplicateNameSpaceDeclaration( + namespace_declaration.clone(), + )); + } + } + + let mut attributes = HashMap::new(); + for (att_name, att_value) in &self.attributes { + if attributes + .insert(att_name.clone(), att_value.to_string()) + .is_some() + { + // TODO: better error + return Err(Error::DuplicateAttribute(att_name.local_name.to_string())); + } + } + + let content: Result> = self + .content + .iter() + .map(|content_builder| -> Result { Ok(content_builder.build()?) }) + .collect(); + let content = content?; + + Ok(Element { + name: self.name.clone(), + namespace_declaration_overrides, + attributes, + content, + }) + } +} + +pub trait IntoContent { + fn builder(&self) -> ContentBuilder; +} + +impl IntoContent for T +where + T: IntoElement, +{ + fn builder(&self) -> ContentBuilder { + ContentBuilder::Element(self.builder()) + } +} + +pub enum ContentBuilder { + Element(ElementBuilder), + Text(String), +} + +impl ContentBuilder { + pub fn build(&self) -> Result { + match self { + ContentBuilder::Element(element_builder) => { + Ok(Content::Element(element_builder.build()?)) + } + ContentBuilder::Text(text) => Ok(Content::Text(text.to_string())), + } + } } pub fn escape_str(s: &str) -> String { diff --git a/src/error.rs b/src/error.rs index 85b5d70..dd8ea17 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,10 +1,27 @@ -use std::{num::ParseIntError, str::Utf8Error}; +use std::{ + collections::{HashMap, VecDeque}, + num::ParseIntError, + str::{FromStr, Utf8Error}, +}; use crate::{ element::{Content, Name, NamespaceDeclaration}, Element, }; +#[derive(Debug)] +pub enum DeserializeError { + FromStr(String), + UnexpectedAttributes(HashMap), + UnexpectedContent(VecDeque), + MissingAttribute(Name), + IncorrectName(String), + IncorrectNamespace(String), + Unqualified, + MissingChild, + MissingValue, +} + #[derive(Debug)] pub enum Error { ReadError(std::io::Error), @@ -26,6 +43,13 @@ pub enum Error { UnexpectedNumberOfContents(usize), UnexpectedContent(Content), UnexpectedElement(Name), + Deserialize(DeserializeError), +} + +impl From for Error { + fn from(e: DeserializeError) -> Self { + Self::Deserialize(e) + } } impl From for Error { diff --git a/src/lib.rs b/src/lib.rs index 2e38d4e..30e6051 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,7 @@ pub const XML_NS: &str = "http://www.w3.org/XML/1998/namespace"; pub const XMLNS_NS: &str = "http://www.w3.org/2000/xmlns/"; pub use element::Element; +pub use error::DeserializeError; pub use error::Error; pub use reader::Reader; pub use writer::Writer; diff --git a/src/reader.rs b/src/reader.rs index e6bb57c..aa4d467 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -2,7 +2,7 @@ use circular::Buffer; use futures::{FutureExt, Stream}; use nom::Err; use std::{ - collections::{hash_set, BTreeMap, HashMap, HashSet}, + collections::{hash_set, BTreeMap, HashMap, HashSet, VecDeque}, future::Future, path::Prefix, pin::{pin, Pin}, @@ -102,12 +102,12 @@ where pub async fn read_start<'s, T: FromElement>(&'s mut self) -> Result { let element = self.read_start_tag().await?; - FromElement::from_element(element) + Ok(FromElement::from_element(element)?) } pub async fn read<'s, T: FromElement>(&'s mut self) -> Result { let element = self.read_element().await?; - FromElement::from_element(element) + Ok(FromElement::from_element(element)?) } pub async fn read_start_tag<'s>(&'s mut self) -> Result { @@ -438,7 +438,7 @@ impl Reader { name: element_name, namespace_declaration_overrides: element_namespace_declarations, attributes, - content: Vec::new(), + content: VecDeque::new(), }); } @@ -652,7 +652,7 @@ impl Reader { namespace_declarations.pop(); } else { - content = Vec::new(); + content = VecDeque::new(); } return Ok(Element { @@ -666,18 +666,18 @@ impl Reader { fn content_from_xml( namespaces: &mut Vec>, xml_content: xml::Content, - ) -> Result> { - let mut content = Vec::new(); + ) -> Result> { + let mut content = VecDeque::new(); let mut text = xml_content.char_data.map(|str| String::from(*str)); for (content_item, char_data) in xml_content.content { match content_item { xml::ContentItem::Element(element) => { text.map(|text| { if !text.is_empty() { - content.push(Content::Text(text)) + content.push_back(Content::Text(text)) } }); - content.push(Content::Element(Self::element_from_xml( + content.push_back(Content::Element(Self::element_from_xml( namespaces, element, )?)); text = char_data.map(|str| String::from(*str)); @@ -711,7 +711,7 @@ impl Reader { } text.map(|text| { if !text.is_empty() { - content.push(Content::Text(text)) + content.push_back(Content::Text(text)) } }); Ok(content)