create element builder and refactor api

This commit is contained in:
cel 🌸 2024-11-28 18:00:30 +00:00
parent 381af38a09
commit aa940a8eac
4 changed files with 567 additions and 17 deletions

View File

@ -1,25 +1,33 @@
// elements resemble a final tree, including inherited namespace information
#![feature(drain_filter)]
use std::{ use std::{
collections::{HashMap, HashSet}, collections::{HashMap, HashSet, VecDeque},
convert::Infallible, convert::Infallible,
str::FromStr, str::FromStr,
}; };
use crate::{ use crate::{
error::Error, error::{DeserializeError, Error},
xml::{self, parsers_complete::Parser, Attribute}, xml::{self, parsers_complete::Parser, Attribute},
Result, Result,
}; };
/// Result alias used by the element deserialization helpers; the error type is
/// always [`DeserializeError`].
pub type DeserializeResult<T> = std::result::Result<T, DeserializeError>;
pub trait FromElement: Sized { pub trait FromElement: Sized {
fn from_element(element: Element) -> Result<Self>; fn from_element(element: Element) -> DeserializeResult<Self>;
} }
pub trait IntoElement { pub trait IntoElement {
fn into_element(&self) -> Element; fn builder(&self) -> ElementBuilder;
fn get_content(&self) -> Vec<Content> { fn into_element(&self) -> Element {
self.builder().build().unwrap()
}
fn get_content(&self) -> VecDeque<Content> {
let element = self.into_element(); let element = self.into_element();
element.content element.content
} }
@ -67,7 +75,524 @@ pub struct Element {
// this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified. // this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.
pub attributes: HashMap<Name, String>, pub attributes: HashMap<Name, String>,
// TODO: make a hashmap maybe? to be able to address parts of the content individually // TODO: make a hashmap maybe? to be able to address parts of the content individually
pub content: Vec<Content>, pub content: VecDeque<Content>,
}
impl Element {
pub fn identify(&self) -> (Option<&str>, &str) {
(self.name.namespace.as_deref(), &self.name.local_name)
}
pub fn check_name(&self, name: &str) -> DeserializeResult<()> {
if self.name.local_name == name {
Ok(())
} else {
return Err(DeserializeError::IncorrectName(
self.name.local_name.clone(),
));
}
}
pub fn check_namespace(&self, namespace: &str) -> DeserializeResult<()> {
if self.name.namespace.as_deref() == Some(namespace) {
return Ok(());
} else {
if let Some(namespace) = &self.name.namespace {
return Err(DeserializeError::IncorrectNamespace(namespace.clone()));
} else {
return Err(DeserializeError::Unqualified);
}
}
}
pub fn attribute_opt<V: FromStr>(&mut self, att_name: &str) -> DeserializeResult<Option<V>> {
if let Some(att_value) = self.attributes.remove(&Name {
namespace: None,
local_name: att_name.to_string(),
}) {
let value = <V as FromStr>::from_str(&att_value)
.map_err(|_| DeserializeError::FromStr(att_value))?;
return Ok(Some(value));
} else {
return Ok(None);
}
}
pub fn attribute_opt_namespaced<V: FromStr>(
&mut self,
att_name: &str,
att_namespace: &str,
) -> DeserializeResult<Option<V>> {
if let Some(att_value) = self.attributes.remove(&Name {
namespace: Some(att_namespace.to_string()),
local_name: att_name.to_string(),
}) {
let value = <V as FromStr>::from_str(&att_value)
.map_err(|_| DeserializeError::FromStr(att_value))?;
return Ok(Some(value));
} else {
return Ok(None);
}
}
pub fn attribute<V: FromStr>(&mut self, att_name: &str) -> DeserializeResult<V> {
let name = Name {
namespace: None,
local_name: att_name.to_string(),
};
if let Some(att_value) = self.attributes.remove(&name) {
let value = <V as FromStr>::from_str(&att_value)
.map_err(|_| DeserializeError::FromStr(att_value))?;
return Ok(value);
} else {
return Err(DeserializeError::MissingAttribute(name));
}
}
pub fn attribute_namespaced<V: FromStr>(
&mut self,
att_name: &str,
att_namespace: &str,
) -> DeserializeResult<V> {
let name = Name {
namespace: Some(att_namespace.to_string()),
local_name: att_name.to_string(),
};
if let Some(att_value) = self.attributes.remove(&name) {
let value = <V as FromStr>::from_str(&att_value)
.map_err(|_| DeserializeError::FromStr(att_value))?;
return Ok(value);
} else {
return Err(DeserializeError::MissingAttribute(name));
}
}
pub fn no_more_attributes(self) -> DeserializeResult<Self> {
if self.attributes.is_empty() {
Ok(self)
} else {
Err(DeserializeError::UnexpectedAttributes(self.attributes))
}
}
// for xs:any
pub fn child_one<T: FromElement>(&mut self) -> DeserializeResult<T> {
if let Some(position) = self.content.iter().position(|content| match content {
Content::Element(element) => <T as FromElement>::from_element(element.clone()).is_ok(),
Content::Text(_) => false,
Content::PI => false,
Content::Comment(_) => false,
}) {
let element = self.content.remove(position).unwrap();
if let Content::Element(e) = element {
return <T as FromElement>::from_element(e);
} else {
return Err(DeserializeError::MissingChild);
}
} else {
return Err(DeserializeError::MissingChild);
}
}
pub fn child_opt<T: FromElement>(&mut self) -> DeserializeResult<Option<T>> {
if let Some(position) = self.content.iter().position(|content| match content {
Content::Element(element) => <T as FromElement>::from_element(element.clone()).is_ok(),
Content::Text(_) => false,
Content::PI => false,
Content::Comment(_) => false,
}) {
let element = self.content.remove(position).unwrap();
if let Content::Element(e) = element {
return Ok(Some(<T as FromElement>::from_element(e)?));
} else {
return Err(DeserializeError::MissingChild);
}
} else {
return Ok(None);
}
}
pub fn children<T: FromElement>(&mut self) -> DeserializeResult<Vec<T>> {
let (children, rest): (VecDeque<_>, VecDeque<_>) = self
.content
.clone()
.into_iter()
.partition(|content| match content {
Content::Element(element) => {
<T as FromElement>::from_element(element.clone()).is_ok()
}
Content::Text(_) => false,
Content::PI => false,
Content::Comment(_) => false,
});
self.content = rest;
let children: Vec<T> = children
.into_iter()
.map(|content| {
let child = match content {
Content::Element(element) => <T as FromElement>::from_element(element).ok(),
Content::Text(_) => None,
Content::PI => None,
Content::Comment(_) => None,
}
.unwrap();
child
})
.collect();
Ok(children)
}
pub fn value<V: FromStr>(&mut self) -> DeserializeResult<V> {
if let Some(position) = self.content.iter().position(|content| match content {
Content::Element(_) => false,
Content::Text(s) => <V as FromStr>::from_str(s).is_ok(),
Content::PI => false,
Content::Comment(_) => false,
}) {
let element = self.content.remove(position).unwrap();
if let Content::Text(v) = element {
return Ok(<V as FromStr>::from_str(&v).ok().unwrap());
} else {
panic!("infallible")
}
} else {
return Err(DeserializeError::MissingValue);
}
}
pub fn value_opt<V: FromStr>(&mut self) -> DeserializeResult<Option<V>> {
if let Some(position) = self.content.iter().position(|content| match content {
Content::Element(_) => false,
Content::Text(s) => <V as FromStr>::from_str(s).is_ok(),
Content::PI => false,
Content::Comment(_) => false,
}) {
let element = self.content.remove(position).unwrap();
if let Content::Text(v) = element {
return Ok(<V as FromStr>::from_str(&v).ok());
} else {
panic!("infallible")
}
} else {
return Ok(None);
}
}
// for xs:sequence
pub fn pop_child_one<T: FromElement>(&mut self) -> DeserializeResult<T> {
loop {
let child = self
.content
.pop_front()
.ok_or(DeserializeError::MissingChild)?;
match child {
Content::Element(element) => return Ok(<T as FromElement>::from_element(element)?),
Content::Text(_) => {
return Err(DeserializeError::UnexpectedContent(self.content.clone()))
}
Content::PI => {}
Content::Comment(_) => {}
}
}
}
pub fn pop_child_opt<T: FromElement>(&mut self) -> DeserializeResult<Option<T>> {
loop {
let child = self.content.pop_front();
if let Some(child) = child {
match child {
Content::Element(element) => {
return Ok(Some(<T as FromElement>::from_element(element)?))
}
Content::Text(_) => {
return Err(DeserializeError::UnexpectedContent(self.content.clone()))
}
Content::PI => {}
Content::Comment(_) => {}
}
} else {
return Ok(None);
}
}
}
pub fn pop_children<T: FromElement>(&mut self) -> DeserializeResult<Vec<T>> {
let mut children = Vec::new();
loop {
let child = self.content.front();
if let Some(child) = child {
match child {
Content::Element(element) => {
if let Ok(child) = <T as FromElement>::from_element(element.clone()) {
children.push(child);
self.content.pop_front();
}
}
Content::Text(_) => return Ok(children),
Content::PI => {}
Content::Comment(_) => {}
}
} else {
return Ok(children);
}
}
}
pub fn pop_value<V: FromStr>(&mut self) -> DeserializeResult<V> {
loop {
let child = self
.content
.pop_front()
.ok_or(DeserializeError::MissingChild)?;
match child {
Content::Element(_) => {
return Err(DeserializeError::UnexpectedContent(self.content.clone()))
}
Content::Text(t) => {
return Ok(
<V as FromStr>::from_str(&t).map_err(|_| DeserializeError::FromStr(t))?
)
}
Content::PI => {}
Content::Comment(_) => {}
}
}
}
pub fn pop_value_opt<V: FromStr>(&mut self) -> DeserializeResult<Option<V>> {
loop {
let child = self.content.pop_front();
if let Some(child) = child {
match child {
Content::Element(_) => {
return Err(DeserializeError::UnexpectedContent(self.content.clone()))
}
Content::Text(t) => {
return Ok(Some(
<V as FromStr>::from_str(&t)
.map_err(|_| DeserializeError::FromStr(t))?,
))
}
Content::PI => {}
Content::Comment(_) => {}
}
} else {
return Ok(None);
}
}
}
pub fn no_more_content(self) -> DeserializeResult<Self> {
if self
.content
.iter()
.filter(|content| match content {
Content::Element(_) => true,
Content::Text(_) => true,
Content::PI => false,
Content::Comment(_) => false,
})
.collect::<Vec<_>>()
.is_empty()
{
Ok(self)
} else {
Err(DeserializeError::UnexpectedContent(self.content))
}
}
pub fn builder(name: impl ToString, namespace: Option<impl ToString>) -> ElementBuilder {
ElementBuilder::new(name, namespace)
}
}
/// Collects the parts of an [`Element`] before it is validated and assembled
/// by `ElementBuilder::build`.
pub struct ElementBuilder {
    /// Name (namespace and local name) of the element being built.
    name: Name,
    /// Namespace declarations pushed so far, in push order; `build` rejects duplicates.
    namespace_declaration_overrides: Vec<NamespaceDeclaration>,
    /// Attribute name/value pairs pushed so far, in push order; `build` rejects
    /// a name that appears twice.
    attributes: Vec<(Name, String)>,
    /// Child content (element builders and text) in document order.
    content: Vec<ContentBuilder>,
}
impl ElementBuilder {
pub fn new(name: impl ToString, namespace: Option<impl ToString>) -> Self {
Self {
name: Name {
namespace: namespace.map(|namespace| namespace.to_string()),
local_name: name.to_string(),
},
namespace_declaration_overrides: Vec::new(),
attributes: Vec::new(),
content: Vec::new(),
}
}
pub fn push_namespace_declaration_override(
mut self,
prefix: Option<impl ToString>,
namespace: impl ToString,
) -> Self {
self.namespace_declaration_overrides
.push(NamespaceDeclaration {
prefix: prefix.map(|prefix| prefix.to_string()),
namespace: namespace.to_string(),
});
self
}
pub fn push_attribute<N: ToString, V: ToString>(mut self, name: N, value: V) -> Self {
self.attributes.push((
// TODO: make sure name is a valid name, same for prefixes
Name {
namespace: None,
local_name: name.to_string(),
},
value.to_string(),
));
self
}
pub fn push_attribute_namespaced(
mut self,
namespace: impl ToString,
name: impl ToString,
value: impl ToString,
) -> Self {
self.attributes.push((
Name {
namespace: Some(namespace.to_string()),
local_name: name.to_string(),
},
value.to_string(),
));
self
}
pub fn push_child(mut self, child: ElementBuilder) -> Self {
self.content.push(ContentBuilder::Element(child));
self
}
pub fn push_text(mut self, text: impl ToString) -> Self {
self.content.push(ContentBuilder::Text(text.to_string()));
self
}
pub fn push_attribute_opt(self, name: impl ToString, value: Option<impl ToString>) -> Self {
if let Some(value) = value {
self.push_attribute(name, value)
} else {
self
}
}
pub fn push_attribute_opt_namespaced(
self,
namespace: impl ToString,
name: impl ToString,
value: Option<impl ToString>,
) -> Self {
if let Some(value) = value {
self.push_attribute_namespaced(namespace, name, value)
} else {
self
}
}
pub fn push_child_opt(self, child: Option<ElementBuilder>) -> Self {
if let Some(child) = child {
self.push_child(child)
} else {
self
}
}
pub fn push_text_opt(self, text: Option<impl ToString>) -> Self {
if let Some(text) = text {
self.push_text(text)
} else {
self
}
}
pub fn push_content(mut self, content: ContentBuilder) -> Self {
self.content.push(content);
self
}
pub fn push_children(self, children: Vec<impl IntoContent>) -> Self {
let mut element_builder = self;
for child in children {
element_builder = element_builder.push_content(child.builder())
}
element_builder
}
pub fn build(&self) -> Result<Element> {
let mut namespace_declaration_overrides = HashSet::new();
for namespace_declaration in &self.namespace_declaration_overrides {
if !namespace_declaration_overrides.insert(namespace_declaration.clone()) {
return Err(Error::DuplicateNameSpaceDeclaration(
namespace_declaration.clone(),
));
}
}
let mut attributes = HashMap::new();
for (att_name, att_value) in &self.attributes {
if attributes
.insert(att_name.clone(), att_value.to_string())
.is_some()
{
// TODO: better error
return Err(Error::DuplicateAttribute(att_name.local_name.to_string()));
}
}
let content: Result<VecDeque<Content>> = self
.content
.iter()
.map(|content_builder| -> Result<Content> { Ok(content_builder.build()?) })
.collect();
let content = content?;
Ok(Element {
name: self.name.clone(),
namespace_declaration_overrides,
attributes,
content,
})
}
}
/// Types that can describe themselves as a piece of element content.
pub trait IntoContent {
    /// Returns the [`ContentBuilder`] describing `self`.
    fn builder(&self) -> ContentBuilder;
}
impl<T> IntoContent for T
where
T: IntoElement,
{
fn builder(&self) -> ContentBuilder {
ContentBuilder::Element(self.builder())
}
}
/// A not-yet-built piece of element content.
pub enum ContentBuilder {
    /// A child element, still in builder form.
    Element(ElementBuilder),
    /// A text node.
    Text(String),
}
impl ContentBuilder {
pub fn build(&self) -> Result<Content> {
match self {
ContentBuilder::Element(element_builder) => {
Ok(Content::Element(element_builder.build()?))
}
ContentBuilder::Text(text) => Ok(Content::Text(text.to_string())),
}
}
} }
pub fn escape_str(s: &str) -> String { pub fn escape_str(s: &str) -> String {

View File

@ -1,10 +1,27 @@
use std::{num::ParseIntError, str::Utf8Error}; use std::{
collections::{HashMap, VecDeque},
num::ParseIntError,
str::{FromStr, Utf8Error},
};
use crate::{ use crate::{
element::{Content, Name, NamespaceDeclaration}, element::{Content, Name, NamespaceDeclaration},
Element, Element,
}; };
/// Errors reported while turning an `Element` into a typed value.
#[derive(Debug)]
pub enum DeserializeError {
    /// An attribute or text value failed to parse; carries the raw string.
    FromStr(String),
    /// Attributes were left over; carries the remaining attributes.
    UnexpectedAttributes(HashMap<Name, String>),
    /// Content of an unexpected kind was encountered or left over; carries the
    /// element's remaining content at the point of failure.
    UnexpectedContent(VecDeque<Content>),
    /// A required attribute was absent; carries the name that was looked up.
    MissingAttribute(Name),
    /// The element's local name was not the expected one; carries the actual
    /// local name.
    IncorrectName(String),
    /// The element's namespace was not the expected one; carries the actual
    /// namespace.
    IncorrectNamespace(String),
    /// A namespace was expected but the element has none.
    Unqualified,
    /// A required child element was not found.
    MissingChild,
    /// A required text value was not found.
    MissingValue,
}
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
ReadError(std::io::Error), ReadError(std::io::Error),
@ -26,6 +43,13 @@ pub enum Error {
UnexpectedNumberOfContents(usize), UnexpectedNumberOfContents(usize),
UnexpectedContent(Content), UnexpectedContent(Content),
UnexpectedElement(Name), UnexpectedElement(Name),
Deserialize(DeserializeError),
}
impl From<DeserializeError> for Error {
fn from(e: DeserializeError) -> Self {
Self::Deserialize(e)
}
} }
impl From<std::io::Error> for Error { impl From<std::io::Error> for Error {

View File

@ -11,6 +11,7 @@ pub const XML_NS: &str = "http://www.w3.org/XML/1998/namespace";
pub const XMLNS_NS: &str = "http://www.w3.org/2000/xmlns/"; pub const XMLNS_NS: &str = "http://www.w3.org/2000/xmlns/";
pub use element::Element; pub use element::Element;
pub use error::DeserializeError;
pub use error::Error; pub use error::Error;
pub use reader::Reader; pub use reader::Reader;
pub use writer::Writer; pub use writer::Writer;

View File

@ -2,7 +2,7 @@ use circular::Buffer;
use futures::{FutureExt, Stream}; use futures::{FutureExt, Stream};
use nom::Err; use nom::Err;
use std::{ use std::{
collections::{hash_set, BTreeMap, HashMap, HashSet}, collections::{hash_set, BTreeMap, HashMap, HashSet, VecDeque},
future::Future, future::Future,
path::Prefix, path::Prefix,
pin::{pin, Pin}, pin::{pin, Pin},
@ -102,12 +102,12 @@ where
/// Reads the next start tag and deserializes it as `T`.
///
/// # Errors
///
/// Propagates the error from `read_start_tag`; a failure in
/// `T::from_element` is converted into the function's error type by `?`.
pub async fn read_start<'s, T: FromElement>(&'s mut self) -> Result<T> {
    let element = self.read_start_tag().await?;
    Ok(T::from_element(element)?)
}
/// Reads the next complete element and deserializes it as `T`.
///
/// # Errors
///
/// Propagates the error from `read_element`; a failure in
/// `T::from_element` is converted into the function's error type by `?`.
pub async fn read<'s, T: FromElement>(&'s mut self) -> Result<T> {
    let element = self.read_element().await?;
    Ok(T::from_element(element)?)
}
pub async fn read_start_tag<'s>(&'s mut self) -> Result<Element> { pub async fn read_start_tag<'s>(&'s mut self) -> Result<Element> {
@ -438,7 +438,7 @@ impl<R> Reader<R> {
name: element_name, name: element_name,
namespace_declaration_overrides: element_namespace_declarations, namespace_declaration_overrides: element_namespace_declarations,
attributes, attributes,
content: Vec::new(), content: VecDeque::new(),
}); });
} }
@ -652,7 +652,7 @@ impl<R> Reader<R> {
namespace_declarations.pop(); namespace_declarations.pop();
} else { } else {
content = Vec::new(); content = VecDeque::new();
} }
return Ok(Element { return Ok(Element {
@ -666,18 +666,18 @@ impl<R> Reader<R> {
fn content_from_xml( fn content_from_xml(
namespaces: &mut Vec<HashSet<NamespaceDeclaration>>, namespaces: &mut Vec<HashSet<NamespaceDeclaration>>,
xml_content: xml::Content, xml_content: xml::Content,
) -> Result<Vec<Content>> { ) -> Result<VecDeque<Content>> {
let mut content = Vec::new(); let mut content = VecDeque::new();
let mut text = xml_content.char_data.map(|str| String::from(*str)); let mut text = xml_content.char_data.map(|str| String::from(*str));
for (content_item, char_data) in xml_content.content { for (content_item, char_data) in xml_content.content {
match content_item { match content_item {
xml::ContentItem::Element(element) => { xml::ContentItem::Element(element) => {
text.map(|text| { text.map(|text| {
if !text.is_empty() { if !text.is_empty() {
content.push(Content::Text(text)) content.push_back(Content::Text(text))
} }
}); });
content.push(Content::Element(Self::element_from_xml( content.push_back(Content::Element(Self::element_from_xml(
namespaces, element, namespaces, element,
)?)); )?));
text = char_data.map(|str| String::from(*str)); text = char_data.map(|str| String::from(*str));
@ -711,7 +711,7 @@ impl<R> Reader<R> {
} }
text.map(|text| { text.map(|text| {
if !text.is_empty() { if !text.is_empty() {
content.push(Content::Text(text)) content.push_back(Content::Text(text))
} }
}); });
Ok(content) Ok(content)