From c8ed16a2d1d1f1b5278936eeadeae87da7cb104e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?cel=20=F0=9F=8C=B8?= Date: Tue, 19 Nov 2024 14:52:14 +0000 Subject: [PATCH] store only namespace name in qualified name, not namespace declaration --- src/element.rs | 6 +- src/error.rs | 5 +- src/reader.rs | 121 ++++++++++++++++++++++++------------ src/writer.rs | 43 ++++++++++--- src/xml/parsers_complete.rs | 13 ++++ 5 files changed, 136 insertions(+), 52 deletions(-) diff --git a/src/element.rs b/src/element.rs index 9bdde35..4c39c6a 100644 --- a/src/element.rs +++ b/src/element.rs @@ -10,7 +10,7 @@ use crate::{ // when are namespaces names chosen then if they are automatically calculated // namespaces are held by readers and writers. #[derive(PartialEq, Eq, Hash, Clone, Debug)] -pub struct Namespace { +pub struct NamespaceDeclaration { pub prefix: Option, pub namespace: String, } @@ -18,7 +18,7 @@ pub struct Namespace { // names are qualified, they contain a reference to the namespace (held within the reader/writer) #[derive(PartialEq, Eq, Hash, Clone, Debug)] pub struct Name { - pub namespace: Namespace, + pub namespace: String, pub name: String, } @@ -40,7 +40,7 @@ pub struct Element { // namespace: String, // hashmap of explicit namespace declarations on the element itself only // possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader. - pub namespace_decl: HashSet, + // pub namespace_decl: HashSet, // attributes can be in a different namespace than the element. how to make sure they are valid? // maybe include the namespace instead of or with the prefix // you can calculate the prefix from the namespaced name and the current writer context diff --git a/src/error.rs b/src/error.rs index c84c7d0..2d96666 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,6 +1,6 @@ use std::{num::ParseIntError, str::Utf8Error}; -use crate::element::{Name, Namespace}; +use crate::element::{Name, NamespaceDeclaration}; #[derive(Debug)] pub enum Error { @@ -10,11 +10,12 @@ pub enum Error { EntityProcessError(String), // TODO: better choice for failures than string InvalidCharRef(String), - DuplicateNameSpace(Namespace), + DuplicateNameSpaceDeclaration(NamespaceDeclaration), DuplicateAttribute(String), UnqualifiedNamespace(String), MismatchedEndTag(String, String), NotInElement(String), + ExtraData(String), } impl From for Error { diff --git a/src/reader.rs b/src/reader.rs index 64a0ed8..654ca2a 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -13,7 +13,7 @@ use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt}; static MAX_STANZA_SIZE: usize = 65536; use crate::{ - element::{Content, Element, Name, Namespace}, + element::{Content, Element, Name, NamespaceDeclaration}, error::Error, xml::{self, parsers::Parser}, Result, @@ -26,7 +26,7 @@ pub struct Reader { // holds which tags we are in atm over depth // to have names reference namespaces could depth: Vec, - namespaces: Vec>, + namespace_declarations: Vec>, } impl Reader { @@ -35,7 +35,7 @@ impl Reader { inner: reader, buffer: Buffer::with_capacity(MAX_STANZA_SIZE), depth: Vec::new(), - namespaces: Vec::new(), + namespace_declarations: Vec::new(), } } } @@ -75,8 +75,11 @@ where match xml::STag::parse(input) { Ok((rest, e)) => { let len = self.buffer.available_data() - rest.as_bytes().len(); - let element = - Reader::::start_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?; + let element = Reader::::start_tag_from_xml( + &mut self.depth, + &mut self.namespace_declarations, + e, + )?; self.buffer.consume(len); return Ok(element); } @@ -97,7 +100,11 @@ where match xml::ETag::parse(input) { Ok((rest, e)) => { let len = self.buffer.available_data() - rest.as_bytes().len(); - Reader::::end_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?; + Reader::::end_tag_from_xml( + &mut self.depth, + &mut self.namespace_declarations, + e, + )?; self.buffer.consume(len); return Ok(()); } @@ -118,7 +125,8 @@ where match xml::Element::parse(input) { Ok((rest, e)) => { let len = self.buffer.available_data() - rest.as_bytes().len(); - let element = Reader::::element_from_xml(&mut self.namespaces, e)?; + let element = + Reader::::element_from_xml(&mut self.namespace_declarations, e)?; self.buffer.consume(len); return Ok(element); } @@ -156,8 +164,10 @@ where } else { let len = self.buffer.available_data() - rest.as_bytes().len(); - let element = - Self::element_from_xml(&mut self.namespaces, element)?; + let element = Self::element_from_xml( + &mut self.namespace_declarations, + element, + )?; self.buffer.consume(len); return Ok(Content::Element(element)); } @@ -209,12 +219,15 @@ where match xml::ContentItem::parse(input) { Ok((rest, content_item)) => match content_item { xml::ContentItem::Element(element) => { + // text can still be empty if !text.is_empty() { return Ok(Content::Text(text)); } else { let len = self.buffer.available_data() - rest.as_bytes().len(); - let element = - Self::element_from_xml(&mut self.namespaces, element)?; + let element = Self::element_from_xml( + &mut self.namespace_declarations, + element, + )?; self.buffer.consume(len); return Ok(Content::Element(element)); } @@ -264,7 +277,7 @@ where impl Reader { fn start_tag_from_xml( depth: &mut Vec, - namespaces: &mut Vec>, + namespaces: &mut Vec>, s_tag: xml::STag, ) -> Result { let mut namespace_declarations = HashSet::new(); @@ -281,17 +294,17 @@ impl Reader { } xml::NSAttName::DefaultAttName => None, }; - let namespace = Namespace { + let namespace = NamespaceDeclaration { prefix, namespace: namespace.process()?, }; if !namespace_declarations.insert(namespace.clone()) { - return Err(Error::DuplicateNameSpace(namespace)); + return Err(Error::DuplicateNameSpaceDeclaration(namespace)); } } // all namespaces available to the element (from both parent elements and element itself) - let namespace_stack: Vec<&Namespace> = namespaces + let namespace_stack: Vec<&NamespaceDeclaration> = namespaces .iter() .flatten() .chain(namespace_declarations.iter()) @@ -322,10 +335,9 @@ impl Reader { attribute_name = unprefixed_name.to_string(); } } - if let Some(namespace) = namespace { - let namespace = (*namespace).clone(); + if let Some(namespace_declaration) = namespace { let name = Name { - namespace, + namespace: namespace_declaration.namespace.clone(), name: attribute_name, }; let value = value.process()?; @@ -354,11 +366,14 @@ impl Reader { } } - let namespace = (*namespace + let namespace_declaration = (*namespace .ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?) .clone(); - let name = Name { namespace, name }; + let name = Name { + namespace: namespace_declaration.namespace, + name, + }; depth.push(name.clone()); @@ -366,7 +381,6 @@ impl Reader { return Ok(Element { name, - namespace_decl: namespace_declarations, attributes, content: Vec::new(), }); @@ -374,13 +388,38 @@ impl Reader { fn end_tag_from_xml( depth: &mut Vec, - namespaces: &mut Vec>, + namespaces: &mut Vec>, e_tag: xml::ETag, ) -> Result<()> { if let Some(s_tag_name) = depth.pop() { - if s_tag_name.namespace.prefix.as_deref() == e_tag.name.prefix() - && s_tag_name.name == e_tag.name.local_part() - { + let (namespace, name); + let namespace_declarations: Vec<_> = namespaces.iter().flatten().collect(); + match e_tag.name { + xml::QName::PrefixedName(ref prefixed_name) => { + namespace = namespace_declarations + .iter() + .rfind(|namespace| { + namespace.prefix.as_deref() == Some(**prefixed_name.prefix) + }) + .map(|namespace_decl| namespace_decl.namespace.clone()) + .ok_or_else(|| { + return Error::UnqualifiedNamespace((&e_tag.name).to_string()); + })?; + name = prefixed_name.local_part.to_string(); + } + xml::QName::UnprefixedName(ref unprefixed_name) => { + namespace = namespace_declarations + .iter() + .rfind(|namespace| namespace.prefix.as_deref() == None) + .map(|namespace_decl| namespace_decl.namespace.clone()) + .ok_or_else(|| { + return Error::UnqualifiedNamespace(e_tag.name.to_string()); + })?; + name = unprefixed_name.to_string(); + } + } + let e_tag_name = Name { namespace, name }; + if s_tag_name == e_tag_name { namespaces.pop(); return Ok(()); } else { @@ -395,7 +434,7 @@ impl Reader { } fn element_from_xml( - namespaces: &mut Vec>, + namespaces: &mut Vec>, element: xml::Element, ) -> Result { match element { @@ -416,17 +455,17 @@ impl Reader { } xml::NSAttName::DefaultAttName => None, }; - let namespace = Namespace { + let namespace = NamespaceDeclaration { prefix, namespace: namespace.process()?, }; if !namespace_declarations.insert(namespace.clone()) { - return Err(Error::DuplicateNameSpace(namespace)); + return Err(Error::DuplicateNameSpaceDeclaration(namespace)); } } // all namespaces available to the element (from both parent elements and element itself) - let namespace_stack: Vec<&Namespace> = namespaces + let namespace_stack: Vec<&NamespaceDeclaration> = namespaces .iter() .flatten() .chain(namespace_declarations.iter()) @@ -460,7 +499,7 @@ impl Reader { if let Some(namespace) = namespace { let namespace = (*namespace).clone(); let name = Name { - namespace, + namespace: namespace.namespace, name: attribute_name, }; let value = value.process()?; @@ -493,11 +532,13 @@ impl Reader { .ok_or_else(|| Error::UnqualifiedNamespace(empty_elem_tag.name.to_string()))?) .clone(); - let name = Name { namespace, name }; + let name = Name { + namespace: namespace.namespace, + name, + }; return Ok(Element { name, - namespace_decl: namespace_declarations, attributes, content: Vec::new(), }); @@ -523,17 +564,17 @@ impl Reader { } xml::NSAttName::DefaultAttName => None, }; - let namespace = Namespace { + let namespace = NamespaceDeclaration { prefix, namespace: namespace.process()?, }; if !namespace_declarations.insert(namespace.clone()) { - return Err(Error::DuplicateNameSpace(namespace)); + return Err(Error::DuplicateNameSpaceDeclaration(namespace)); } } // all namespaces available to the element (from both parent elements and element itself) - let namespace_stack: Vec<&Namespace> = namespaces + let namespace_stack: Vec<&NamespaceDeclaration> = namespaces .iter() .flatten() .chain(namespace_declarations.iter()) @@ -567,7 +608,7 @@ impl Reader { if let Some(namespace) = namespace { let namespace = (*namespace).clone(); let name = Name { - namespace, + namespace: namespace.namespace, name: attribute_name, }; let value = value.process()?; @@ -600,7 +641,10 @@ impl Reader { .ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?) .clone(); - let name = Name { namespace, name }; + let name = Name { + namespace: namespace.namespace, + name, + }; namespaces.push(namespace_declarations.clone()); @@ -610,7 +654,6 @@ impl Reader { return Ok(Element { name, - namespace_decl: namespace_declarations, attributes, content, }); @@ -619,7 +662,7 @@ impl Reader { } fn content_from_xml( - namespaces: &mut Vec>, + namespaces: &mut Vec>, element: xml::Content, ) -> Result> { let mut content = Vec::new(); diff --git a/src/writer.rs b/src/writer.rs index 08be8c2..249ced5 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -1,26 +1,53 @@ -use futures::{AsyncWrite, Sink}; +use std::collections::HashSet; + +use futures::Sink; +use tokio::io::AsyncWrite; use crate::{ - element::{Element, Name, Namespace}, + element::{Element, Name, NamespaceDeclaration}, error::Error, + xml::{self, composers::Composer, parsers_complete::Parser, ETag}, }; // pub struct Writer { pub struct Writer { - writer: W, + inner: W, depth: Vec, - namespaces: Vec<(usize, Namespace)>, + namespaces: Vec>, } -impl Writer { - pub async fn write(&self, element: impl Into) -> Result<(), Error> { +impl Writer { + pub async fn write(&mut self, element: Element) -> Result<(), Error> { todo!() } - pub async fn write_start(&self, element: impl Into) -> Result<(), Error> { + + pub async fn write_start(&mut self, element: Element) -> Result<(), Error> { todo!() } - pub async fn write_end(&self) -> Result<(), Error> { + + pub async fn write_end(&mut self) -> Result<(), Error> { todo!() + // let e_tag; + // if let Some(name) = self.depth.pop() { + // if let Some(prefix) = name.namespace.prefix { + // e_tag = xml::ETag { + // name: xml::QName::PrefixedName(xml::PrefixedName { + // prefix: xml::Prefix::parse_full(&prefix)?, + // local_part: xml::LocalPart::parse_full(&name.name)?, + // }), + // }; + // e_tag.write(&mut self.inner).await?; + // Ok(()) + // } else { + // e_tag = xml::ETag { + // name: xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.name)?), + // }; + // e_tag.write(&mut self.inner).await?; + // Ok(()) + // } + // } else { + // return Err(Error::NotInElement("".to_string())); + // } } } diff --git a/src/xml/parsers_complete.rs b/src/xml/parsers_complete.rs index b057f62..900a3dd 100644 --- a/src/xml/parsers_complete.rs +++ b/src/xml/parsers_complete.rs @@ -30,6 +30,19 @@ pub trait Parser<'s> { type Output; fn parse(input: &'s str) -> IResult<&str, Self::Output>; + + fn parse_full(input: &'s str) -> crate::Result { + match ::parse(input) { + Ok((rest, output)) => { + if rest.is_empty() { + return Ok(output); + } else { + return Err(crate::error::Error::ExtraData(rest.to_string())); + } + } + Result::Err(e) => return Err(crate::error::Error::ParseError(e.to_string())), + } + } } /// [1] NSAttName ::= PrefixedAttName | DefaultAttName