store only namespace name in qualified name, not namespace declaration

This commit is contained in:
cel 🌸 2024-11-19 14:52:14 +00:00
parent 4c235b972e
commit c8ed16a2d1
5 changed files with 136 additions and 52 deletions

View File

@ -10,7 +10,7 @@ use crate::{
// when are namespace names chosen, then, if they are automatically calculated?
// namespaces are held by readers and writers.
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
pub struct Namespace {
pub struct NamespaceDeclaration {
pub prefix: Option<String>,
pub namespace: String,
}
@ -18,7 +18,7 @@ pub struct Namespace {
// names are qualified, they contain a reference to the namespace (held within the reader/writer)
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
pub struct Name {
pub namespace: Namespace,
pub namespace: String,
pub name: String,
}
@ -40,7 +40,7 @@ pub struct Element {
// namespace: String,
// hashmap of explicit namespace declarations on the element itself only
// possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader.
pub namespace_decl: HashSet<Namespace>,
// pub namespace_decl: HashSet<Namespace>,
// attributes can be in a different namespace than the element. how to make sure they are valid?
// maybe include the namespace instead of or with the prefix
// you can calculate the prefix from the namespaced name and the current writer context

View File

@ -1,6 +1,6 @@
use std::{num::ParseIntError, str::Utf8Error};
use crate::element::{Name, Namespace};
use crate::element::{Name, NamespaceDeclaration};
#[derive(Debug)]
pub enum Error {
@ -10,11 +10,12 @@ pub enum Error {
EntityProcessError(String),
// TODO: better choice for failures than string
InvalidCharRef(String),
DuplicateNameSpace(Namespace),
DuplicateNameSpaceDeclaration(NamespaceDeclaration),
DuplicateAttribute(String),
UnqualifiedNamespace(String),
MismatchedEndTag(String, String),
NotInElement(String),
ExtraData(String),
}
impl From<std::io::Error> for Error {

View File

@ -13,7 +13,7 @@ use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
static MAX_STANZA_SIZE: usize = 65536;
use crate::{
element::{Content, Element, Name, Namespace},
element::{Content, Element, Name, NamespaceDeclaration},
error::Error,
xml::{self, parsers::Parser},
Result,
@ -26,7 +26,7 @@ pub struct Reader<R> {
// holds the names of the tags we are currently inside, one entry per level of depth
// to have names reference namespaces could
depth: Vec<Name>,
namespaces: Vec<HashSet<Namespace>>,
namespace_declarations: Vec<HashSet<NamespaceDeclaration>>,
}
impl<R> Reader<R> {
@ -35,7 +35,7 @@ impl<R> Reader<R> {
inner: reader,
buffer: Buffer::with_capacity(MAX_STANZA_SIZE),
depth: Vec::new(),
namespaces: Vec::new(),
namespace_declarations: Vec::new(),
}
}
}
@ -75,8 +75,11 @@ where
match xml::STag::parse(input) {
Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
let element =
Reader::<R>::start_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?;
let element = Reader::<R>::start_tag_from_xml(
&mut self.depth,
&mut self.namespace_declarations,
e,
)?;
self.buffer.consume(len);
return Ok(element);
}
@ -97,7 +100,11 @@ where
match xml::ETag::parse(input) {
Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
Reader::<R>::end_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?;
Reader::<R>::end_tag_from_xml(
&mut self.depth,
&mut self.namespace_declarations,
e,
)?;
self.buffer.consume(len);
return Ok(());
}
@ -118,7 +125,8 @@ where
match xml::Element::parse(input) {
Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
let element = Reader::<R>::element_from_xml(&mut self.namespaces, e)?;
let element =
Reader::<R>::element_from_xml(&mut self.namespace_declarations, e)?;
self.buffer.consume(len);
return Ok(element);
}
@ -156,8 +164,10 @@ where
} else {
let len =
self.buffer.available_data() - rest.as_bytes().len();
let element =
Self::element_from_xml(&mut self.namespaces, element)?;
let element = Self::element_from_xml(
&mut self.namespace_declarations,
element,
)?;
self.buffer.consume(len);
return Ok(Content::Element(element));
}
@ -209,12 +219,15 @@ where
match xml::ContentItem::parse(input) {
Ok((rest, content_item)) => match content_item {
xml::ContentItem::Element(element) => {
// text can still be empty
if !text.is_empty() {
return Ok(Content::Text(text));
} else {
let len = self.buffer.available_data() - rest.as_bytes().len();
let element =
Self::element_from_xml(&mut self.namespaces, element)?;
let element = Self::element_from_xml(
&mut self.namespace_declarations,
element,
)?;
self.buffer.consume(len);
return Ok(Content::Element(element));
}
@ -264,7 +277,7 @@ where
impl<R> Reader<R> {
fn start_tag_from_xml(
depth: &mut Vec<Name>,
namespaces: &mut Vec<HashSet<Namespace>>,
namespaces: &mut Vec<HashSet<NamespaceDeclaration>>,
s_tag: xml::STag,
) -> Result<Element> {
let mut namespace_declarations = HashSet::new();
@ -281,17 +294,17 @@ impl<R> Reader<R> {
}
xml::NSAttName::DefaultAttName => None,
};
let namespace = Namespace {
let namespace = NamespaceDeclaration {
prefix,
namespace: namespace.process()?,
};
if !namespace_declarations.insert(namespace.clone()) {
return Err(Error::DuplicateNameSpace(namespace));
return Err(Error::DuplicateNameSpaceDeclaration(namespace));
}
}
// all namespaces available to the element (from both parent elements and element itself)
let namespace_stack: Vec<&Namespace> = namespaces
let namespace_stack: Vec<&NamespaceDeclaration> = namespaces
.iter()
.flatten()
.chain(namespace_declarations.iter())
@ -322,10 +335,9 @@ impl<R> Reader<R> {
attribute_name = unprefixed_name.to_string();
}
}
if let Some(namespace) = namespace {
let namespace = (*namespace).clone();
if let Some(namespace_declaration) = namespace {
let name = Name {
namespace,
namespace: namespace_declaration.namespace.clone(),
name: attribute_name,
};
let value = value.process()?;
@ -354,11 +366,14 @@ impl<R> Reader<R> {
}
}
let namespace = (*namespace
let namespace_declaration = (*namespace
.ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?)
.clone();
let name = Name { namespace, name };
let name = Name {
namespace: namespace_declaration.namespace,
name,
};
depth.push(name.clone());
@ -366,7 +381,6 @@ impl<R> Reader<R> {
return Ok(Element {
name,
namespace_decl: namespace_declarations,
attributes,
content: Vec::new(),
});
@ -374,13 +388,38 @@ impl<R> Reader<R> {
fn end_tag_from_xml(
depth: &mut Vec<Name>,
namespaces: &mut Vec<HashSet<Namespace>>,
namespaces: &mut Vec<HashSet<NamespaceDeclaration>>,
e_tag: xml::ETag,
) -> Result<()> {
if let Some(s_tag_name) = depth.pop() {
if s_tag_name.namespace.prefix.as_deref() == e_tag.name.prefix()
&& s_tag_name.name == e_tag.name.local_part()
{
let (namespace, name);
let namespace_declarations: Vec<_> = namespaces.iter().flatten().collect();
match e_tag.name {
xml::QName::PrefixedName(ref prefixed_name) => {
namespace = namespace_declarations
.iter()
.rfind(|namespace| {
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
})
.map(|namespace_decl| namespace_decl.namespace.clone())
.ok_or_else(|| {
return Error::UnqualifiedNamespace((&e_tag.name).to_string());
})?;
name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(ref unprefixed_name) => {
namespace = namespace_declarations
.iter()
.rfind(|namespace| namespace.prefix.as_deref() == None)
.map(|namespace_decl| namespace_decl.namespace.clone())
.ok_or_else(|| {
return Error::UnqualifiedNamespace(e_tag.name.to_string());
})?;
name = unprefixed_name.to_string();
}
}
let e_tag_name = Name { namespace, name };
if s_tag_name == e_tag_name {
namespaces.pop();
return Ok(());
} else {
@ -395,7 +434,7 @@ impl<R> Reader<R> {
}
fn element_from_xml(
namespaces: &mut Vec<HashSet<Namespace>>,
namespaces: &mut Vec<HashSet<NamespaceDeclaration>>,
element: xml::Element,
) -> Result<Element> {
match element {
@ -416,17 +455,17 @@ impl<R> Reader<R> {
}
xml::NSAttName::DefaultAttName => None,
};
let namespace = Namespace {
let namespace = NamespaceDeclaration {
prefix,
namespace: namespace.process()?,
};
if !namespace_declarations.insert(namespace.clone()) {
return Err(Error::DuplicateNameSpace(namespace));
return Err(Error::DuplicateNameSpaceDeclaration(namespace));
}
}
// all namespaces available to the element (from both parent elements and element itself)
let namespace_stack: Vec<&Namespace> = namespaces
let namespace_stack: Vec<&NamespaceDeclaration> = namespaces
.iter()
.flatten()
.chain(namespace_declarations.iter())
@ -460,7 +499,7 @@ impl<R> Reader<R> {
if let Some(namespace) = namespace {
let namespace = (*namespace).clone();
let name = Name {
namespace,
namespace: namespace.namespace,
name: attribute_name,
};
let value = value.process()?;
@ -493,11 +532,13 @@ impl<R> Reader<R> {
.ok_or_else(|| Error::UnqualifiedNamespace(empty_elem_tag.name.to_string()))?)
.clone();
let name = Name { namespace, name };
let name = Name {
namespace: namespace.namespace,
name,
};
return Ok(Element {
name,
namespace_decl: namespace_declarations,
attributes,
content: Vec::new(),
});
@ -523,17 +564,17 @@ impl<R> Reader<R> {
}
xml::NSAttName::DefaultAttName => None,
};
let namespace = Namespace {
let namespace = NamespaceDeclaration {
prefix,
namespace: namespace.process()?,
};
if !namespace_declarations.insert(namespace.clone()) {
return Err(Error::DuplicateNameSpace(namespace));
return Err(Error::DuplicateNameSpaceDeclaration(namespace));
}
}
// all namespaces available to the element (from both parent elements and element itself)
let namespace_stack: Vec<&Namespace> = namespaces
let namespace_stack: Vec<&NamespaceDeclaration> = namespaces
.iter()
.flatten()
.chain(namespace_declarations.iter())
@ -567,7 +608,7 @@ impl<R> Reader<R> {
if let Some(namespace) = namespace {
let namespace = (*namespace).clone();
let name = Name {
namespace,
namespace: namespace.namespace,
name: attribute_name,
};
let value = value.process()?;
@ -600,7 +641,10 @@ impl<R> Reader<R> {
.ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?)
.clone();
let name = Name { namespace, name };
let name = Name {
namespace: namespace.namespace,
name,
};
namespaces.push(namespace_declarations.clone());
@ -610,7 +654,6 @@ impl<R> Reader<R> {
return Ok(Element {
name,
namespace_decl: namespace_declarations,
attributes,
content,
});
@ -619,7 +662,7 @@ impl<R> Reader<R> {
}
fn content_from_xml(
namespaces: &mut Vec<HashSet<Namespace>>,
namespaces: &mut Vec<HashSet<NamespaceDeclaration>>,
element: xml::Content,
) -> Result<Vec<Content>> {
let mut content = Vec::new();

View File

@ -1,26 +1,53 @@
use futures::{AsyncWrite, Sink};
use std::collections::HashSet;
use futures::Sink;
use tokio::io::AsyncWrite;
use crate::{
element::{Element, Name, Namespace},
element::{Element, Name, NamespaceDeclaration},
error::Error,
xml::{self, composers::Composer, parsers_complete::Parser, ETag},
};
// pub struct Writer<W, C = Composer> {
pub struct Writer<W> {
writer: W,
inner: W,
depth: Vec<Name>,
namespaces: Vec<(usize, Namespace)>,
namespaces: Vec<HashSet<NamespaceDeclaration>>,
}
impl<W: AsyncWrite> Writer<W> {
pub async fn write(&self, element: impl Into<Element>) -> Result<(), Error> {
impl<W: AsyncWrite + Unpin> Writer<W> {
pub async fn write(&mut self, element: Element) -> Result<(), Error> {
todo!()
}
pub async fn write_start(&self, element: impl Into<Element>) -> Result<(), Error> {
pub async fn write_start(&mut self, element: Element) -> Result<(), Error> {
todo!()
}
pub async fn write_end(&self) -> Result<(), Error> {
/// Not implemented yet: calling this panics via `todo!()`.
///
/// Judging by the commented-out draft below, the intent is to pop the innermost
/// open element from `self.depth` and write its end tag to `self.inner`,
/// returning `Error::NotInElement` when no element is open.
pub async fn write_end(&mut self) -> Result<(), Error> {
todo!()
// NOTE(review): the draft below reads `name.namespace.prefix`, but `Name::namespace`
// is a plain `String`, so the prefix would presumably have to be looked up from the
// declarations in `self.namespaces` instead (TODO confirm before reviving this).
// let e_tag;
// if let Some(name) = self.depth.pop() {
// if let Some(prefix) = name.namespace.prefix {
// e_tag = xml::ETag {
// name: xml::QName::PrefixedName(xml::PrefixedName {
// prefix: xml::Prefix::parse_full(&prefix)?,
// local_part: xml::LocalPart::parse_full(&name.name)?,
// }),
// };
// e_tag.write(&mut self.inner).await?;
// Ok(())
// } else {
// e_tag = xml::ETag {
// name: xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.name)?),
// };
// e_tag.write(&mut self.inner).await?;
// Ok(())
// }
// } else {
// return Err(Error::NotInElement("".to_string()));
// }
}
}

View File

@ -30,6 +30,19 @@ pub trait Parser<'s> {
type Output;
fn parse(input: &'s str) -> IResult<&str, Self::Output>;
fn parse_full(input: &'s str) -> crate::Result<Self::Output> {
match <Self as Parser>::parse(input) {
Ok((rest, output)) => {
if rest.is_empty() {
return Ok(output);
} else {
return Err(crate::error::Error::ExtraData(rest.to_string()));
}
}
Result::Err(e) => return Err(crate::error::Error::ParseError(e.to_string())),
}
}
}
/// [1] NSAttName ::= PrefixedAttName | DefaultAttName