Compare commits

..

2 Commits

Author SHA1 Message Date
cel 🌸 6d4832480b WIP: impl Stream for Reader 2024-11-10 22:28:55 +00:00
cel 🌸 140af50536 implement reader testing 2024-11-10 17:57:05 +00:00
7 changed files with 415 additions and 86 deletions

View File

@ -9,19 +9,20 @@ use crate::{
// when are namespaces names chosen then if they are automatically calculated // when are namespaces names chosen then if they are automatically calculated
// namespaces are held by readers and writers. // namespaces are held by readers and writers.
#[derive(PartialEq, Eq, Hash, Clone)] #[derive(PartialEq, Eq, Hash, Clone, Debug)]
pub struct Namespace { pub struct Namespace {
pub prefix: Option<String>, pub prefix: Option<String>,
pub namespace: String, pub namespace: String,
} }
// names are qualified, they contain a reference to the namespace (held within the reader/writer) // names are qualified, they contain a reference to the namespace (held within the reader/writer)
#[derive(PartialEq, Eq, Hash, Clone)] #[derive(PartialEq, Eq, Hash, Clone, Debug)]
pub struct Name { pub struct Name {
pub namespace: Namespace, pub namespace: Namespace,
pub name: String, pub name: String,
} }
#[derive(Debug)]
pub enum Content { pub enum Content {
Element(Element), Element(Element),
Text(String), Text(String),
@ -30,6 +31,7 @@ pub enum Content {
} }
// should this be a trait? // should this be a trait?
#[derive(Debug)]
pub struct Element { pub struct Element {
pub name: Name, pub name: Name,
// namespace: Name, // namespace: Name,

View File

@ -2,6 +2,7 @@ use std::{num::ParseIntError, str::Utf8Error};
use crate::element::{Name, Namespace}; use crate::element::{Name, Namespace};
#[derive(Debug)]
pub enum Error { pub enum Error {
ReadError(std::io::Error), ReadError(std::io::Error),
Utf8Error(Utf8Error), Utf8Error(Utf8Error),
@ -13,6 +14,7 @@ pub enum Error {
DuplicateAttribute(String), DuplicateAttribute(String),
UnqualifiedNamespace(String), UnqualifiedNamespace(String),
MismatchedEndTag(String, String), MismatchedEndTag(String, String),
NotInElement(String),
} }
impl From<std::io::Error> for Error { impl From<std::io::Error> for Error {

View File

@ -1,6 +1,6 @@
mod element; mod element;
mod error; mod error;
mod reader; pub mod reader;
mod writer; mod writer;
pub mod xml; pub mod xml;

View File

@ -1,9 +1,11 @@
use circular::Buffer; use circular::Buffer;
use futures::Stream; use futures::{FutureExt, Stream};
use nom::Err; use nom::Err;
use std::{ use std::{
collections::{BTreeMap, HashMap, HashSet}, collections::{BTreeMap, HashMap, HashSet},
future::Future,
path::Prefix, path::Prefix,
pin::{pin, Pin},
str::{self, FromStr}, str::{self, FromStr},
}; };
use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt}; use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
@ -42,14 +44,77 @@ impl<R> Reader<R>
where where
R: AsyncRead + Unpin, R: AsyncRead + Unpin,
{ {
async fn read_buf(&mut self) -> Result<usize> { async fn read_buf<'s>(&mut self) -> Result<usize> {
Ok(self.inner.read_buf(&mut self.buffer).await?) Ok(self.inner.read_buf(&mut self.buffer).await?)
} }
async fn read_element<'s>(&'s mut self) -> Result<Element> { async fn read_prolog<'s>(&'s mut self) -> Result<()> {
self.read_buf().await?;
let mut input = str::from_utf8(self.buffer.data())?;
loop { loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::Prolog::parse(input) {
Ok((rest, _prolog)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
self.buffer.consume(len);
return Ok(());
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
},
}
}
}
async fn read_start_tag<'s>(&'s mut self) -> Result<Element> {
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::STag::parse(input) {
Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
let element =
Reader::<R>::start_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?;
self.buffer.consume(len);
return Ok(element);
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
},
}
}
}
async fn read_end_tag<'s>(&'s mut self) -> Result<()> {
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::ETag::parse(input) {
Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
Reader::<R>::end_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?;
self.buffer.consume(len);
return Ok(());
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
},
}
}
}
async fn read_element<'s>(&'s mut self) -> Result<Element> {
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::Element::parse(input) { match xml::Element::parse(input) {
Ok((rest, e)) => { Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len(); let len = self.buffer.available_data() - rest.as_bytes().len();
@ -58,10 +123,37 @@ where
return Ok(element); return Ok(element);
} }
std::result::Result::Err(e) => match e { std::result::Result::Err(e) => match e {
Err::Incomplete(_) => { Err::Incomplete(_) => {}
self.read_buf().await?; // TODO: better error
input = str::from_utf8(self.buffer.data())?; Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
},
}
}
}
async fn read_content<'s>(&'s mut self) -> Result<Content> {
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::ContentItem::parse(input) {
Ok((rest, c)) => {
match c {
xml::ContentItem::CharData(char_data) => todo!(),
xml::ContentItem::Element(element) => todo!(),
xml::ContentItem::Reference(reference) => todo!(),
xml::ContentItem::CDSect(cdsect) => todo!(),
xml::ContentItem::PI(pi) => todo!(),
xml::ContentItem::Comment(comment) => todo!(),
} }
let len = self.buffer.available_data() - rest.as_bytes().len();
let content = Reader::<R>::content_item_from_xml(&mut self.namespaces, e)?;
self.buffer.consume(len);
return Ok(element);
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {}
// TODO: better error // TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())), Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())), Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -72,6 +164,145 @@ where
} }
impl<R> Reader<R> { impl<R> Reader<R> {
fn content_item_from_xml(
namespaces: &mut Vec<HashSet<Namespace>>,
item: xml::ContentItem,
) -> Result<Content> {
todo!()
}
fn start_tag_from_xml(
depth: &mut Vec<Name>,
namespaces: &mut Vec<HashSet<Namespace>>,
s_tag: xml::STag,
) -> Result<Element> {
let mut namespace_declarations = HashSet::new();
for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| {
if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute {
Some((ns_name, value))
} else {
None
}
}) {
let prefix = match prefix {
xml::NSAttName::PrefixedAttName(prefixed_att_name) => {
Some(prefixed_att_name.to_string())
}
xml::NSAttName::DefaultAttName => None,
};
let namespace = Namespace {
prefix,
namespace: namespace.process()?,
};
if !namespace_declarations.insert(namespace.clone()) {
return Err(Error::DuplicateNameSpace(namespace));
}
}
// all namespaces available to the element (from both parent elements and element itself)
let namespace_stack: Vec<&Namespace> = namespaces
.iter()
.flatten()
.chain(namespace_declarations.iter())
.collect();
let mut attributes = HashMap::new();
for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| {
if let xml::Attribute::Attribute { name, value } = attribute {
Some((name, value))
} else {
None
}
}) {
let namespace;
let attribute_name;
match q_name {
xml::QName::PrefixedName(prefixed_name) => {
namespace = namespace_stack.iter().rfind(|namespace| {
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
});
attribute_name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
attribute_name = unprefixed_name.to_string();
}
}
if let Some(namespace) = namespace {
let namespace = (*namespace).clone();
let name = Name {
namespace,
name: attribute_name,
};
let value = value.process()?;
if let Some(_value) = attributes.insert(name, value) {
return Err(Error::DuplicateAttribute(q_name.to_string()));
}
} else {
return Err(Error::UnqualifiedNamespace(q_name.to_string()));
}
}
let name;
let namespace;
match &s_tag.name {
xml::QName::PrefixedName(prefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix.as_deref() == Some(**prefixed_name.prefix));
name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
name = unprefixed_name.to_string();
}
}
let namespace = (*namespace
.ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?)
.clone();
let name = Name { namespace, name };
depth.push(name.clone());
namespaces.push(namespace_declarations.clone());
return Ok(Element {
name,
namespace_decl: namespace_declarations,
attributes,
content: Vec::new(),
});
}
fn end_tag_from_xml(
depth: &mut Vec<Name>,
namespaces: &mut Vec<HashSet<Namespace>>,
e_tag: xml::ETag,
) -> Result<()> {
if let Some(s_tag_name) = depth.pop() {
if s_tag_name.namespace.prefix.as_deref() == e_tag.name.prefix()
&& s_tag_name.name == e_tag.name.local_part()
{
namespaces.pop();
return Ok(());
} else {
return Err(Error::MismatchedEndTag(
s_tag_name.name,
e_tag.name.to_string(),
));
}
} else {
return Err(Error::NotInElement(e_tag.name.to_string()));
}
}
fn element_from_xml( fn element_from_xml(
namespaces: &mut Vec<HashSet<Namespace>>, namespaces: &mut Vec<HashSet<Namespace>>,
element: xml::Element, element: xml::Element,
@ -339,86 +570,146 @@ impl<R> Reader<R> {
} }
} }
text.map(|text| content.push(Content::Text(text))); text.map(|text| content.push(Content::Text(text)));
todo!() Ok(content)
} }
} }
// impl<R> Reader<R> impl<R: AsyncRead + Unpin> Stream for Reader<R> {
// where type Item = Result<Content>;
// R: AsyncBufReadExt + Unpin,
// {
// /// could resursively read and include namespace tree with values to be shadowed within new local context
// async fn read_recursive(&mut self, namespaces: BTreeMap<Option<String>, String>) -> Result<Element, Error> {
// let element;
// let len;
// loop {
// let buf = self.inner.fill_buf().await?;
// let input = str::from_utf8(buf)?;
// match crate::xml::element(input) {
// Ok((rest, e)) => {
// element = e;
// len = buf.len() - rest.len();
// break;
// }
// Err(e) => match e {
// Err::Incomplete(_) => (),
// e => return Err::<E, Error>(Error::ParseError(input.to_owned())),
// },
// }
// }
// let final; fn poll_next(
// match element { self: std::pin::Pin<&mut Self>,
// crate::xml::Element::Empty(e) => { cx: &mut std::task::Context<'_>,
// let final = Element { ) -> std::task::Poll<Option<Self::Item>> {
let mut e = self;
let mut pinned = pin!(e.read_content());
pinned.as_mut().poll(cx).map(|result| Some(result))
}
}
// } #[cfg(test)]
// }, mod test {
// crate::xml::Element::NotEmpty(_, _, _) => todo!(), use futures::{sink::Buffer, StreamExt};
// } use tokio::io::AsyncRead;
// self.inner.consume(len); use super::Reader;
// todo!()
// }
// /// reads entire next prolog, element, or misc
// pub async fn read<E: From<Element>>(&mut self) -> Result<E, Error> {
// let element;
// let len;
// loop {
// let buf = self.inner.fill_buf().await?;
// let input = str::from_utf8(buf)?;
// match crate::xml::element(input) {
// Ok((rest, e)) => {
// element = e;
// len = buf.len() - rest.len();
// break;
// }
// Err(e) => match e {
// Err::Incomplete(_) => (),
// e => return Err::<E, Error>(Error::ParseError(input.to_owned())),
// },
// }
// }
// self.inner.consume(len);
// // Ok(element) struct MockAsyncReader<'s>(&'s str);
// todo!()
// }
// pub async fn read_start(&self) -> Result<impl From<Element>, Error> {
// todo!()
// }
// pub async fn read_end(&self) -> Result<(), Error> {
// todo!()
// }
// }
// impl<R: AsyncBufRead> Stream for Reader<R> { impl<'s> MockAsyncReader<'s> {
// type Item = impl From<Element>; fn new(data: &'s str) -> Self {
Self(data)
}
}
// async fn poll_next( impl<'s> AsyncRead for MockAsyncReader<'s> {
// self: std::pin::Pin<&mut Self>, fn poll_read(
// cx: &mut std::task::Context<'_>, self: std::pin::Pin<&mut Self>,
// ) -> std::task::Poll<Option<Self::Item>> { _cx: &mut std::task::Context<'_>,
// todo!() buf: &mut tokio::io::ReadBuf<'_>,
// } ) -> std::task::Poll<std::io::Result<()>> {
// } buf.put_slice(self.0.as_bytes());
std::task::Poll::Ready(Ok(()))
}
}
const TEST_DOC: &'static str = "<xs:schema
xmlns:xs='http://www.w3.org/2001/XMLSchema'
targetNamespace='http://etherx.jabber.org/streams'
xmlns='http://etherx.jabber.org/streams'
elementFormDefault='unqualified'>
<xs:import namespace='jabber:client'/>
<xs:import namespace='jabber:server'/>
<xs:import namespace='urn:ietf:params:xml:ns:xmpp-sasl'/>
<xs:import namespace='urn:ietf:params:xml:ns:xmpp-streams'/>
<xs:import namespace='urn:ietf:params:xml:ns:xmpp-tls'/>
<xs:element name='stream'>
<xs:complexType>
<xs:sequence xmlns:client='jabber:client'
xmlns:server='jabber:server'>
<xs:element ref='features'
minOccurs='0'
maxOccurs='1'/>
<xs:any namespace='urn:ietf:params:xml:ns:xmpp-tls'
minOccurs='0'
maxOccurs='1'/>
<xs:any namespace='urn:ietf:params:xml:ns:xmpp-sasl'
minOccurs='0'
maxOccurs='1'/>
<xs:any namespace='##other'
minOccurs='0'
maxOccurs='unbounded'
processContents='lax'/>
<xs:choice minOccurs='0' maxOccurs='1'>
<xs:choice minOccurs='0' maxOccurs='unbounded'>
<xs:element ref='client:message'/>
<xs:element ref='client:presence'/>
<xs:element ref='client:iq'/>
</xs:choice>
<xs:choice minOccurs='0' maxOccurs='unbounded'>
<xs:element ref='server:message'/>
<xs:element ref='server:presence'/>
<xs:element ref='server:iq'/>
</xs:choice>
</xs:choice>
<xs:element ref='error' minOccurs='0' maxOccurs='1'/>
</xs:sequence>
<xs:attribute name='from' type='xs:string' use='optional'/>
<xs:attribute name='id' type='xs:string' use='optional'/>
<xs:attribute name='to' type='xs:string' use='optional'/>
<xs:attribute name='version' type='xs:decimal' use='optional'/>
<xs:attribute ref='xml:lang' use='optional'/>
<xs:anyAttribute namespace='##other' processContents='lax'/>
</xs:complexType>
</xs:element>
<xs:element name='features'>
<xs:complexType>
<xs:sequence>
<xs:any namespace='##other'
minOccurs='0'
maxOccurs='unbounded'
processContents='lax'/>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name='error'>
<xs:complexType>
<xs:sequence xmlns:err='urn:ietf:params:xml:ns:xmpp-streams'>
<xs:group ref='err:streamErrorGroup'/>
<xs:element ref='err:text'
minOccurs='0'
maxOccurs='1'/>
<xs:any namespace='##other'
minOccurs='0'
maxOccurs='1'
processContents='lax'/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>asdf";
#[tokio::test]
async fn test_element_read() {
let mock = MockAsyncReader::new(TEST_DOC);
let mut reader = Reader::new(mock);
let element = reader.read_element().await.unwrap();
println!("{:#?}", element);
}
#[tokio::test]
async fn test_element_stream() {
let mock = MockAsyncReader::new(TEST_DOC);
let mut reader = Reader::new(mock);
let element = reader.read_start_tag().await.unwrap();
println!("{:#?}", element);
loop {
let element = reader.next().await.unwrap();
println!("{:#?}", element);
}
}
}

View File

@ -817,6 +817,7 @@ impl<'s> Composer<'s> for Content<'s> {
ContentItem::CDSect(cd_sect) => cd_sect.write(writer).await?, ContentItem::CDSect(cd_sect) => cd_sect.write(writer).await?,
ContentItem::PI(pi) => pi.write(writer).await?, ContentItem::PI(pi) => pi.write(writer).await?,
ContentItem::Comment(comment) => comment.write(writer).await?, ContentItem::Comment(comment) => comment.write(writer).await?,
_ => todo!("verify no split chardata"),
} }
if let Some(char_data) = char_data { if let Some(char_data) = char_data {
char_data.write(writer).await?; char_data.write(writer).await?;

View File

@ -47,6 +47,22 @@ pub enum QName<'s> {
UnprefixedName(UnprefixedName<'s>), UnprefixedName(UnprefixedName<'s>),
} }
impl<'s> QName<'s> {
pub fn prefix(&self) -> Option<&'s str> {
match self {
QName::PrefixedName(prefixed_name) => return Some(**prefixed_name.prefix),
QName::UnprefixedName(_) => return None,
}
}
pub fn local_part(&self) -> &str {
match self {
QName::PrefixedName(prefixed_name) => return **prefixed_name.local_part,
QName::UnprefixedName(unprefixed_name) => return ****unprefixed_name,
}
}
}
impl<'s> ToString for QName<'s> { impl<'s> ToString for QName<'s> {
fn to_string(&self) -> String { fn to_string(&self) -> String {
match self { match self {
@ -473,7 +489,7 @@ pub struct ETag<'s> {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum ContentItem<'s> { pub enum ContentItem<'s> {
// CharData(&'s str), CharData(CharData<'s>),
Element(Element<'s>), Element(Element<'s>),
Reference(Reference<'s>), Reference(Reference<'s>),
CDSect(CDSect<'s>), CDSect(CDSect<'s>),

View File

@ -733,6 +733,23 @@ impl<'s> Parser<'s, ETag<'s>> for ETag<'s> {
} }
} }
impl<'s> Parser<'s, ContentItem<'s>> for ContentItem<'s> {
fn parse(input: &'s str) -> IResult<&str, ContentItem<'s>> {
alt((
map(CharData::parse, |char_data| {
ContentItem::CharData(char_data)
}),
map(Element::parse, |element| ContentItem::Element(element)),
map(Reference::parse, |reference| {
ContentItem::Reference(reference)
}),
map(CDSect::parse, |cd_sect| ContentItem::CDSect(cd_sect)),
map(PI::parse, |pi| ContentItem::PI(pi)),
map(Comment::parse, |comment| ContentItem::Comment(comment)),
))(input)
}
}
/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* /// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
impl<'s> Parser<'s, Content<'s>> for Content<'s> { impl<'s> Parser<'s, Content<'s>> for Content<'s> {
fn parse(input: &'s str) -> IResult<&str, Content<'s>> { fn parse(input: &'s str) -> IResult<&str, Content<'s>> {