WIP: impl Stream for Reader

This commit is contained in:
cel 🌸 2024-11-10 22:28:55 +00:00
parent 140af50536
commit 6d4832480b
5 changed files with 305 additions and 94 deletions

View File

@ -14,6 +14,7 @@ pub enum Error {
DuplicateAttribute(String),
UnqualifiedNamespace(String),
MismatchedEndTag(String, String),
NotInElement(String),
}
impl From<std::io::Error> for Error {

View File

@ -1,9 +1,11 @@
use circular::Buffer;
use futures::Stream;
use futures::{FutureExt, Stream};
use nom::Err;
use std::{
collections::{BTreeMap, HashMap, HashSet},
future::Future,
path::Prefix,
pin::{pin, Pin},
str::{self, FromStr},
};
use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
@ -42,14 +44,77 @@ impl<R> Reader<R>
where
R: AsyncRead + Unpin,
{
async fn read_buf(&mut self) -> Result<usize> {
async fn read_buf<'s>(&mut self) -> Result<usize> {
Ok(self.inner.read_buf(&mut self.buffer).await?)
}
async fn read_element<'s>(&'s mut self) -> Result<Element> {
self.read_buf().await?;
let mut input = str::from_utf8(self.buffer.data())?;
async fn read_prolog<'s>(&'s mut self) -> Result<()> {
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::Prolog::parse(input) {
Ok((rest, _prolog)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
self.buffer.consume(len);
return Ok(());
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
},
}
}
}
async fn read_start_tag<'s>(&'s mut self) -> Result<Element> {
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::STag::parse(input) {
Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
let element =
Reader::<R>::start_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?;
self.buffer.consume(len);
return Ok(element);
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
},
}
}
}
async fn read_end_tag<'s>(&'s mut self) -> Result<()> {
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::ETag::parse(input) {
Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
Reader::<R>::end_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?;
self.buffer.consume(len);
return Ok(());
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
},
}
}
}
async fn read_element<'s>(&'s mut self) -> Result<Element> {
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::Element::parse(input) {
Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
@ -58,10 +123,37 @@ where
return Ok(element);
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {
self.read_buf().await?;
input = str::from_utf8(self.buffer.data())?;
Err::Incomplete(_) => {}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
},
}
}
}
async fn read_content<'s>(&'s mut self) -> Result<Content> {
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::ContentItem::parse(input) {
Ok((rest, c)) => {
match c {
xml::ContentItem::CharData(char_data) => todo!(),
xml::ContentItem::Element(element) => todo!(),
xml::ContentItem::Reference(reference) => todo!(),
xml::ContentItem::CDSect(cdsect) => todo!(),
xml::ContentItem::PI(pi) => todo!(),
xml::ContentItem::Comment(comment) => todo!(),
}
let len = self.buffer.available_data() - rest.as_bytes().len();
let content = Reader::<R>::content_item_from_xml(&mut self.namespaces, e)?;
self.buffer.consume(len);
return Ok(element);
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -72,6 +164,145 @@ where
}
impl<R> Reader<R> {
fn content_item_from_xml(
namespaces: &mut Vec<HashSet<Namespace>>,
item: xml::ContentItem,
) -> Result<Content> {
todo!()
}
fn start_tag_from_xml(
depth: &mut Vec<Name>,
namespaces: &mut Vec<HashSet<Namespace>>,
s_tag: xml::STag,
) -> Result<Element> {
let mut namespace_declarations = HashSet::new();
for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| {
if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute {
Some((ns_name, value))
} else {
None
}
}) {
let prefix = match prefix {
xml::NSAttName::PrefixedAttName(prefixed_att_name) => {
Some(prefixed_att_name.to_string())
}
xml::NSAttName::DefaultAttName => None,
};
let namespace = Namespace {
prefix,
namespace: namespace.process()?,
};
if !namespace_declarations.insert(namespace.clone()) {
return Err(Error::DuplicateNameSpace(namespace));
}
}
// all namespaces available to the element (from both parent elements and element itself)
let namespace_stack: Vec<&Namespace> = namespaces
.iter()
.flatten()
.chain(namespace_declarations.iter())
.collect();
let mut attributes = HashMap::new();
for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| {
if let xml::Attribute::Attribute { name, value } = attribute {
Some((name, value))
} else {
None
}
}) {
let namespace;
let attribute_name;
match q_name {
xml::QName::PrefixedName(prefixed_name) => {
namespace = namespace_stack.iter().rfind(|namespace| {
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
});
attribute_name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
attribute_name = unprefixed_name.to_string();
}
}
if let Some(namespace) = namespace {
let namespace = (*namespace).clone();
let name = Name {
namespace,
name: attribute_name,
};
let value = value.process()?;
if let Some(_value) = attributes.insert(name, value) {
return Err(Error::DuplicateAttribute(q_name.to_string()));
}
} else {
return Err(Error::UnqualifiedNamespace(q_name.to_string()));
}
}
let name;
let namespace;
match &s_tag.name {
xml::QName::PrefixedName(prefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix.as_deref() == Some(**prefixed_name.prefix));
name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
name = unprefixed_name.to_string();
}
}
let namespace = (*namespace
.ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?)
.clone();
let name = Name { namespace, name };
depth.push(name.clone());
namespaces.push(namespace_declarations.clone());
return Ok(Element {
name,
namespace_decl: namespace_declarations,
attributes,
content: Vec::new(),
});
}
fn end_tag_from_xml(
depth: &mut Vec<Name>,
namespaces: &mut Vec<HashSet<Namespace>>,
e_tag: xml::ETag,
) -> Result<()> {
if let Some(s_tag_name) = depth.pop() {
if s_tag_name.namespace.prefix.as_deref() == e_tag.name.prefix()
&& s_tag_name.name == e_tag.name.local_part()
{
namespaces.pop();
return Ok(());
} else {
return Err(Error::MismatchedEndTag(
s_tag_name.name,
e_tag.name.to_string(),
));
}
} else {
return Err(Error::NotInElement(e_tag.name.to_string()));
}
}
fn element_from_xml(
namespaces: &mut Vec<HashSet<Namespace>>,
element: xml::Element,
@ -343,88 +574,22 @@ impl<R> Reader<R> {
}
}
// impl<R> Reader<R>
// where
// R: AsyncBufReadExt + Unpin,
// {
// /// could resursively read and include namespace tree with values to be shadowed within new local context
// async fn read_recursive(&mut self, namespaces: BTreeMap<Option<String>, String>) -> Result<Element, Error> {
// let element;
// let len;
// loop {
// let buf = self.inner.fill_buf().await?;
// let input = str::from_utf8(buf)?;
// match crate::xml::element(input) {
// Ok((rest, e)) => {
// element = e;
// len = buf.len() - rest.len();
// break;
// }
// Err(e) => match e {
// Err::Incomplete(_) => (),
// e => return Err::<E, Error>(Error::ParseError(input.to_owned())),
// },
// }
// }
impl<R: AsyncRead + Unpin> Stream for Reader<R> {
type Item = Result<Content>;
// let final;
// match element {
// crate::xml::Element::Empty(e) => {
// let final = Element {
// }
// },
// crate::xml::Element::NotEmpty(_, _, _) => todo!(),
// }
// self.inner.consume(len);
// todo!()
// }
// /// reads entire next prolog, element, or misc
// pub async fn read<E: From<Element>>(&mut self) -> Result<E, Error> {
// let element;
// let len;
// loop {
// let buf = self.inner.fill_buf().await?;
// let input = str::from_utf8(buf)?;
// match crate::xml::element(input) {
// Ok((rest, e)) => {
// element = e;
// len = buf.len() - rest.len();
// break;
// }
// Err(e) => match e {
// Err::Incomplete(_) => (),
// e => return Err::<E, Error>(Error::ParseError(input.to_owned())),
// },
// }
// }
// self.inner.consume(len);
// // Ok(element)
// todo!()
// }
// pub async fn read_start(&self) -> Result<impl From<Element>, Error> {
// todo!()
// }
// pub async fn read_end(&self) -> Result<(), Error> {
// todo!()
// }
// }
// impl<R: AsyncBufRead> Stream for Reader<R> {
// type Item = impl From<Element>;
// async fn poll_next(
// self: std::pin::Pin<&mut Self>,
// cx: &mut std::task::Context<'_>,
// ) -> std::task::Poll<Option<Self::Item>> {
// todo!()
// }
// }
fn poll_next(
self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Option<Self::Item>> {
let mut e = self;
let mut pinned = pin!(e.read_content());
pinned.as_mut().poll(cx).map(|result| Some(result))
}
}
#[cfg(test)]
mod test {
use futures::{sink::Buffer, StreamExt};
use tokio::io::AsyncRead;
use super::Reader;
@ -448,10 +613,7 @@ mod test {
}
}
#[tokio::test]
async fn test_element_read() {
let mock = MockAsyncReader::new(
"<xs:schema
const TEST_DOC: &'static str = "<xs:schema
xmlns:xs='http://www.w3.org/2001/XMLSchema'
targetNamespace='http://etherx.jabber.org/streams'
xmlns='http://etherx.jabber.org/streams'
@ -529,11 +691,25 @@ mod test {
</xs:complexType>
</xs:element>
</xs:schema>asdf
",
);
</xs:schema>asdf";
#[tokio::test]
async fn test_element_read() {
let mock = MockAsyncReader::new(TEST_DOC);
let mut reader = Reader::new(mock);
let element = reader.read_element().await.unwrap();
println!("{:#?}", element);
}
#[tokio::test]
async fn test_element_stream() {
let mock = MockAsyncReader::new(TEST_DOC);
let mut reader = Reader::new(mock);
let element = reader.read_start_tag().await.unwrap();
println!("{:#?}", element);
loop {
let element = reader.next().await.unwrap();
println!("{:#?}", element);
}
}
}

View File

@ -817,6 +817,7 @@ impl<'s> Composer<'s> for Content<'s> {
ContentItem::CDSect(cd_sect) => cd_sect.write(writer).await?,
ContentItem::PI(pi) => pi.write(writer).await?,
ContentItem::Comment(comment) => comment.write(writer).await?,
_ => todo!("verify no split chardata"),
}
if let Some(char_data) = char_data {
char_data.write(writer).await?;

View File

@ -47,6 +47,22 @@ pub enum QName<'s> {
UnprefixedName(UnprefixedName<'s>),
}
impl<'s> QName<'s> {
pub fn prefix(&self) -> Option<&'s str> {
match self {
QName::PrefixedName(prefixed_name) => return Some(**prefixed_name.prefix),
QName::UnprefixedName(_) => return None,
}
}
pub fn local_part(&self) -> &str {
match self {
QName::PrefixedName(prefixed_name) => return **prefixed_name.local_part,
QName::UnprefixedName(unprefixed_name) => return ****unprefixed_name,
}
}
}
impl<'s> ToString for QName<'s> {
fn to_string(&self) -> String {
match self {
@ -473,7 +489,7 @@ pub struct ETag<'s> {
#[derive(Debug, Clone)]
pub enum ContentItem<'s> {
// CharData(&'s str),
CharData(CharData<'s>),
Element(Element<'s>),
Reference(Reference<'s>),
CDSect(CDSect<'s>),

View File

@ -733,6 +733,23 @@ impl<'s> Parser<'s, ETag<'s>> for ETag<'s> {
}
}
impl<'s> Parser<'s, ContentItem<'s>> for ContentItem<'s> {
fn parse(input: &'s str) -> IResult<&str, ContentItem<'s>> {
alt((
map(CharData::parse, |char_data| {
ContentItem::CharData(char_data)
}),
map(Element::parse, |element| ContentItem::Element(element)),
map(Reference::parse, |reference| {
ContentItem::Reference(reference)
}),
map(CDSect::parse, |cd_sect| ContentItem::CDSect(cd_sect)),
map(PI::parse, |pi| ContentItem::PI(pi)),
map(Comment::parse, |comment| ContentItem::Comment(comment)),
))(input)
}
}
/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
impl<'s> Parser<'s, Content<'s>> for Content<'s> {
fn parse(input: &'s str) -> IResult<&str, Content<'s>> {