WIP: impl Stream for Reader
This commit is contained in:
parent
140af50536
commit
6d4832480b
|
@ -14,6 +14,7 @@ pub enum Error {
|
|||
DuplicateAttribute(String),
|
||||
UnqualifiedNamespace(String),
|
||||
MismatchedEndTag(String, String),
|
||||
NotInElement(String),
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for Error {
|
||||
|
|
362
src/reader.rs
362
src/reader.rs
|
@ -1,9 +1,11 @@
|
|||
use circular::Buffer;
|
||||
use futures::Stream;
|
||||
use futures::{FutureExt, Stream};
|
||||
use nom::Err;
|
||||
use std::{
|
||||
collections::{BTreeMap, HashMap, HashSet},
|
||||
future::Future,
|
||||
path::Prefix,
|
||||
pin::{pin, Pin},
|
||||
str::{self, FromStr},
|
||||
};
|
||||
use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
|
||||
|
@ -42,14 +44,77 @@ impl<R> Reader<R>
|
|||
where
|
||||
R: AsyncRead + Unpin,
|
||||
{
|
||||
async fn read_buf(&mut self) -> Result<usize> {
|
||||
async fn read_buf<'s>(&mut self) -> Result<usize> {
|
||||
Ok(self.inner.read_buf(&mut self.buffer).await?)
|
||||
}
|
||||
|
||||
async fn read_element<'s>(&'s mut self) -> Result<Element> {
|
||||
self.read_buf().await?;
|
||||
let mut input = str::from_utf8(self.buffer.data())?;
|
||||
async fn read_prolog<'s>(&'s mut self) -> Result<()> {
|
||||
loop {
|
||||
self.read_buf().await?;
|
||||
let input = str::from_utf8(self.buffer.data())?;
|
||||
match xml::Prolog::parse(input) {
|
||||
Ok((rest, _prolog)) => {
|
||||
let len = self.buffer.available_data() - rest.as_bytes().len();
|
||||
self.buffer.consume(len);
|
||||
return Ok(());
|
||||
}
|
||||
std::result::Result::Err(e) => match e {
|
||||
Err::Incomplete(_) => {}
|
||||
// TODO: better error
|
||||
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
|
||||
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_start_tag<'s>(&'s mut self) -> Result<Element> {
|
||||
loop {
|
||||
self.read_buf().await?;
|
||||
let input = str::from_utf8(self.buffer.data())?;
|
||||
match xml::STag::parse(input) {
|
||||
Ok((rest, e)) => {
|
||||
let len = self.buffer.available_data() - rest.as_bytes().len();
|
||||
let element =
|
||||
Reader::<R>::start_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?;
|
||||
self.buffer.consume(len);
|
||||
return Ok(element);
|
||||
}
|
||||
std::result::Result::Err(e) => match e {
|
||||
Err::Incomplete(_) => {}
|
||||
// TODO: better error
|
||||
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
|
||||
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_end_tag<'s>(&'s mut self) -> Result<()> {
|
||||
loop {
|
||||
self.read_buf().await?;
|
||||
let input = str::from_utf8(self.buffer.data())?;
|
||||
match xml::ETag::parse(input) {
|
||||
Ok((rest, e)) => {
|
||||
let len = self.buffer.available_data() - rest.as_bytes().len();
|
||||
Reader::<R>::end_tag_from_xml(&mut self.depth, &mut self.namespaces, e)?;
|
||||
self.buffer.consume(len);
|
||||
return Ok(());
|
||||
}
|
||||
std::result::Result::Err(e) => match e {
|
||||
Err::Incomplete(_) => {}
|
||||
// TODO: better error
|
||||
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
|
||||
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_element<'s>(&'s mut self) -> Result<Element> {
|
||||
loop {
|
||||
self.read_buf().await?;
|
||||
let input = str::from_utf8(self.buffer.data())?;
|
||||
match xml::Element::parse(input) {
|
||||
Ok((rest, e)) => {
|
||||
let len = self.buffer.available_data() - rest.as_bytes().len();
|
||||
|
@ -58,10 +123,37 @@ where
|
|||
return Ok(element);
|
||||
}
|
||||
std::result::Result::Err(e) => match e {
|
||||
Err::Incomplete(_) => {
|
||||
self.read_buf().await?;
|
||||
input = str::from_utf8(self.buffer.data())?;
|
||||
Err::Incomplete(_) => {}
|
||||
// TODO: better error
|
||||
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
|
||||
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_content<'s>(&'s mut self) -> Result<Content> {
|
||||
loop {
|
||||
self.read_buf().await?;
|
||||
let input = str::from_utf8(self.buffer.data())?;
|
||||
|
||||
match xml::ContentItem::parse(input) {
|
||||
Ok((rest, c)) => {
|
||||
match c {
|
||||
xml::ContentItem::CharData(char_data) => todo!(),
|
||||
xml::ContentItem::Element(element) => todo!(),
|
||||
xml::ContentItem::Reference(reference) => todo!(),
|
||||
xml::ContentItem::CDSect(cdsect) => todo!(),
|
||||
xml::ContentItem::PI(pi) => todo!(),
|
||||
xml::ContentItem::Comment(comment) => todo!(),
|
||||
}
|
||||
let len = self.buffer.available_data() - rest.as_bytes().len();
|
||||
let content = Reader::<R>::content_item_from_xml(&mut self.namespaces, e)?;
|
||||
self.buffer.consume(len);
|
||||
return Ok(element);
|
||||
}
|
||||
std::result::Result::Err(e) => match e {
|
||||
Err::Incomplete(_) => {}
|
||||
// TODO: better error
|
||||
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
|
||||
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
|
||||
|
@ -72,6 +164,145 @@ where
|
|||
}
|
||||
|
||||
impl<R> Reader<R> {
|
||||
fn content_item_from_xml(
|
||||
namespaces: &mut Vec<HashSet<Namespace>>,
|
||||
item: xml::ContentItem,
|
||||
) -> Result<Content> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn start_tag_from_xml(
|
||||
depth: &mut Vec<Name>,
|
||||
namespaces: &mut Vec<HashSet<Namespace>>,
|
||||
s_tag: xml::STag,
|
||||
) -> Result<Element> {
|
||||
let mut namespace_declarations = HashSet::new();
|
||||
for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| {
|
||||
if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute {
|
||||
Some((ns_name, value))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}) {
|
||||
let prefix = match prefix {
|
||||
xml::NSAttName::PrefixedAttName(prefixed_att_name) => {
|
||||
Some(prefixed_att_name.to_string())
|
||||
}
|
||||
xml::NSAttName::DefaultAttName => None,
|
||||
};
|
||||
let namespace = Namespace {
|
||||
prefix,
|
||||
namespace: namespace.process()?,
|
||||
};
|
||||
if !namespace_declarations.insert(namespace.clone()) {
|
||||
return Err(Error::DuplicateNameSpace(namespace));
|
||||
}
|
||||
}
|
||||
|
||||
// all namespaces available to the element (from both parent elements and element itself)
|
||||
let namespace_stack: Vec<&Namespace> = namespaces
|
||||
.iter()
|
||||
.flatten()
|
||||
.chain(namespace_declarations.iter())
|
||||
.collect();
|
||||
|
||||
let mut attributes = HashMap::new();
|
||||
|
||||
for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| {
|
||||
if let xml::Attribute::Attribute { name, value } = attribute {
|
||||
Some((name, value))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}) {
|
||||
let namespace;
|
||||
let attribute_name;
|
||||
match q_name {
|
||||
xml::QName::PrefixedName(prefixed_name) => {
|
||||
namespace = namespace_stack.iter().rfind(|namespace| {
|
||||
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
|
||||
});
|
||||
attribute_name = prefixed_name.local_part.to_string();
|
||||
}
|
||||
xml::QName::UnprefixedName(unprefixed_name) => {
|
||||
namespace = namespace_stack
|
||||
.iter()
|
||||
.rfind(|namespace| namespace.prefix == None);
|
||||
attribute_name = unprefixed_name.to_string();
|
||||
}
|
||||
}
|
||||
if let Some(namespace) = namespace {
|
||||
let namespace = (*namespace).clone();
|
||||
let name = Name {
|
||||
namespace,
|
||||
name: attribute_name,
|
||||
};
|
||||
let value = value.process()?;
|
||||
if let Some(_value) = attributes.insert(name, value) {
|
||||
return Err(Error::DuplicateAttribute(q_name.to_string()));
|
||||
}
|
||||
} else {
|
||||
return Err(Error::UnqualifiedNamespace(q_name.to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
let name;
|
||||
let namespace;
|
||||
match &s_tag.name {
|
||||
xml::QName::PrefixedName(prefixed_name) => {
|
||||
namespace = namespace_stack
|
||||
.iter()
|
||||
.rfind(|namespace| namespace.prefix.as_deref() == Some(**prefixed_name.prefix));
|
||||
name = prefixed_name.local_part.to_string();
|
||||
}
|
||||
xml::QName::UnprefixedName(unprefixed_name) => {
|
||||
namespace = namespace_stack
|
||||
.iter()
|
||||
.rfind(|namespace| namespace.prefix == None);
|
||||
name = unprefixed_name.to_string();
|
||||
}
|
||||
}
|
||||
|
||||
let namespace = (*namespace
|
||||
.ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?)
|
||||
.clone();
|
||||
|
||||
let name = Name { namespace, name };
|
||||
|
||||
depth.push(name.clone());
|
||||
|
||||
namespaces.push(namespace_declarations.clone());
|
||||
|
||||
return Ok(Element {
|
||||
name,
|
||||
namespace_decl: namespace_declarations,
|
||||
attributes,
|
||||
content: Vec::new(),
|
||||
});
|
||||
}
|
||||
|
||||
fn end_tag_from_xml(
|
||||
depth: &mut Vec<Name>,
|
||||
namespaces: &mut Vec<HashSet<Namespace>>,
|
||||
e_tag: xml::ETag,
|
||||
) -> Result<()> {
|
||||
if let Some(s_tag_name) = depth.pop() {
|
||||
if s_tag_name.namespace.prefix.as_deref() == e_tag.name.prefix()
|
||||
&& s_tag_name.name == e_tag.name.local_part()
|
||||
{
|
||||
namespaces.pop();
|
||||
return Ok(());
|
||||
} else {
|
||||
return Err(Error::MismatchedEndTag(
|
||||
s_tag_name.name,
|
||||
e_tag.name.to_string(),
|
||||
));
|
||||
}
|
||||
} else {
|
||||
return Err(Error::NotInElement(e_tag.name.to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
fn element_from_xml(
|
||||
namespaces: &mut Vec<HashSet<Namespace>>,
|
||||
element: xml::Element,
|
||||
|
@ -343,88 +574,22 @@ impl<R> Reader<R> {
|
|||
}
|
||||
}
|
||||
|
||||
// impl<R> Reader<R>
|
||||
// where
|
||||
// R: AsyncBufReadExt + Unpin,
|
||||
// {
|
||||
// /// could resursively read and include namespace tree with values to be shadowed within new local context
|
||||
// async fn read_recursive(&mut self, namespaces: BTreeMap<Option<String>, String>) -> Result<Element, Error> {
|
||||
// let element;
|
||||
// let len;
|
||||
// loop {
|
||||
// let buf = self.inner.fill_buf().await?;
|
||||
// let input = str::from_utf8(buf)?;
|
||||
// match crate::xml::element(input) {
|
||||
// Ok((rest, e)) => {
|
||||
// element = e;
|
||||
// len = buf.len() - rest.len();
|
||||
// break;
|
||||
// }
|
||||
// Err(e) => match e {
|
||||
// Err::Incomplete(_) => (),
|
||||
// e => return Err::<E, Error>(Error::ParseError(input.to_owned())),
|
||||
// },
|
||||
// }
|
||||
// }
|
||||
impl<R: AsyncRead + Unpin> Stream for Reader<R> {
|
||||
type Item = Result<Content>;
|
||||
|
||||
// let final;
|
||||
// match element {
|
||||
// crate::xml::Element::Empty(e) => {
|
||||
// let final = Element {
|
||||
|
||||
// }
|
||||
// },
|
||||
// crate::xml::Element::NotEmpty(_, _, _) => todo!(),
|
||||
// }
|
||||
|
||||
// self.inner.consume(len);
|
||||
// todo!()
|
||||
// }
|
||||
// /// reads entire next prolog, element, or misc
|
||||
// pub async fn read<E: From<Element>>(&mut self) -> Result<E, Error> {
|
||||
// let element;
|
||||
// let len;
|
||||
// loop {
|
||||
// let buf = self.inner.fill_buf().await?;
|
||||
// let input = str::from_utf8(buf)?;
|
||||
// match crate::xml::element(input) {
|
||||
// Ok((rest, e)) => {
|
||||
// element = e;
|
||||
// len = buf.len() - rest.len();
|
||||
// break;
|
||||
// }
|
||||
// Err(e) => match e {
|
||||
// Err::Incomplete(_) => (),
|
||||
// e => return Err::<E, Error>(Error::ParseError(input.to_owned())),
|
||||
// },
|
||||
// }
|
||||
// }
|
||||
// self.inner.consume(len);
|
||||
|
||||
// // Ok(element)
|
||||
// todo!()
|
||||
// }
|
||||
// pub async fn read_start(&self) -> Result<impl From<Element>, Error> {
|
||||
// todo!()
|
||||
// }
|
||||
// pub async fn read_end(&self) -> Result<(), Error> {
|
||||
// todo!()
|
||||
// }
|
||||
// }
|
||||
|
||||
// impl<R: AsyncBufRead> Stream for Reader<R> {
|
||||
// type Item = impl From<Element>;
|
||||
|
||||
// async fn poll_next(
|
||||
// self: std::pin::Pin<&mut Self>,
|
||||
// cx: &mut std::task::Context<'_>,
|
||||
// ) -> std::task::Poll<Option<Self::Item>> {
|
||||
// todo!()
|
||||
// }
|
||||
// }
|
||||
fn poll_next(
|
||||
self: std::pin::Pin<&mut Self>,
|
||||
cx: &mut std::task::Context<'_>,
|
||||
) -> std::task::Poll<Option<Self::Item>> {
|
||||
let mut e = self;
|
||||
let mut pinned = pin!(e.read_content());
|
||||
pinned.as_mut().poll(cx).map(|result| Some(result))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use futures::{sink::Buffer, StreamExt};
|
||||
use tokio::io::AsyncRead;
|
||||
|
||||
use super::Reader;
|
||||
|
@ -448,10 +613,7 @@ mod test {
|
|||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_element_read() {
|
||||
let mock = MockAsyncReader::new(
|
||||
"<xs:schema
|
||||
const TEST_DOC: &'static str = "<xs:schema
|
||||
xmlns:xs='http://www.w3.org/2001/XMLSchema'
|
||||
targetNamespace='http://etherx.jabber.org/streams'
|
||||
xmlns='http://etherx.jabber.org/streams'
|
||||
|
@ -529,11 +691,25 @@ mod test {
|
|||
</xs:complexType>
|
||||
</xs:element>
|
||||
|
||||
</xs:schema>asdf
|
||||
",
|
||||
);
|
||||
</xs:schema>asdf";
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_element_read() {
|
||||
let mock = MockAsyncReader::new(TEST_DOC);
|
||||
let mut reader = Reader::new(mock);
|
||||
let element = reader.read_element().await.unwrap();
|
||||
println!("{:#?}", element);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_element_stream() {
|
||||
let mock = MockAsyncReader::new(TEST_DOC);
|
||||
let mut reader = Reader::new(mock);
|
||||
let element = reader.read_start_tag().await.unwrap();
|
||||
println!("{:#?}", element);
|
||||
loop {
|
||||
let element = reader.next().await.unwrap();
|
||||
println!("{:#?}", element);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -817,6 +817,7 @@ impl<'s> Composer<'s> for Content<'s> {
|
|||
ContentItem::CDSect(cd_sect) => cd_sect.write(writer).await?,
|
||||
ContentItem::PI(pi) => pi.write(writer).await?,
|
||||
ContentItem::Comment(comment) => comment.write(writer).await?,
|
||||
_ => todo!("verify no split chardata"),
|
||||
}
|
||||
if let Some(char_data) = char_data {
|
||||
char_data.write(writer).await?;
|
||||
|
|
|
@ -47,6 +47,22 @@ pub enum QName<'s> {
|
|||
UnprefixedName(UnprefixedName<'s>),
|
||||
}
|
||||
|
||||
impl<'s> QName<'s> {
|
||||
pub fn prefix(&self) -> Option<&'s str> {
|
||||
match self {
|
||||
QName::PrefixedName(prefixed_name) => return Some(**prefixed_name.prefix),
|
||||
QName::UnprefixedName(_) => return None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn local_part(&self) -> &str {
|
||||
match self {
|
||||
QName::PrefixedName(prefixed_name) => return **prefixed_name.local_part,
|
||||
QName::UnprefixedName(unprefixed_name) => return ****unprefixed_name,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> ToString for QName<'s> {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
|
@ -473,7 +489,7 @@ pub struct ETag<'s> {
|
|||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ContentItem<'s> {
|
||||
// CharData(&'s str),
|
||||
CharData(CharData<'s>),
|
||||
Element(Element<'s>),
|
||||
Reference(Reference<'s>),
|
||||
CDSect(CDSect<'s>),
|
||||
|
|
|
@ -733,6 +733,23 @@ impl<'s> Parser<'s, ETag<'s>> for ETag<'s> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'s> Parser<'s, ContentItem<'s>> for ContentItem<'s> {
|
||||
fn parse(input: &'s str) -> IResult<&str, ContentItem<'s>> {
|
||||
alt((
|
||||
map(CharData::parse, |char_data| {
|
||||
ContentItem::CharData(char_data)
|
||||
}),
|
||||
map(Element::parse, |element| ContentItem::Element(element)),
|
||||
map(Reference::parse, |reference| {
|
||||
ContentItem::Reference(reference)
|
||||
}),
|
||||
map(CDSect::parse, |cd_sect| ContentItem::CDSect(cd_sect)),
|
||||
map(PI::parse, |pi| ContentItem::PI(pi)),
|
||||
map(Comment::parse, |comment| ContentItem::Comment(comment)),
|
||||
))(input)
|
||||
}
|
||||
}
|
||||
|
||||
/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
|
||||
impl<'s> Parser<'s, Content<'s>> for Content<'s> {
|
||||
fn parse(input: &'s str) -> IResult<&str, Content<'s>> {
|
||||
|
|
Loading…
Reference in New Issue