impl Stream for Reader

This commit is contained in:
cel 🌸 2024-11-11 12:15:13 +00:00
parent 6d4832480b
commit 32545d8a6a
4 changed files with 141 additions and 38 deletions

View File

@ -26,7 +26,7 @@ pub struct Name {
pub enum Content { pub enum Content {
Element(Element), Element(Element),
Text(String), Text(String),
PI(String), PI,
Comment(String), Comment(String),
} }

View File

@ -133,44 +133,135 @@ where
} }
async fn read_content<'s>(&'s mut self) -> Result<Content> { async fn read_content<'s>(&'s mut self) -> Result<Content> {
let mut last_char = false;
let mut text = String::new();
loop { loop {
self.read_buf().await?; self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?; let input = str::from_utf8(self.buffer.data())?;
if last_char == false {
match xml::ContentItem::parse(input) { match xml::CharData::parse(input) {
Ok((rest, c)) => { Ok((rest, char_data)) => {
match c { let len = self.buffer.available_data() - rest.as_bytes().len();
xml::ContentItem::CharData(char_data) => todo!(), text.push_str(*char_data);
xml::ContentItem::Element(element) => todo!(), self.buffer.consume(len);
xml::ContentItem::Reference(reference) => todo!(), last_char = true;
xml::ContentItem::CDSect(cdsect) => todo!(),
xml::ContentItem::PI(pi) => todo!(),
xml::ContentItem::Comment(comment) => todo!(),
} }
let len = self.buffer.available_data() - rest.as_bytes().len(); std::result::Result::Err(e) => match e {
let content = Reader::<R>::content_item_from_xml(&mut self.namespaces, e)?; Err::Incomplete(_needed) => continue,
self.buffer.consume(len); _ => match xml::ContentItem::parse(input) {
return Ok(element); Ok((rest, content_item)) => match content_item {
xml::ContentItem::Element(element) => {
if !text.is_empty() {
return Ok(Content::Text(text));
} else {
let len =
self.buffer.available_data() - rest.as_bytes().len();
let element =
Self::element_from_xml(&mut self.namespaces, element)?;
self.buffer.consume(len);
return Ok(Content::Element(element));
}
}
xml::ContentItem::Reference(reference) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
text.push(reference.process()?);
self.buffer.consume(len);
continue;
}
xml::ContentItem::CDSect(cd_sect) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
text.push_str(**cd_sect);
self.buffer.consume(len);
continue;
}
xml::ContentItem::PI(_pi) => {
if !text.is_empty() {
return Ok(Content::Text(text));
} else {
let len =
self.buffer.available_data() - rest.as_bytes().len();
self.buffer.consume(len);
return Ok(Content::PI);
}
}
xml::ContentItem::Comment(comment) => {
if !text.is_empty() {
return Ok(Content::Text(text));
} else {
let len =
self.buffer.available_data() - rest.as_bytes().len();
let comment = comment.to_string();
self.buffer.consume(len);
return Ok(Content::Comment(comment));
}
}
},
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => continue,
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
},
},
},
}
} else {
match xml::ContentItem::parse(input) {
Ok((rest, content_item)) => match content_item {
xml::ContentItem::Element(element) => {
if !text.is_empty() {
return Ok(Content::Text(text));
} else {
let len = self.buffer.available_data() - rest.as_bytes().len();
let element =
Self::element_from_xml(&mut self.namespaces, element)?;
self.buffer.consume(len);
return Ok(Content::Element(element));
}
}
xml::ContentItem::Reference(reference) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
text.push(reference.process()?);
self.buffer.consume(len);
last_char = false;
continue;
}
xml::ContentItem::CDSect(cd_sect) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
text.push_str(**cd_sect);
self.buffer.consume(len);
last_char = false;
continue;
}
xml::ContentItem::PI(_pi) => {
if !text.is_empty() {
return Ok(Content::Text(text));
} else {
let len = self.buffer.available_data() - rest.as_bytes().len();
self.buffer.consume(len);
return Ok(Content::PI);
}
}
xml::ContentItem::Comment(comment) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
let comment = comment.to_string();
self.buffer.consume(len);
return Ok(Content::Comment(comment));
}
},
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => continue,
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
},
} }
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
},
} }
} }
} }
} }
impl<R> Reader<R> { impl<R> Reader<R> {
fn content_item_from_xml(
namespaces: &mut Vec<HashSet<Namespace>>,
item: xml::ContentItem,
) -> Result<Content> {
todo!()
}
fn start_tag_from_xml( fn start_tag_from_xml(
depth: &mut Vec<Name>, depth: &mut Vec<Name>,
namespaces: &mut Vec<HashSet<Namespace>>, namespaces: &mut Vec<HashSet<Namespace>>,
@ -594,11 +685,14 @@ mod test {
use super::Reader; use super::Reader;
struct MockAsyncReader<'s>(&'s str); struct MockAsyncReader<'s> {
put: bool,
data: &'s str,
}
impl<'s> MockAsyncReader<'s> { impl<'s> MockAsyncReader<'s> {
fn new(data: &'s str) -> Self { fn new(data: &'s str) -> Self {
Self(data) Self { put: false, data }
} }
} }
@ -608,7 +702,10 @@ mod test {
_cx: &mut std::task::Context<'_>, _cx: &mut std::task::Context<'_>,
buf: &mut tokio::io::ReadBuf<'_>, buf: &mut tokio::io::ReadBuf<'_>,
) -> std::task::Poll<std::io::Result<()>> { ) -> std::task::Poll<std::io::Result<()>> {
buf.put_slice(self.0.as_bytes()); if !self.put {
buf.put_slice(self.data.as_bytes());
self.get_mut().put = true
};
std::task::Poll::Ready(Ok(())) std::task::Poll::Ready(Ok(()))
} }
} }
@ -706,10 +803,19 @@ mod test {
let mock = MockAsyncReader::new(TEST_DOC); let mock = MockAsyncReader::new(TEST_DOC);
let mut reader = Reader::new(mock); let mut reader = Reader::new(mock);
let element = reader.read_start_tag().await.unwrap(); let element = reader.read_start_tag().await.unwrap();
println!("{:#?}", element); println!("start element: {:#?}", element);
let mut content_count = 0;
loop { loop {
let element = reader.next().await.unwrap(); if let Some(content) = reader.next().await {
println!("{:#?}", element); match content {
Ok(content) => {
content_count += 1;
println!("content {}: {:#?}", content_count, content)
}
Err(_) => break,
}
}
} }
reader.read_end_tag().await.unwrap()
} }
} }

View File

@ -489,7 +489,7 @@ pub struct ETag<'s> {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum ContentItem<'s> { pub enum ContentItem<'s> {
CharData(CharData<'s>), // CharData(CharData<'s>),
Element(Element<'s>), Element(Element<'s>),
Reference(Reference<'s>), Reference(Reference<'s>),
CDSect(CDSect<'s>), CDSect(CDSect<'s>),

View File

@ -736,9 +736,6 @@ impl<'s> Parser<'s, ETag<'s>> for ETag<'s> {
impl<'s> Parser<'s, ContentItem<'s>> for ContentItem<'s> { impl<'s> Parser<'s, ContentItem<'s>> for ContentItem<'s> {
fn parse(input: &'s str) -> IResult<&str, ContentItem<'s>> { fn parse(input: &'s str) -> IResult<&str, ContentItem<'s>> {
alt(( alt((
map(CharData::parse, |char_data| {
ContentItem::CharData(char_data)
}),
map(Element::parse, |element| ContentItem::Element(element)), map(Element::parse, |element| ContentItem::Element(element)),
map(Reference::parse, |reference| { map(Reference::parse, |reference| {
ContentItem::Reference(reference) ContentItem::Reference(reference)