diff --git a/src/element.rs b/src/element.rs index e93a0f3..9bdde35 100644 --- a/src/element.rs +++ b/src/element.rs @@ -26,7 +26,7 @@ pub struct Name { pub enum Content { Element(Element), Text(String), - PI(String), + PI, Comment(String), } diff --git a/src/reader.rs b/src/reader.rs index dc16d31..64a0ed8 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -133,44 +133,135 @@ where } async fn read_content<'s>(&'s mut self) -> Result { + let mut last_char = false; + let mut text = String::new(); loop { self.read_buf().await?; let input = str::from_utf8(self.buffer.data())?; - - match xml::ContentItem::parse(input) { - Ok((rest, c)) => { - match c { - xml::ContentItem::CharData(char_data) => todo!(), - xml::ContentItem::Element(element) => todo!(), - xml::ContentItem::Reference(reference) => todo!(), - xml::ContentItem::CDSect(cdsect) => todo!(), - xml::ContentItem::PI(pi) => todo!(), - xml::ContentItem::Comment(comment) => todo!(), + if last_char == false { + match xml::CharData::parse(input) { + Ok((rest, char_data)) => { + let len = self.buffer.available_data() - rest.as_bytes().len(); + text.push_str(*char_data); + self.buffer.consume(len); + last_char = true; } - let len = self.buffer.available_data() - rest.as_bytes().len(); - let content = Reader::::content_item_from_xml(&mut self.namespaces, e)?; - self.buffer.consume(len); - return Ok(element); + std::result::Result::Err(e) => match e { + Err::Incomplete(_needed) => continue, + _ => match xml::ContentItem::parse(input) { + Ok((rest, content_item)) => match content_item { + xml::ContentItem::Element(element) => { + if !text.is_empty() { + return Ok(Content::Text(text)); + } else { + let len = + self.buffer.available_data() - rest.as_bytes().len(); + let element = + Self::element_from_xml(&mut self.namespaces, element)?; + self.buffer.consume(len); + return Ok(Content::Element(element)); + } + } + xml::ContentItem::Reference(reference) => { + let len = self.buffer.available_data() - rest.as_bytes().len(); + text.push(reference.process()?); + self.buffer.consume(len); + continue; + } + xml::ContentItem::CDSect(cd_sect) => { + let len = self.buffer.available_data() - rest.as_bytes().len(); + text.push_str(**cd_sect); + self.buffer.consume(len); + continue; + } + xml::ContentItem::PI(_pi) => { + if !text.is_empty() { + return Ok(Content::Text(text)); + } else { + let len = + self.buffer.available_data() - rest.as_bytes().len(); + self.buffer.consume(len); + return Ok(Content::PI); + } + } + xml::ContentItem::Comment(comment) => { + if !text.is_empty() { + return Ok(Content::Text(text)); + } else { + let len = + self.buffer.available_data() - rest.as_bytes().len(); + let comment = comment.to_string(); + self.buffer.consume(len); + return Ok(Content::Comment(comment)); + } + } + }, + std::result::Result::Err(e) => match e { + Err::Incomplete(_) => continue, + // TODO: better error + Err::Error(e) => return Err(Error::ParseError(e.to_string())), + Err::Failure(e) => return Err(Error::ParseError(e.to_string())), + }, + }, + }, + } + } else { + match xml::ContentItem::parse(input) { + Ok((rest, content_item)) => match content_item { + xml::ContentItem::Element(element) => { + if !text.is_empty() { + return Ok(Content::Text(text)); + } else { + let len = self.buffer.available_data() - rest.as_bytes().len(); + let element = + Self::element_from_xml(&mut self.namespaces, element)?; + self.buffer.consume(len); + return Ok(Content::Element(element)); + } + } + xml::ContentItem::Reference(reference) => { + let len = self.buffer.available_data() - rest.as_bytes().len(); + text.push(reference.process()?); + self.buffer.consume(len); + last_char = false; + continue; + } + xml::ContentItem::CDSect(cd_sect) => { + let len = self.buffer.available_data() - rest.as_bytes().len(); + text.push_str(**cd_sect); + self.buffer.consume(len); + last_char = false; + continue; + } + xml::ContentItem::PI(_pi) => { + if !text.is_empty() { + return Ok(Content::Text(text)); + } else { + let len = self.buffer.available_data() - rest.as_bytes().len(); + self.buffer.consume(len); + return Ok(Content::PI); + } + } + xml::ContentItem::Comment(comment) => { + let len = self.buffer.available_data() - rest.as_bytes().len(); + let comment = comment.to_string(); + self.buffer.consume(len); + return Ok(Content::Comment(comment)); + } + }, + std::result::Result::Err(e) => match e { + Err::Incomplete(_) => continue, + // TODO: better error + Err::Error(e) => return Err(Error::ParseError(e.to_string())), + Err::Failure(e) => return Err(Error::ParseError(e.to_string())), + }, } - std::result::Result::Err(e) => match e { - Err::Incomplete(_) => {} - // TODO: better error - Err::Error(e) => return Err(Error::ParseError(e.to_string())), - Err::Failure(e) => return Err(Error::ParseError(e.to_string())), - }, } } } } impl Reader { - fn content_item_from_xml( - namespaces: &mut Vec>, - item: xml::ContentItem, - ) -> Result { - todo!() - } - fn start_tag_from_xml( depth: &mut Vec, namespaces: &mut Vec>, @@ -594,11 +685,14 @@ mod test { use super::Reader; - struct MockAsyncReader<'s>(&'s str); + struct MockAsyncReader<'s> { + put: bool, + data: &'s str, + } impl<'s> MockAsyncReader<'s> { fn new(data: &'s str) -> Self { - Self(data) + Self { put: false, data } } } @@ -608,7 +702,10 @@ mod test { _cx: &mut std::task::Context<'_>, buf: &mut tokio::io::ReadBuf<'_>, ) -> std::task::Poll> { - buf.put_slice(self.0.as_bytes()); + if !self.put { + buf.put_slice(self.data.as_bytes()); + self.get_mut().put = true + }; std::task::Poll::Ready(Ok(())) } } @@ -706,10 +803,19 @@ mod test { let mock = MockAsyncReader::new(TEST_DOC); let mut reader = Reader::new(mock); let element = reader.read_start_tag().await.unwrap(); - println!("{:#?}", element); + println!("start element: {:#?}", element); + let mut content_count = 0; loop { - let element = reader.next().await.unwrap(); - println!("{:#?}", element); + if let Some(content) = reader.next().await { + match content { + Ok(content) => { + content_count += 1; + println!("content {}: {:#?}", content_count, content) + } + Err(_) => break, + } + } } + reader.read_end_tag().await.unwrap() } } diff --git a/src/xml/mod.rs b/src/xml/mod.rs index 9424d0b..0e84155 100644 --- a/src/xml/mod.rs +++ b/src/xml/mod.rs @@ -489,7 +489,7 @@ pub struct ETag<'s> { #[derive(Debug, Clone)] pub enum ContentItem<'s> { - CharData(CharData<'s>), + // CharData(CharData<'s>), Element(Element<'s>), Reference(Reference<'s>), CDSect(CDSect<'s>), diff --git a/src/xml/parsers.rs b/src/xml/parsers.rs index 3cbefd3..293e552 100644 --- a/src/xml/parsers.rs +++ b/src/xml/parsers.rs @@ -736,9 +736,6 @@ impl<'s> Parser<'s, ETag<'s>> for ETag<'s> { impl<'s> Parser<'s, ContentItem<'s>> for ContentItem<'s> { fn parse(input: &'s str) -> IResult<&str, ContentItem<'s>> { alt(( - map(CharData::parse, |char_data| { - ContentItem::CharData(char_data) - }), map(Element::parse, |element| ContentItem::Element(element)), map(Reference::parse, |reference| { ContentItem::Reference(reference)