From bec1a204390f4f4ea60e419331a5903e5f88169e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?cel=20=F0=9F=8C=B8?= Date: Fri, 14 Jun 2024 13:11:32 +0100 Subject: [PATCH] actually get it to build for initial tests --- Cargo.lock | 28 +++++++++---------- src/lib.rs | 11 -------- src/parser.rs | 77 +++++++++++++++++++++++++++++---------------------- src/reader.rs | 34 +++++++++++++---------- src/writer.rs | 12 ++++++-- 5 files changed, 85 insertions(+), 77 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 431cbcc..f9658ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "addr2line" -version = "0.21.0" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +checksum = "e7a2e47a1fbe209ee101dd6d61285226744c6c8d3c21c8dc878ba6cb9f467f3a" dependencies = [ "gimli", ] @@ -25,9 +25,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "backtrace" -version = "0.3.69" +version = "0.3.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +checksum = "4717cfcbfaa661a0fd48f8453951837ae7e8f81e481fbb136e3202d72805a744" dependencies = [ "addr2line", "cc", @@ -141,9 +141,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.1" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +checksum = "0e4075386626662786ddb0ec9081e7c7eeb1ba31951f447ca780ef9f5d568189" [[package]] name = "libc" @@ -153,9 +153,9 @@ checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "memchr" -version = "2.7.1" +version = "2.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" [[package]] name = "minimal-lexical" @@ -165,11 +165,12 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.2" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" dependencies = [ "adler", + "autocfg", ] [[package]] @@ -184,12 +185,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" -dependencies = [ - "memchr", -] +checksum = "1a5b3dd1c072ee7963717671d1ca129f1048fda25edea6b752bfc71ac8854170" [[package]] name = "peanuts" diff --git a/src/lib.rs b/src/lib.rs index 3d71373..5430fd5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,14 +3,3 @@ mod error; mod parser; mod reader; mod writer; - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = 2 + 2; - assert_eq!(result, 4); - } -} diff --git a/src/parser.rs b/src/parser.rs index 518aad4..07d48c6 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,10 +2,7 @@ use std::char; use nom::{ branch::alt, - bytes::{ - complete::take_until, - streaming::{is_a, tag, take}, - }, + bytes::streaming::{is_a, tag, take, take_till, take_until}, character::{ complete::one_of, streaming::{char, digit1, none_of, satisfy}, @@ -19,11 +16,6 @@ use nom::{ // parser: parses tokens from lexer into events -enum Misc<'s> { - Comment(Comment<'s>), - PI(PI<'s>), -} - type Comment<'s> = &'s str; struct PI<'s> { @@ -34,46 +26,44 @@ struct PI<'s> { enum ContentItem<'s> { CharData(&'s str), Element(Element<'s>), - Reference(Reference<'s>), - CDSect(CDSect<'s>), + // Reference(Reference<'s>), + // CDSect(CDSect<'s>), } type Content<'s> = Option>>; -struct Element<'s> { - name: &'s str, - attributes: Vec>, - content: Content<'s>, -} - struct Attribute<'s> { key: &'s str, value: &'s str, } -// type VersionNum<'s> = &'s str; /// Contains only latin characters or dash after first char type EncName<'s> = &'s str; -// struct XMLDecl<'s> { -// version_info: VersionNum<'s>, -// encoding_decl: Option>, -// sd_decl: Option, -// } - struct DoctypeDecl<'s> { name: &'s str, - // TODO + // TODO: doctype declaration parsing } - +/// pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> { todo!() } +struct Element<'s> { + name: &'s str, + attributes: Vec>, + content: Content<'s>, +} +/// Element pub fn element(input: &str) -> IResult<&str, Element> { todo!() } +enum Misc<'s> { + Comment(Comment<'s>), + PI(PI<'s>), +} +/// Misc pub fn misc(input: &str) -> IResult<&str, Misc> { todo!() } @@ -210,7 +200,7 @@ pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> { type CharData<'s> = &'s str; /// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) pub fn char_data(input: &str) -> IResult<&str, CharData> { - take_until()(input) + recognize(take_until("]]>").and_then(take_till(|c| c == '<' || c == '&')))(input) } type Prolog<'s> = ( @@ -229,23 +219,23 @@ pub fn prolog(input: &str) -> IResult<&str, Prolog> { struct XMLDecl { version_info: VersionInfo, - encoding_decl: Option, - sd_decl: Option, + // encoding_decl: Option, + // sd_decl: Option, } /// [23] XMLDecl ::= '' pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> { // (VersionInfo, Option, Option) - let (leftover, (version_info, encoding_decl, sd_decl)) = delimited( + let (leftover, (version_info /* encoding_decl, sd_decl */,)) = delimited( tag(""), )(input)?; Ok(( leftover, XMLDecl { version_info, - encoding_decl, - sd_decl, + // encoding_decl, + // sd_decl, }, )) } @@ -282,3 +272,24 @@ pub fn version_num(input: &str) -> IResult<&str, VersionNum> { )), )(input) } + +pub fn reference(input: &str) -> IResult<&str, char> { + todo!() +} + +pub fn pe_reference(input: &str) -> IResult<&str, char> { + todo!() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_char_data() { + assert_eq!(Ok(("&def]]>ghi", "abc")), char_data("abc&def]]>ghi")); + assert_eq!(Ok(("]]>ghi", "abcdef")), char_data("abcdef]]>ghi")); + assert_eq!(Ok(("&defghi", "abc")), char_data("abc&defghi")); + assert_eq!(Ok(("", "abcdefghi")), char_data("abcdefghi")); + } +} diff --git a/src/reader.rs b/src/reader.rs index 26e540e..6e622f4 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -18,21 +18,25 @@ impl Reader where R: AsyncBufRead, { - pub async fn read(&self) -> Result, Error> { - let buf = self.stream.poll_fill_buf().await?; - todo!() - } - pub async fn read_start(&self) -> Result, Error> {} - pub async fn read_end(&self) -> Result<(), Error> {} + // pub async fn read(&self) -> Result, Error> { + // let buf = self.stream.poll_fill_buf().await?; + // todo!() + // } + // pub async fn read_start(&self) -> Result, Error> { + // todo!() + // } + // pub async fn read_end(&self) -> Result<(), Error> { + // todo!() + // } } -impl Stream for Reader { - type Item = impl From; +// impl Stream for Reader { +// type Item = impl From; - async fn poll_next( - self: std::pin::Pin<&mut Self>, - cx: &mut std::task::Context<'_>, - ) -> std::task::Poll> { - todo!() - } -} +// async fn poll_next( +// self: std::pin::Pin<&mut Self>, +// cx: &mut std::task::Context<'_>, +// ) -> std::task::Poll> { +// todo!() +// } +// } diff --git a/src/writer.rs b/src/writer.rs index d7fc037..456a5a1 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -12,9 +12,15 @@ pub struct Writer { } impl Writer { - pub async fn write(&self, element: impl Into) -> Result<(), Error> {} - pub async fn write_start(&self, element: impl Into) -> Result<(), Error> {} - pub async fn write_end(&self) -> Result<(), Error> {} + pub async fn write(&self, element: impl Into) -> Result<(), Error> { + todo!() + } + pub async fn write_start(&self, element: impl Into) -> Result<(), Error> { + todo!() + } + pub async fn write_end(&self) -> Result<(), Error> { + todo!() + } } impl> Sink for Writer {