diff --git a/src/parser.rs b/src/parser.rs index 07d48c6..2382f68 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,15 +1,15 @@ use std::char; use nom::{ - branch::alt, - bytes::streaming::{is_a, tag, take, take_till, take_until}, + branch::{alt, permutation}, + bytes::streaming::{is_a, is_not, tag, take, take_till, take_until}, character::{ complete::one_of, - streaming::{char, digit1, none_of, satisfy}, + streaming::{alpha1, char, digit1, none_of, satisfy}, }, - combinator::{cond, map, map_parser, map_res, not, opt, recognize, value, verify}, - error::ErrorKind, - multi::{many0, many1}, + combinator::{cond, map, map_parser, map_res, not, opt, peek, recognize, value, verify}, + error::{Error, ErrorKind}, + multi::{many0, many1, many_till}, sequence::{delimited, pair, preceded, tuple}, Err, IResult, Parser, }; @@ -197,10 +197,77 @@ pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> { ) } +// TODO: wtf why doesn't this work how do i do thisjj type CharData<'s> = &'s str; /// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) pub fn char_data(input: &str) -> IResult<&str, CharData> { - recognize(take_until("]]>").and_then(take_till(|c| c == '<' || c == '&')))(input) + // tag(map( + // peek(alt(( + // map_parser( + // peek(take_until("]]>")), + // nom::bytes::complete::take_till(|c| c == '<' || c == '&'), + // ), + // map_parser( + // peek(take_till(|c| c == '<' || c == '&')), + // nom::bytes::complete::take_until("]]>"), + // ), + // ))), + // |(first, _)| first, + // ))(input) + + // map( + // tuple((is_not("<&]"), peek(alt((tag("<"), tag("&"), tag("]]>")))))), + // |(first, _)| first, + // )(input) + // map( + // tuple((recognize(many0(none_of("<&"))), opt(peek(tag("]]>"))))), + // |(first, _)| first, + // )(input) + // alt((recognize(many0(none_of("<&"))), take_until("]]>")))(input) + let tagg: &str; + if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) { + if let Ok((_, tagg2)) = + peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input) + { + if tagg1.len() < tagg2.len() { + tagg = tagg1 + } else { + tagg = tagg2 + } + } else { + tagg = tagg1; + } + } else { + (_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)? + } + tag(tagg)(input) + // let mut len = 0; + // let ch = input.chars().collect::>(); + // for (idx, char) in ch.as_ref().into_iter().enumerate() { + // match char { + // '<' | '&' => break, + // ']' => { + // if idx <= ch.len() - 3 {} + // }, + // _ => todo!(), + // } + // } + // while let Some(char) = chars.next() { + // if char == '<' || char == '&' { + // break; + // } else if char == ']' { + // if let Some(next) = chars.peek() { + // if next == ']' { + // if let Some(next) = chars.next_if_eq() {} + // } + // } + // } + // len += 1; + // } + // todo!() + // recognize(many0(permutation((none_of("<&"), not(tag("]]>"))))))(input) + // recognize(many0(not(alt((tag("<"), tag("&"), tag("]]>"))))))(input) + // take_till(|c| c == '<' || c == '&').and_then(take_until("]]>"))(input) } type Prolog<'s> = ( @@ -283,6 +350,8 @@ pub fn pe_reference(input: &str) -> IResult<&str, char> { #[cfg(test)] mod tests { + use std::num::NonZero; + use super::*; #[test] @@ -290,6 +359,13 @@ mod tests { assert_eq!(Ok(("&def]]>ghi", "abc")), char_data("abc&def]]>ghi")); assert_eq!(Ok(("]]>ghi", "abcdef")), char_data("abcdef]]>ghi")); assert_eq!(Ok(("&defghi", "abc")), char_data("abc&defghi")); - assert_eq!(Ok(("", "abcdefghi")), char_data("abcdefghi")); + assert_eq!(Ok(("]]>def&ghi", "abc")), char_data("abc]]>def&ghi")); + assert_eq!(Ok(("&ghi", "abc]>def")), char_data("abc]>def&ghi")); + assert_eq!( + Err(Err::Incomplete(nom::Needed::Size( + NonZero::new(1usize).unwrap() + ))), + char_data("abcdefghi") + ); } }