WIP: more parsers

This commit is contained in:
cel 🌸 2024-06-21 15:25:27 +01:00
parent ea98ddced7
commit 9307f48d17
1 changed file with 104 additions and 72 deletions

View File

@ -2,7 +2,7 @@ use std::char;
use nom::{
branch::{alt, permutation},
bytes::streaming::{is_a, is_not, tag, take, take_till, take_until},
bytes::streaming::{is_a, is_not, tag, tag_no_case, take, take_till, take_until},
character::{
complete::one_of,
streaming::{alpha1, char, digit1, none_of, satisfy},
@ -16,13 +16,6 @@ use nom::{
// parser: parses tokens from lexer into events
type Comment<'s> = &'s str;
struct PI<'s> {
target: &'s str,
instruction: Option<&'s str>,
}
enum ContentItem<'s> {
CharData(&'s str),
Element(Element<'s>),
@ -197,79 +190,85 @@ pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> {
)
}
// TODO: figure out why this doesn't work and how to do this properly
/// The text matched by [`char_data`]; a slice borrowed from the parser input.
type CharData<'s> = &'s str;
/// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
///
/// Consumes a run of characters other than `<` and `&`, stopping (without
/// consuming it) in front of the first `<`, `&` or `]]>` sequence. The matched
/// run is returned as a slice of `input`.
///
/// The parser is built from streaming combinators: the tests in this file
/// expect `Err::Incomplete` when the input ends before any terminator is seen.
// NOTE(review): this span shows two implementations back to back (the
// `many_till` expression and the `let tagg … tag(tagg)` statements), together
// with several commented-out attempts. Presumably these are the added and
// removed sides of the diff; confirm which one is meant to remain.
pub fn char_data(input: &str) -> IResult<&str, CharData> {
// tag(map(
// peek(alt((
// map_parser(
// peek(take_until("]]>")),
// nom::bytes::complete::take_till(|c| c == '<' || c == '&'),
// ),
// map_parser(
// peek(take_till(|c| c == '<' || c == '&')),
// nom::bytes::complete::take_until("]]>"),
// ),
// ))),
// |(first, _)| first,
// ))(input)
// Variant 1: take characters other than `<`/`&` one at a time until the
// lookahead sees `<`, `&` or `]]>`; `recognize` returns the consumed slice.
recognize(many_till(
none_of("<&"),
peek(alt((recognize(one_of("<&")), tag("]]>")))),
))(input)
// map(
// tuple((is_not("<&]"), peek(alt((tag("<"), tag("&"), tag("]]>")))))),
// |(first, _)| first,
// )(input)
// map(
// tuple((recognize(many0(none_of("<&"))), opt(peek(tag("]]>"))))),
// |(first, _)| first,
// )(input)
// alt((recognize(many0(none_of("<&"))), take_until("]]>")))(input)
// Variant 2: peek both candidate prefixes (everything before "]]>" and
// everything before the first `<`/`&`) and consume whichever is shorter.
let tagg: &str;
if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) {
if let Ok((_, tagg2)) =
peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input)
{
if tagg1.len() < tagg2.len() {
tagg = tagg1
} else {
tagg = tagg2
}
} else {
// No `<`/`&` prefix available: fall back to the text before "]]>".
tagg = tagg1;
}
} else {
// No "]]>" found: use the `<`/`&` prefix, propagating its error if any.
(_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)?
}
// Consume exactly the prefix selected above.
tag(tagg)(input)
// let mut len = 0;
// let ch = input.chars().collect::<Vec<_>>();
// for (idx, char) in ch.as_ref().into_iter().enumerate() {
// match char {
// '<' | '&' => break,
// ']' => {
// if idx <= ch.len() - 3 {}
// },
// _ => todo!(),
// }
// }
// while let Some(char) = chars.next() {
// if char == '<' || char == '&' {
// break;
// } else if char == ']' {
// if let Some(next) = chars.peek() {
// if next == ']' {
// if let Some(next) = chars.next_if_eq() {}
// }
// let tagg: &str;
// if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) {
// if let Ok((_, tagg2)) =
// peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input)
// {
// if tagg1.len() < tagg2.len() {
// tagg = tagg1
// } else {
// tagg = tagg2
// }
// } else {
// tagg = tagg1;
// }
// len += 1;
// } else {
// (_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)?
// }
// todo!()
// tag(tagg)(input)
// recognize(many0(permutation((none_of("<&"), not(tag("]]>"))))))(input)
// recognize(many0(not(alt((tag("<"), tag("&"), tag("]]>"))))))(input)
// take_till(|c| c == '<' || c == '&').and_then(take_until("]]>"))(input)
}
/// Body of an XML comment: the text between `<!--` and `-->`, borrowed from the input.
type Comment<'s> = &'s str;

/// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
///
/// Parses a complete comment and yields only its body. The body is scanned
/// one `xmlchar` at a time and ends at the first `--`; that `--` must be the
/// start of the closing `-->`, otherwise the closing tag fails to match and
/// the comment is rejected.
pub fn comment(input: &str) -> IResult<&str, Comment> {
    // Everything up to (but not including) the first double hyphen.
    let body = recognize(many_till(xmlchar, peek(tag("--"))));
    let open = tag("<!--");
    let close = tag("-->");
    delimited(open, body, close)(input)
}
/// A parsed processing instruction (production [16] PI), as produced by `pi`.
///
/// Both fields are slices borrowed from the parsed input. The type is public
/// because it appears in the signature of the public `pi` parser, and it
/// derives the comparison/debug traits so parse results can be used with
/// `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PI<'s> {
    /// The instruction's target name, as accepted by `pi_target`.
    pub target: &'s str,
    /// The text recognized between the target and the closing `?>` (the
    /// leading separator matched by `s` is included), or `None` when the
    /// target is immediately followed by `?>`.
    pub instruction: Option<&'s str>,
}
/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
///
/// Parses a processing instruction delimited by `<?` and `?>` and returns it
/// as a [`PI`]. The target is parsed by `pi_target`; the optional remainder
/// (a separator matched by `s` followed by every `xmlchar` up to the first
/// `?>`) is captured verbatim, separator included, as the instruction text.
pub fn pi(input: &str) -> IResult<&str, PI> {
    // Optional "<separator><content>" part, recognized as one borrowed slice.
    let instruction_text = opt(recognize(pair(s, many_till(xmlchar, peek(tag("?>"))))));
    let contents = pair(pi_target, instruction_text);

    delimited(tag("<?"), contents, tag("?>"))(input).map(|(rest, (target, instruction))| {
        (
            rest,
            PI {
                target,
                instruction,
            },
        )
    })
}
type PITarget<'s> = &'s str;
/// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
pub fn pi_target(input: &str) -> IResult<&str, PITarget> {
let (rest, name) = name(input)?;
if name.to_lowercase() == "xml" {
return Err(Err::Error(Error {
input,
// TODO: check if better error to return
code: ErrorKind::Tag,
}));
} else {
return Ok((rest, name));
}
}
type Prolog<'s> = (
Option<XMLDecl>,
Vec<Misc<'s>>,
@ -320,8 +319,8 @@ pub fn version_info(input: &str) -> IResult<&str, VersionInfo> {
}
/// [25] Eq ::= S? '=' S?
///
/// Matches a single `=` with an optional `s` match on either side.
// NOTE(review): two variants of `eq` are visible here. The first returns the
// `(Option<&str>, char, Option<&str>)` tuple; the second wraps the same parser
// in `recognize` and returns the whole matched slice as `&str`. Presumably these
// are the removed and added sides of the diff; confirm which one remains.
pub fn eq(input: &str) -> IResult<&str, (Option<&str>, char, Option<&str>)> {
tuple((opt(s), char('='), opt(s)))(input)
pub fn eq(input: &str) -> IResult<&str, &str> {
recognize(tuple((opt(s), char('='), opt(s))))(input)
}
#[derive(Clone)]
@ -363,9 +362,42 @@ mod tests {
assert_eq!(Ok(("&ghi", "abc]>def")), char_data("abc]>def&ghi"));
assert_eq!(
Err(Err::Incomplete(nom::Needed::Size(
NonZero::new(1usize).unwrap()
NonZero::new(3usize).unwrap()
))),
char_data("abcdefghi")
);
}
/// `comment` returns the comment body for complete input and reports
/// `Incomplete` when the closing delimiter has not arrived yet.
#[test]
fn test_comment() {
    // (input, expected body); every case consumes the whole input.
    let complete_cases = [
        ("<!---->", ""),
        ("<!--asdf-->", "asdf"),
        ("<!--as-df-->", "as-df"),
    ];
    for (source, body) in complete_cases {
        assert_eq!(Ok(("", body)), comment(source));
    }

    // Truncated comment: the parser asks for two more bytes.
    let needs_two_more = Err(Err::Incomplete(nom::Needed::Size(
        NonZero::new(2usize).unwrap(),
    )));
    assert_eq!(needs_two_more, comment("<!--asdf"));
}
/// `pi_target` accepts ordinary names (including ones that merely start with
/// `xml`) and rejects the reserved name `xml` regardless of letter case.
#[test]
fn test_pi_target() {
    // (input, expected target); the trailing space is left unconsumed.
    for (source, target) in [("asdf ", "asdf"), ("xmlasdf ", "xmlasdf")] {
        assert_eq!(Ok((" ", target)), pi_target(source));
    }

    // Reserved targets fail with a `Tag` error positioned at the original input.
    for reserved in ["xml ", "xMl "] {
        let rejected = Err(Err::Error(Error {
            input: reserved,
            code: ErrorKind::Tag,
        }));
        assert_eq!(rejected, pi_target(reserved));
    }
}
}