WIP: more parsers
This commit is contained in:
parent
ea98ddced7
commit
9307f48d17
176
src/parser.rs
176
src/parser.rs
|
@ -2,7 +2,7 @@ use std::char;
|
|||
|
||||
use nom::{
|
||||
branch::{alt, permutation},
|
||||
bytes::streaming::{is_a, is_not, tag, take, take_till, take_until},
|
||||
bytes::streaming::{is_a, is_not, tag, tag_no_case, take, take_till, take_until},
|
||||
character::{
|
||||
complete::one_of,
|
||||
streaming::{alpha1, char, digit1, none_of, satisfy},
|
||||
|
@ -16,13 +16,6 @@ use nom::{
|
|||
|
||||
// parser: parses tokens from lexer into events
|
||||
|
||||
type Comment<'s> = &'s str;
|
||||
|
||||
struct PI<'s> {
|
||||
target: &'s str,
|
||||
instruction: Option<&'s str>,
|
||||
}
|
||||
|
||||
enum ContentItem<'s> {
|
||||
CharData(&'s str),
|
||||
Element(Element<'s>),
|
||||
|
@ -197,79 +190,85 @@ pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> {
|
|||
)
|
||||
}
|
||||
|
||||
// TODO: figure out why this doesn't work and how to do this properly
|
||||
type CharData<'s> = &'s str;
|
||||
/// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
|
||||
pub fn char_data(input: &str) -> IResult<&str, CharData> {
|
||||
// tag(map(
|
||||
// peek(alt((
|
||||
// map_parser(
|
||||
// peek(take_until("]]>")),
|
||||
// nom::bytes::complete::take_till(|c| c == '<' || c == '&'),
|
||||
// ),
|
||||
// map_parser(
|
||||
// peek(take_till(|c| c == '<' || c == '&')),
|
||||
// nom::bytes::complete::take_until("]]>"),
|
||||
// ),
|
||||
// ))),
|
||||
// |(first, _)| first,
|
||||
// ))(input)
|
||||
recognize(many_till(
|
||||
none_of("<&"),
|
||||
peek(alt((recognize(one_of("<&")), tag("]]>")))),
|
||||
))(input)
|
||||
|
||||
// map(
|
||||
// tuple((is_not("<&]"), peek(alt((tag("<"), tag("&"), tag("]]>")))))),
|
||||
// |(first, _)| first,
|
||||
// )(input)
|
||||
// map(
|
||||
// tuple((recognize(many0(none_of("<&"))), opt(peek(tag("]]>"))))),
|
||||
// |(first, _)| first,
|
||||
// )(input)
|
||||
// alt((recognize(many0(none_of("<&"))), take_until("]]>")))(input)
|
||||
let tagg: &str;
|
||||
if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) {
|
||||
if let Ok((_, tagg2)) =
|
||||
peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input)
|
||||
{
|
||||
if tagg1.len() < tagg2.len() {
|
||||
tagg = tagg1
|
||||
} else {
|
||||
tagg = tagg2
|
||||
}
|
||||
} else {
|
||||
tagg = tagg1;
|
||||
}
|
||||
} else {
|
||||
(_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)?
|
||||
}
|
||||
tag(tagg)(input)
|
||||
// let mut len = 0;
|
||||
// let ch = input.chars().collect::<Vec<_>>();
|
||||
// for (idx, char) in ch.as_ref().into_iter().enumerate() {
|
||||
// match char {
|
||||
// '<' | '&' => break,
|
||||
// ']' => {
|
||||
// if idx <= ch.len() - 3 {}
|
||||
// },
|
||||
// _ => todo!(),
|
||||
// }
|
||||
// }
|
||||
// while let Some(char) = chars.next() {
|
||||
// if char == '<' || char == '&' {
|
||||
// break;
|
||||
// } else if char == ']' {
|
||||
// if let Some(next) = chars.peek() {
|
||||
// if next == ']' {
|
||||
// if let Some(next) = chars.next_if_eq() {}
|
||||
// }
|
||||
// let tagg: &str;
|
||||
// if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) {
|
||||
// if let Ok((_, tagg2)) =
|
||||
// peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input)
|
||||
// {
|
||||
// if tagg1.len() < tagg2.len() {
|
||||
// tagg = tagg1
|
||||
// } else {
|
||||
// tagg = tagg2
|
||||
// }
|
||||
// } else {
|
||||
// tagg = tagg1;
|
||||
// }
|
||||
// len += 1;
|
||||
// } else {
|
||||
// (_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)?
|
||||
// }
|
||||
// todo!()
|
||||
// tag(tagg)(input)
|
||||
|
||||
// recognize(many0(permutation((none_of("<&"), not(tag("]]>"))))))(input)
|
||||
// recognize(many0(not(alt((tag("<"), tag("&"), tag("]]>"))))))(input)
|
||||
// take_till(|c| c == '<' || c == '&').and_then(take_until("]]>"))(input)
|
||||
}
|
||||
|
||||
type Comment<'s> = &'s str;
|
||||
/// Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
|
||||
pub fn comment(input: &str) -> IResult<&str, Comment> {
|
||||
delimited(
|
||||
tag("<!--"),
|
||||
recognize(many_till(xmlchar, peek(tag("--")))),
|
||||
tag("-->"),
|
||||
)(input)
|
||||
}
|
||||
|
||||
/// A parsed processing instruction (`<?target instruction?>`).
///
/// Both fields borrow from the input that was parsed.
#[derive(Clone, Debug, PartialEq, Eq)]
struct PI<'s> {
    /// The processing-instruction target (the name right after `<?`).
    target: &'s str,
    /// The text following the target, if any was present.
    instruction: Option<&'s str>,
}
|
||||
/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
|
||||
pub fn pi(input: &str) -> IResult<&str, PI> {
|
||||
let (rest, (target, instruction)) = delimited(
|
||||
tag("<?"),
|
||||
pair(
|
||||
pi_target,
|
||||
opt(recognize(pair(s, many_till(xmlchar, peek(tag("?>")))))),
|
||||
),
|
||||
tag("?>"),
|
||||
)(input)?;
|
||||
Ok((
|
||||
rest,
|
||||
PI {
|
||||
target,
|
||||
instruction,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
type PITarget<'s> = &'s str;
|
||||
/// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
|
||||
pub fn pi_target(input: &str) -> IResult<&str, PITarget> {
|
||||
let (rest, name) = name(input)?;
|
||||
if name.to_lowercase() == "xml" {
|
||||
return Err(Err::Error(Error {
|
||||
input,
|
||||
// TODO: check if better error to return
|
||||
code: ErrorKind::Tag,
|
||||
}));
|
||||
} else {
|
||||
return Ok((rest, name));
|
||||
}
|
||||
}
|
||||
|
||||
type Prolog<'s> = (
|
||||
Option<XMLDecl>,
|
||||
Vec<Misc<'s>>,
|
||||
|
@ -320,8 +319,8 @@ pub fn version_info(input: &str) -> IResult<&str, VersionInfo> {
|
|||
}
|
||||
|
||||
/// [25] Eq ::= S? '=' S?
|
||||
pub fn eq(input: &str) -> IResult<&str, (Option<&str>, char, Option<&str>)> {
|
||||
tuple((opt(s), char('='), opt(s)))(input)
|
||||
pub fn eq(input: &str) -> IResult<&str, &str> {
|
||||
recognize(tuple((opt(s), char('='), opt(s))))(input)
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -363,9 +362,42 @@ mod tests {
|
|||
assert_eq!(Ok(("&ghi", "abc]>def")), char_data("abc]>def&ghi"));
|
||||
assert_eq!(
|
||||
Err(Err::Incomplete(nom::Needed::Size(
|
||||
NonZero::new(1usize).unwrap()
|
||||
NonZero::new(3usize).unwrap()
|
||||
))),
|
||||
char_data("abcdefghi")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_comment() {
    // Well-formed comments: the whole input is consumed and the body is returned.
    for (source, body) in [
        ("<!---->", ""),
        ("<!--asdf-->", "asdf"),
        ("<!--as-df-->", "as-df"),
    ] {
        assert_eq!(Ok(("", body)), comment(source));
    }

    // An unterminated comment reports that more input is needed.
    assert_eq!(
        Err(Err::Incomplete(nom::Needed::Size(
            NonZero::new(2usize).unwrap()
        ))),
        comment("<!--asdf")
    );
}
|
||||
|
||||
#[test]
fn test_pi_target() {
    // Ordinary names, including ones that only start with "xml", are accepted;
    // the trailing space is left unconsumed.
    for (source, target) in [("asdf ", "asdf"), ("xmlasdf ", "xmlasdf")] {
        assert_eq!(Ok((" ", target)), pi_target(source));
    }

    // The reserved name "xml" is rejected regardless of letter case, and the
    // error carries the original input.
    for reserved in ["xml ", "xMl "] {
        assert_eq!(
            Err(Err::Error(Error {
                input: reserved,
                code: ErrorKind::Tag
            })),
            pi_target(reserved)
        );
    }
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue