From 3a875666a5a897d92a9c6d92a67867bcae662211 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?cel=20=F0=9F=8C=B8?=
Date: Fri, 21 Jun 2024 17:08:45 +0100
Subject: [PATCH] WIP: XMLDecl stuff

---
NOTE(review): this patch was recovered from a copy whose line breaks and
angle-bracket spans had been stripped. Generic arguments, the "<?xml" tag and
the rule [23] comment are restored from the surrounding definitions and the
XML 1.0 grammar; the rule [80] comment is completed from the same grammar.
The text of the removed `tuple(...)` line in xml_decl and the exact
indentation are best-effort reconstructions - TODO confirm against blob
f882064. The sender address could not be recovered.

 src/parser.rs | 90 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 69 insertions(+), 21 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index f882064..2acd579 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -30,9 +30,6 @@ struct Attribute<'s> {
     value: &'s str,
 }
 
-/// Contains only latin characters or dash after first char
-type EncName<'s> = &'s str;
-
 struct DoctypeDecl<'s> {
     name: &'s str,
     // TODO: doctype declaration parsing
@@ -52,15 +49,6 @@ pub fn element(input: &str) -> IResult<&str, Element> {
     todo!()
 }
 
-enum Misc<'s> {
-    Comment(Comment<'s>),
-    PI(PI<'s>),
-}
-/// Misc
-pub fn misc(input: &str) -> IResult<&str, Misc> {
-    todo!()
-}
-
 type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
 /// [1] document ::= prolog element Misc*
 pub fn document(input: &str) -> IResult<&str, Document> {
@@ -231,6 +219,7 @@ pub fn comment(input: &str) -> IResult<&str, Comment> {
     )(input)
 }
 
+#[derive(Clone)]
 struct PI<'s> {
     target: &'s str,
     instruction: Option<&'s str>,
@@ -294,7 +283,7 @@ pub fn cd_end(input: &str) -> IResult<&str, CDEnd> {
 }
 
 type Prolog<'s> = (
-    Option<XMLDecl>,
+    Option<XMLDecl<'s>>,
     Vec<Misc<'s>>,
     Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>,
 );
@@ -307,25 +296,26 @@ pub fn prolog(input: &str) -> IResult<&str, Prolog> {
     ))(input)
 }
 
-struct XMLDecl {
+struct XMLDecl<'s> {
     version_info: VersionInfo,
-    // encoding_decl: Option<EncodingDecl>,
-    // sd_decl: Option<SDDecl>,
+    encoding_decl: Option<EncodingDecl<'s>>,
+    sd_decl: Option<SDDecl>,
 }
 /// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> {
     // (VersionInfo, Option<EncodingDecl>, Option<SDDecl>)
-    let (leftover, (version_info /* encoding_decl, sd_decl */,)) = delimited(
+    let (leftover, (version_info, encoding_decl, sd_decl)) = delimited(
         tag("<?xml"),
-        tuple((version_info /* encoding_decl, sd_decl */,)),
-        tag("?>"),
+        tuple((version_info, opt(encoding_decl), opt(sd_decl))),
+        pair(opt(s), tag("?>")),
     )(input)?;
+    // TODO: change to map
     Ok((
         leftover,
         XMLDecl {
             version_info,
-            // encoding_decl,
-            // sd_decl,
+            encoding_decl,
+            sd_decl,
         },
     ))
 }
@@ -363,6 +353,41 @@ pub fn version_num(input: &str) -> IResult<&str, VersionNum> {
     )(input)
 }
 
+#[derive(Clone)]
+enum Misc<'s> {
+    Comment(Comment<'s>),
+    PI(PI<'s>),
+    S,
+}
+/// [27] Misc ::= Comment | PI | S
+pub fn misc(input: &str) -> IResult<&str, Misc> {
+    alt((
+        map(comment, |comment| Misc::Comment(comment)),
+        map(pi, |pi| Misc::PI(pi)),
+        value(Misc::S, s),
+    ))(input)
+}
+
+type SDDecl = bool;
+/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
+pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {
+    preceded(
+        tuple((s, tag("standalone"), eq)),
+        alt((
+            delimited(
+                char('\''),
+                alt((value(true, tag("yes")), value(false, tag("no")))),
+                char('\''),
+            ),
+            delimited(
+                char('"'),
+                alt((value(true, tag("yes")), value(false, tag("no")))),
+                char('"'),
+            ),
+        )),
+    )(input)
+}
+
 pub fn reference(input: &str) -> IResult<&str, char> {
     todo!()
 }
@@ -371,6 +396,29 @@ pub fn pe_reference(input: &str) -> IResult<&str, char> {
     todo!()
 }
 
+type EncodingDecl<'s> = EncName<'s>;
+/// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
+pub fn encoding_decl(input: &str) -> IResult<&str, EncodingDecl> {
+    preceded(
+        tuple((s, tag("encoding"), eq)),
+        alt((
+            delimited(char('"'), enc_name, char('"')),
+            delimited(char('\''), enc_name, char('\'')),
+        )),
+    )(input)
+}
+
+type EncName<'s> = &'s str;
+/// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
+pub fn enc_name(input: &str) -> IResult<&str, EncName> {
+    recognize(pair(
+        satisfy(|c| matches!(c, 'A'..='Z' | 'a'..='z' )),
+        many0(satisfy(
+            |c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '_' | '-' ),
+        )),
+    ))(input)
+}
+
 #[cfg(test)]
 mod tests {
     use std::num::NonZero;