WIP: XMLDecl stuff

This commit is contained in:
cel 🌸 2024-06-21 17:08:45 +01:00
parent 0a353135c0
commit 3a875666a5
1 changed file with 69 additions and 21 deletions

View File

@ -30,9 +30,6 @@ struct Attribute<'s> {
value: &'s str,
}
/// Encoding name: an ASCII letter followed by any run of ASCII letters, digits,
/// `.`, `_` or `-` (`EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*`).
type EncName<'s> = &'s str;
struct DoctypeDecl<'s> {
name: &'s str,
// TODO: doctype declaration parsing
@ -52,15 +49,6 @@ pub fn element(input: &str) -> IResult<&str, Element> {
todo!()
}
/// A miscellaneous item: either a comment or a processing instruction.
enum Misc<'s> {
/// A comment.
Comment(Comment<'s>),
/// A processing instruction.
PI(PI<'s>),
}
/// Misc
///
/// Not implemented yet: the body is `todo!()`, so calling this function panics.
pub fn misc(input: &str) -> IResult<&str, Misc> {
// Placeholder body; `input` is never read.
todo!()
}
/// Parsed document as a tuple mirroring `document ::= prolog element Misc*`:
/// the prolog, the element, and the list of `Misc` items.
type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
/// [1] document ::= prolog element Misc*
pub fn document(input: &str) -> IResult<&str, Document> {
@ -231,6 +219,7 @@ pub fn comment(input: &str) -> IResult<&str, Comment> {
)(input)
}
#[derive(Clone)]
struct PI<'s> {
target: &'s str,
instruction: Option<&'s str>,
@ -294,7 +283,7 @@ pub fn cd_end(input: &str) -> IResult<&str, CDEnd> {
}
/// Tuple returned by the `prolog` parser: an optional XML declaration, a list of
/// `Misc` items, and an optional doctype declaration paired with a further list
/// of `Misc` items.
type Prolog<'s> = (
Option<XMLDecl>,
Option<XMLDecl<'s>>,
Vec<Misc<'s>>,
Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>,
);
@ -307,25 +296,26 @@ pub fn prolog(input: &str) -> IResult<&str, Prolog> {
))(input)
}
/// Contents of an XML declaration (`<?xml ... ?>`).
struct XMLDecl {
struct XMLDecl<'s> {
/// The parsed `VersionInfo`.
version_info: VersionInfo,
// encoding_decl: Option<EncodingDecl>,
// sd_decl: Option<SDDecl>,
/// The encoding declaration, if one was present.
encoding_decl: Option<EncodingDecl<'s>>,
/// The standalone document declaration, if one was present.
sd_decl: Option<SDDecl>,
}
/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
///
/// On success returns the remaining input together with an `XMLDecl` assembled from
/// the parsed parts. If the inner parser fails, that error is propagated to the
/// caller through `?`.
pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> {
// (VersionInfo, Option<EncodingDecl>, Option<SDDecl>)
let (leftover, (version_info /* encoding_decl, sd_decl */,)) = delimited(
let (leftover, (version_info, encoding_decl, sd_decl)) = delimited(
tag("<?xml"),
tuple((version_info /* opt(encoding_decl), opt(sd_decl) */,)),
tag("?>"),
tuple((version_info, opt(encoding_decl), opt(sd_decl))),
// Optional whitespace (`S?`) is accepted before the closing `?>`.
pair(opt(s), tag("?>")),
)(input)?;
// TODO: change to map
Ok((
leftover,
XMLDecl {
version_info,
// encoding_decl,
// sd_decl,
encoding_decl,
sd_decl,
},
))
}
@ -363,6 +353,41 @@ pub fn version_num(input: &str) -> IResult<&str, VersionNum> {
)(input)
}
/// A miscellaneous item (`Misc ::= Comment | PI | S`): a comment, a processing
/// instruction, or whitespace.
#[derive(Clone)]
enum Misc<'s> {
/// A comment.
Comment(Comment<'s>),
/// A processing instruction.
PI(PI<'s>),
/// Whitespace (`S`); carries no data.
S,
}
/// [27] Misc ::= Comment | PI | S
///
/// Tries the `comment`, `pi` and `s` parsers in that order and wraps the first one
/// that succeeds in the corresponding `Misc` variant. Whitespace carries no payload,
/// so a successful `s` match always yields `Misc::S`.
pub fn misc(input: &str) -> IResult<&str, Misc> {
    alt((
        // Tuple-variant constructors are ordinary functions, so they are handed to
        // `map` directly rather than being wrapped in a closure.
        map(comment, Misc::Comment),
        map(pi, Misc::PI),
        value(Misc::S, s),
    ))(input)
}
/// Value of the `standalone` declaration: `true` for `yes`, `false` for `no`.
type SDDecl = bool;
/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
///
/// Parses the standalone document declaration and returns its value as a flag.
/// The value may be wrapped in single or double quotes; the opening and closing
/// quote must be of the same kind.
pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {
    /// Recognises the literal `yes` or `no` and maps it to the matching flag.
    fn yes_or_no(input: &str) -> IResult<&str, SDDecl> {
        alt((value(true, tag("yes")), value(false, tag("no"))))(input)
    }

    // The single-quoted form is tried first, then the double-quoted one.
    let single_quoted = delimited(char('\''), yes_or_no, char('\''));
    let double_quoted = delimited(char('"'), yes_or_no, char('"'));
    let keyword = tuple((s, tag("standalone"), eq));

    preceded(keyword, alt((single_quoted, double_quoted)))(input)
}
/// Stub: the body is `todo!()`, so calling this function panics.
///
/// NOTE(review): presumably intended to parse the XML `Reference` production and
/// yield the referenced character — confirm before implementing.
pub fn reference(input: &str) -> IResult<&str, char> {
todo!()
}
@ -371,6 +396,29 @@ pub fn pe_reference(input: &str) -> IResult<&str, char> {
todo!()
}
/// Result of parsing an encoding declaration: the declared encoding name.
type EncodingDecl<'s> = EncName<'s>;
/// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
///
/// Parses the encoding declaration and returns the encoding name without its
/// surrounding quotes. The name may be wrapped in double or single quotes; the
/// opening and closing quote must be of the same kind.
pub fn encoding_decl(input: &str) -> IResult<&str, EncodingDecl> {
    // The double-quoted form is tried first, then the single-quoted one.
    let double_quoted = delimited(char('"'), enc_name, char('"'));
    let single_quoted = delimited(char('\''), enc_name, char('\''));
    let keyword = tuple((s, tag("encoding"), eq));

    preceded(keyword, alt((double_quoted, single_quoted)))(input)
}
/// Encoding name, borrowed from the parsed input.
type EncName<'s> = &'s str;
/// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
///
/// Matches one ASCII letter followed by any number of ASCII letters, ASCII digits,
/// `.`, `_` or `-`, and returns the matched slice of the input.
pub fn enc_name(input: &str) -> IResult<&str, EncName> {
    let first_char = satisfy(|c: char| c.is_ascii_alphabetic());
    let remaining_chars = many0(satisfy(|c: char| {
        c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-')
    }));

    // `recognize` discards the individual characters and yields the consumed span.
    recognize(pair(first_char, remaining_chars))(input)
}
#[cfg(test)]
mod tests {
use std::num::NonZero;