WIP: conditional sections

This commit is contained in:
cel 🌸 2024-06-25 22:44:47 +01:00
parent 435b2af24c
commit b9d88e5c6f
1 changed files with 243 additions and 4 deletions

View File

@ -82,6 +82,7 @@ pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> {
recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input)
}
#[derive(Clone)]
enum LiteralData<'s> {
String(&'s str),
PEReference(PEReference<'s>),
@ -731,8 +732,8 @@ pub fn choice(input: &str) -> IResult<&str, Choice> {
pair(cp, many1(preceded(tuple((opt(s), tag("|"), opt(s))), cp))),
pair(opt(s), tag(")")),
),
|(first, rest)| {
let choice = vec![vec![first], rest].concat();
|(head, tail)| {
let choice = vec![vec![head], tail].concat();
Choice(choice)
},
)(input)
@ -748,8 +749,8 @@ pub fn seq(input: &str) -> IResult<&str, Seq> {
pair(cp, many0(preceded(tuple((opt(s), tag(","), opt(s))), cp))),
pair(opt(s), tag(")")),
),
|(first, rest)| {
let seq = vec![vec![first], rest].concat();
|(head, tail)| {
let seq = vec![vec![head], tail].concat();
Seq(seq)
},
)(input)
@ -776,6 +777,243 @@ pub fn mixed(input: &str) -> IResult<&str, Mixed> {
))(input)
}
/// Parsed attribute-list declaration: the element type it applies to and the
/// attribute definitions that follow it.
struct AttlistDecl<'s> {
    /// Name of the element type the attributes are declared for.
    element_type: Name<'s>,
    /// Attribute definitions in the order they were parsed (may be empty).
    att_defs: Vec<AttDef<'s>>,
}

/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
///
/// Parses the `<!ATTLIST` keyword and mandatory whitespace, then the element
/// name and zero or more attribute definitions, then optional whitespace and
/// the closing `>`.
pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> {
    let opening = pair(tag("<!ATTLIST"), s);
    let body = pair(name, many0(att_def));
    let closing = pair(opt(s), tag(">"));

    delimited(opening, body, closing)(input).map(|(rest, (element_type, att_defs))| {
        (
            rest,
            AttlistDecl {
                element_type,
                att_defs,
            },
        )
    })
}
/// A single attribute definition inside an `<!ATTLIST ...>` declaration.
struct AttDef<'s> {
    /// Attribute name.
    name: Name<'s>,
    /// Declared attribute type.
    att_type: AttType<'s>,
    /// Default declaration for the attribute.
    default_decl: DefaultDecl<'s>,
}

/// [53] AttDef ::= S Name S AttType S DefaultDecl
///
/// Each of the three components must be preceded by whitespace; the
/// whitespace itself is discarded.
pub fn att_def(input: &str) -> IResult<&str, AttDef> {
    let attribute_name = preceded(s, name);
    let attribute_type = preceded(s, att_type);
    let attribute_default = preceded(s, default_decl);

    tuple((attribute_name, attribute_type, attribute_default))(input).map(
        |(rest, (name, att_type, default_decl))| {
            (
                rest,
                AttDef {
                    name,
                    att_type,
                    default_decl,
                },
            )
        },
    )
}
/// Declared type of an attribute.
#[derive(Clone)]
enum AttType<'s> {
    /// The `CDATA` string type.
    StringType,
    /// One of the tokenized type keywords.
    TokenizedType(TokenizedType),
    /// A notation type or an enumeration of name tokens.
    EnumeratedType(EnumeratedType<'s>),
}

/// [54] AttType ::= StringType | TokenizedType | EnumeratedType
///
/// The alternatives are tried in the order given by the production.
pub fn att_type(input: &str) -> IResult<&str, AttType> {
    let cdata = value(AttType::StringType, string_type);
    let tokenized = map(tokenized_type, AttType::TokenizedType);
    let enumerated = map(enumerated_type, AttType::EnumeratedType);

    alt((cdata, tokenized, enumerated))(input)
}
/// The matched `CDATA` keyword, borrowed from the input.
type StringType<'s> = &'s str;

/// [55] StringType ::= 'CDATA'
///
/// Matches the literal keyword `CDATA` and returns the matched slice.
pub fn string_type(input: &str) -> IResult<&str, StringType> {
    let cdata_keyword = tag("CDATA");
    cdata_keyword(input)
}
/// Tokenized attribute types; one variant per keyword of production [56].
#[derive(Clone)]
enum TokenizedType {
    /// `ID`
    ID,
    /// `IDREF`
    IDRef,
    /// `IDREFS`
    IDRefs,
    /// `ENTITY`
    Entity,
    /// `ENTITIES`
    Entities,
    /// `NMTOKEN`
    NMToken,
    /// `NMTOKENS`
    NMTokens,
}

/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
///
/// Matches one tokenized-type keyword and returns the corresponding
/// [`TokenizedType`] variant.
pub fn tokenized_type(input: &str) -> IResult<&str, TokenizedType> {
    // `alt` returns the first alternative that succeeds and `tag` only checks
    // a prefix, so a keyword that is a prefix of another keyword must be tried
    // AFTER the longer one. Otherwise `IDREFS` would be accepted as `ID`,
    // leaving `REFS` unparsed and breaking the surrounding AttDef.
    alt((
        // `ID` < `IDREF` < `IDREFS`: longest first.
        value(TokenizedType::IDRefs, tag("IDREFS")),
        value(TokenizedType::IDRef, tag("IDREF")),
        value(TokenizedType::ID, tag("ID")),
        // `ENTITY` and `ENTITIES` diverge at the sixth character, so their
        // relative order does not matter.
        value(TokenizedType::Entity, tag("ENTITY")),
        value(TokenizedType::Entities, tag("ENTITIES")),
        // `NMTOKEN` < `NMTOKENS`: longest first.
        value(TokenizedType::NMTokens, tag("NMTOKENS")),
        value(TokenizedType::NMToken, tag("NMTOKEN")),
    ))(input)
}
/// Enumerated attribute type: either a notation type or a plain enumeration.
#[derive(Clone)]
enum EnumeratedType<'s> {
    /// `NOTATION (...)` list of notation names.
    NotationType(NotationType<'s>),
    /// Parenthesised list of name tokens.
    Enumeration(Enumeration<'s>),
}

/// [57] EnumeratedType ::= NotationType | Enumeration
///
/// Tries the notation form first, then the plain enumeration.
pub fn enumerated_type(input: &str) -> IResult<&str, EnumeratedType> {
    let notation = map(notation_type, EnumeratedType::NotationType);
    let token_list = map(enumeration, EnumeratedType::Enumeration);

    alt((notation, token_list))(input)
}
/// Notation names listed in a `NOTATION (...)` attribute type, in source order.
#[derive(Clone)]
struct NotationType<'s>(Vec<Name<'s>>);

/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
///
/// Returns every name between the parentheses; at least one name is required.
pub fn notation_type(input: &str) -> IResult<&str, NotationType> {
    let opening = tuple((tag("NOTATION"), s, tag("("), opt(s)));
    let separator = tuple((opt(s), tag("|"), opt(s)));
    let names = pair(name, many0(preceded(separator, name)));
    let closing = pair(opt(s), tag(")"));

    map(delimited(opening, names, closing), |(first, others)| {
        // The mandatory first name followed by the `|`-separated remainder.
        NotationType(std::iter::once(first).chain(others).collect())
    })(input)
}
/// Name tokens listed in an enumeration attribute type, in source order.
#[derive(Clone)]
struct Enumeration<'s>(Vec<Nmtoken<'s>>);

/// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
///
/// Returns every name token between the parentheses; at least one is required.
pub fn enumeration(input: &str) -> IResult<&str, Enumeration> {
    let opening = pair(tag("("), opt(s));
    let separator = tuple((opt(s), tag("|"), opt(s)));
    let tokens = pair(nmtoken, many0(preceded(separator, nmtoken)));
    let closing = pair(opt(s), tag(")"));

    map(delimited(opening, tokens, closing), |(first, others)| {
        // The mandatory first token followed by the `|`-separated remainder.
        let mut all_tokens = vec![first];
        all_tokens.extend(others);
        Enumeration(all_tokens)
    })(input)
}
/// Default declaration of an attribute.
#[derive(Clone)]
enum DefaultDecl<'s> {
    /// `#REQUIRED`
    Required,
    /// `#IMPLIED`
    Implied,
    /// An attribute value, with or without a leading `#FIXED` keyword.
    Fixed(AttValue<'s>),
}

/// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
///
/// NOTE(review): the `#FIXED` keyword is optional in the third alternative and
/// is discarded, so `#FIXED "v"` and a plain default `"v"` both come back as
/// `DefaultDecl::Fixed`. Confirm whether callers need to tell them apart.
pub fn default_decl(input: &str) -> IResult<&str, DefaultDecl> {
    let required = value(DefaultDecl::Required, tag("#REQUIRED"));
    let implied = value(DefaultDecl::Implied, tag("#IMPLIED"));
    let fixed_keyword = opt(pair(tag("#FIXED"), s));
    let default_value = map(preceded(fixed_keyword, att_value), DefaultDecl::Fixed);

    alt((required, implied, default_value))(input)
}
/// A conditional section: either included or ignored.
enum ConditionalSect<'s> {
    /// `<![INCLUDE[ ... ]]>`
    IncludeSect(IncludeSect<'s>),
    /// `<![IGNORE[ ... ]]>`
    IgnoreSect(IgnoreSect<'s>),
}

/// [61] conditionalSect ::= includeSect | ignoreSect
///
/// Tries the include form first, then the ignore form.
pub fn conditional_sect(input: &str) -> IResult<&str, ConditionalSect> {
    let included = map(include_sect, ConditionalSect::IncludeSect);
    let ignored = map(ignore_sect, ConditionalSect::IgnoreSect);

    alt((included, ignored))(input)
}
/// Body of an `<![INCLUDE[ ... ]]>` section.
struct IncludeSect<'s>(ExtSubsetDecl<'s>);

/// [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
///
/// Parses the opening marker (whitespace around `INCLUDE` is optional), the
/// external-subset declarations, and the closing `]]>`.
pub fn include_sect(input: &str) -> IResult<&str, IncludeSect> {
    let opening = tuple((tag("<!["), opt(s), tag("INCLUDE"), opt(s), tag("[")));
    let closing = tag("]]>");

    map(delimited(opening, ext_subset_decl, closing), IncludeSect)(input)
}
/// Contents of an `<![IGNORE[ ... ]]>` section.
///
/// The vector holds at most one element: a single `ignoreSectContents`
/// already spans the whole body of the section (see [`ignore_sect`]).
struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>);

/// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
///
/// Parses the opening marker (whitespace around `IGNORE` is optional), the
/// ignored body, and the closing `]]>`.
pub fn ignore_sect(input: &str) -> IResult<&str, IgnoreSect> {
    map(
        delimited(
            tuple((tag("<!["), opt(s), tag("IGNORE"), opt(s), tag("["))),
            // `ignore_sect_contents` succeeds on empty input in front of
            // `]]>`, and one application already consumes everything up to the
            // section's closing `]]>`. Wrapping it in `many0` therefore always
            // ends on a zero-length match, which nom's `many0` rejects with an
            // error to avoid looping forever, so the section never parsed.
            // A single optional application accepts the same language.
            opt(ignore_sect_contents),
            tag("]]>"),
        ),
        |contents| match contents {
            Some(contents) => IgnoreSect(vec![contents]),
            None => IgnoreSect(Vec::new()),
        },
    )(input)
}
/// Body of an ignored section, split around its nested `<![ ... ]]>` sections.
struct IgnoreSectContents<'s> {
    /// Ignored text before the first nested section (possibly empty).
    ignore: Ignore<'s>,
    /// For each nested `<![ ... ]]>` section: its contents, paired with the
    /// ignored text that follows its closing `]]>`.
    ignore_list: Vec<(IgnoreSectContents<'s>, Ignore<'s>)>,
}

/// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
///
/// Recurses into itself for every nested `<![ ... ]]>` section.
pub fn ignore_sect_contents(input: &str) -> IResult<&str, IgnoreSectContents> {
    let nested_section = delimited(tag("<!["), ignore_sect_contents, tag("]]>"));
    let continuation = pair(nested_section, ignore);

    pair(ignore, many0(continuation))(input).map(|(rest, (ignore, ignore_list))| {
        (
            rest,
            IgnoreSectContents {
                ignore,
                ignore_list,
            },
        )
    })
}
/// A run of ignored text, borrowed from the input.
type Ignore<'s> = &'s str;

/// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
///
/// Recognizes characters up to the next `<![` or `]]>`. The delimiter is only
/// peeked at, so it is not part of the returned slice.
pub fn ignore(input: &str) -> IResult<&str, Ignore> {
    let boundary = alt((tag("<!["), tag("]]>")));
    let ignored_chars = many_till(xmlchar, peek(boundary));

    recognize(ignored_chars)(input)
}
#[derive(Clone)]
enum CharRef<'s> {
Decimal(&'s str),
Hexadecimal(&'s str),
@ -801,6 +1039,7 @@ pub fn char_ref(input: &str) -> IResult<&str, CharRef> {
))(input)
}
#[derive(Clone)]
enum Reference<'s> {
EntityRef(EntityRef<'s>),
CharRef(CharRef<'s>),