WIP: conditional sections
This commit is contained in:
parent
435b2af24c
commit
b9d88e5c6f
247
src/parser.rs
247
src/parser.rs
|
@ -82,6 +82,7 @@ pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> {
|
||||||
recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input)
|
recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
enum LiteralData<'s> {
|
enum LiteralData<'s> {
|
||||||
String(&'s str),
|
String(&'s str),
|
||||||
PEReference(PEReference<'s>),
|
PEReference(PEReference<'s>),
|
||||||
|
@ -731,8 +732,8 @@ pub fn choice(input: &str) -> IResult<&str, Choice> {
|
||||||
pair(cp, many1(preceded(tuple((opt(s), tag("|"), opt(s))), cp))),
|
pair(cp, many1(preceded(tuple((opt(s), tag("|"), opt(s))), cp))),
|
||||||
pair(opt(s), tag(")")),
|
pair(opt(s), tag(")")),
|
||||||
),
|
),
|
||||||
|(first, rest)| {
|
|(head, tail)| {
|
||||||
let choice = vec![vec![first], rest].concat();
|
let choice = vec![vec![head], tail].concat();
|
||||||
Choice(choice)
|
Choice(choice)
|
||||||
},
|
},
|
||||||
)(input)
|
)(input)
|
||||||
|
@ -748,8 +749,8 @@ pub fn seq(input: &str) -> IResult<&str, Seq> {
|
||||||
pair(cp, many0(preceded(tuple((opt(s), tag(","), opt(s))), cp))),
|
pair(cp, many0(preceded(tuple((opt(s), tag(","), opt(s))), cp))),
|
||||||
pair(opt(s), tag(")")),
|
pair(opt(s), tag(")")),
|
||||||
),
|
),
|
||||||
|(first, rest)| {
|
|(head, tail)| {
|
||||||
let seq = vec![vec![first], rest].concat();
|
let seq = vec![vec![head], tail].concat();
|
||||||
Seq(seq)
|
Seq(seq)
|
||||||
},
|
},
|
||||||
)(input)
|
)(input)
|
||||||
|
@ -776,6 +777,243 @@ pub fn mixed(input: &str) -> IResult<&str, Mixed> {
|
||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct AttlistDecl<'s> {
|
||||||
|
element_type: Name<'s>,
|
||||||
|
att_defs: Vec<AttDef<'s>>,
|
||||||
|
}
|
||||||
|
/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
|
||||||
|
pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> {
|
||||||
|
map(
|
||||||
|
delimited(
|
||||||
|
pair(tag("<!ATTLIST"), s),
|
||||||
|
pair(name, many0(att_def)),
|
||||||
|
pair(opt(s), tag(">")),
|
||||||
|
),
|
||||||
|
|(element_type, att_defs)| AttlistDecl {
|
||||||
|
element_type,
|
||||||
|
att_defs,
|
||||||
|
},
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
struct AttDef<'s> {
|
||||||
|
name: Name<'s>,
|
||||||
|
att_type: AttType<'s>,
|
||||||
|
default_decl: DefaultDecl<'s>,
|
||||||
|
}
|
||||||
|
/// [53] AttDef ::= S Name S AttType S DefaultDecl
|
||||||
|
pub fn att_def(input: &str) -> IResult<&str, AttDef> {
|
||||||
|
map(
|
||||||
|
tuple((
|
||||||
|
preceded(s, name),
|
||||||
|
preceded(s, att_type),
|
||||||
|
preceded(s, default_decl),
|
||||||
|
)),
|
||||||
|
|(name, att_type, default_decl)| AttDef {
|
||||||
|
name,
|
||||||
|
att_type,
|
||||||
|
default_decl,
|
||||||
|
},
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
enum AttType<'s> {
|
||||||
|
StringType,
|
||||||
|
TokenizedType(TokenizedType),
|
||||||
|
EnumeratedType(EnumeratedType<'s>),
|
||||||
|
}
|
||||||
|
/// [54] AttType ::= StringType | TokenizedType | EnumeratedType
|
||||||
|
pub fn att_type(input: &str) -> IResult<&str, AttType> {
|
||||||
|
alt((
|
||||||
|
value(AttType::StringType, string_type),
|
||||||
|
map(tokenized_type, |tokenized_type| {
|
||||||
|
AttType::TokenizedType(tokenized_type)
|
||||||
|
}),
|
||||||
|
map(enumerated_type, |enumerated_type| {
|
||||||
|
AttType::EnumeratedType(enumerated_type)
|
||||||
|
}),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
type StringType<'s> = &'s str;
|
||||||
|
/// [55] StringType ::= 'CDATA'
|
||||||
|
pub fn string_type(input: &str) -> IResult<&str, StringType> {
|
||||||
|
tag("CDATA")(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
enum TokenizedType {
|
||||||
|
ID,
|
||||||
|
IDRef,
|
||||||
|
IDRefs,
|
||||||
|
Entity,
|
||||||
|
Entities,
|
||||||
|
NMToken,
|
||||||
|
NMTokens,
|
||||||
|
}
|
||||||
|
/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
|
||||||
|
pub fn tokenized_type(input: &str) -> IResult<&str, TokenizedType> {
|
||||||
|
alt((
|
||||||
|
value(TokenizedType::ID, tag("ID")),
|
||||||
|
// TODO: check if this is required
|
||||||
|
// try idrefs first to avoid losing 'S'
|
||||||
|
value(TokenizedType::IDRefs, tag("IDREFS")),
|
||||||
|
value(TokenizedType::IDRef, tag("IDREF")),
|
||||||
|
value(TokenizedType::Entity, tag("ENTITY")),
|
||||||
|
value(TokenizedType::Entities, tag("ENTITIES")),
|
||||||
|
// same here
|
||||||
|
value(TokenizedType::NMTokens, tag("NMTOKENS")),
|
||||||
|
value(TokenizedType::NMToken, tag("NMTOKEN")),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
enum EnumeratedType<'s> {
|
||||||
|
NotationType(NotationType<'s>),
|
||||||
|
Enumeration(Enumeration<'s>),
|
||||||
|
}
|
||||||
|
/// [57] EnumeratedType ::= NotationType | Enumeration
|
||||||
|
pub fn enumerated_type(input: &str) -> IResult<&str, EnumeratedType> {
|
||||||
|
alt((
|
||||||
|
map(notation_type, |notation_type| {
|
||||||
|
EnumeratedType::NotationType(notation_type)
|
||||||
|
}),
|
||||||
|
map(enumeration, |enumeration| {
|
||||||
|
EnumeratedType::Enumeration(enumeration)
|
||||||
|
}),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
struct NotationType<'s>(Vec<Name<'s>>);
|
||||||
|
/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
|
||||||
|
pub fn notation_type(input: &str) -> IResult<&str, NotationType> {
|
||||||
|
map(
|
||||||
|
delimited(
|
||||||
|
tuple((tag("NOTATION"), s, tag("("), opt(s))),
|
||||||
|
pair(
|
||||||
|
name,
|
||||||
|
many0(preceded(tuple((opt(s), tag("|"), opt(s))), name)),
|
||||||
|
),
|
||||||
|
pair(opt(s), tag(")")),
|
||||||
|
),
|
||||||
|
|(head, tail)| {
|
||||||
|
let notation_type = vec![vec![head], tail].concat();
|
||||||
|
NotationType(notation_type)
|
||||||
|
},
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
struct Enumeration<'s>(Vec<Nmtoken<'s>>);
|
||||||
|
/// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
|
||||||
|
pub fn enumeration(input: &str) -> IResult<&str, Enumeration> {
|
||||||
|
map(
|
||||||
|
delimited(
|
||||||
|
pair(tag("("), opt(s)),
|
||||||
|
pair(
|
||||||
|
nmtoken,
|
||||||
|
many0(preceded(tuple((opt(s), tag("|"), opt(s))), nmtoken)),
|
||||||
|
),
|
||||||
|
pair(opt(s), tag(")")),
|
||||||
|
),
|
||||||
|
|(head, tail)| {
|
||||||
|
let enumeration = vec![vec![head], tail].concat();
|
||||||
|
Enumeration(enumeration)
|
||||||
|
},
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
enum DefaultDecl<'s> {
|
||||||
|
Required,
|
||||||
|
Implied,
|
||||||
|
Fixed(AttValue<'s>),
|
||||||
|
}
|
||||||
|
/// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
|
||||||
|
pub fn default_decl(input: &str) -> IResult<&str, DefaultDecl> {
|
||||||
|
alt((
|
||||||
|
value(DefaultDecl::Required, tag("#REQUIRED")),
|
||||||
|
value(DefaultDecl::Implied, tag("#IMPLIED")),
|
||||||
|
map(
|
||||||
|
preceded(opt(pair(tag("#FIXED"), s)), att_value),
|
||||||
|
|att_value| DefaultDecl::Fixed(att_value),
|
||||||
|
),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
enum ConditionalSect<'s> {
|
||||||
|
IncludeSect(IncludeSect<'s>),
|
||||||
|
IgnoreSect(IgnoreSect<'s>),
|
||||||
|
}
|
||||||
|
/// [61] conditionalSect ::= includeSect | ignoreSect
|
||||||
|
pub fn conditional_sect(input: &str) -> IResult<&str, ConditionalSect> {
|
||||||
|
alt((
|
||||||
|
map(include_sect, |include_sect| {
|
||||||
|
ConditionalSect::IncludeSect(include_sect)
|
||||||
|
}),
|
||||||
|
map(ignore_sect, |ignore_sect| {
|
||||||
|
ConditionalSect::IgnoreSect(ignore_sect)
|
||||||
|
}),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
struct IncludeSect<'s>(ExtSubsetDecl<'s>);
|
||||||
|
/// [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
|
||||||
|
pub fn include_sect(input: &str) -> IResult<&str, IncludeSect> {
|
||||||
|
map(
|
||||||
|
delimited(
|
||||||
|
tuple((tag("<!["), opt(s), tag("INCLUDE"), opt(s), tag("["))),
|
||||||
|
ext_subset_decl,
|
||||||
|
tag("]]>"),
|
||||||
|
),
|
||||||
|
|ext_subset_decl| IncludeSect(ext_subset_decl),
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>);
|
||||||
|
/// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
|
||||||
|
pub fn ignore_sect(input: &str) -> IResult<&str, IgnoreSect> {
|
||||||
|
map(
|
||||||
|
delimited(
|
||||||
|
tuple((tag("<!["), opt(s), tag("IGNORE"), opt(s), tag("["))),
|
||||||
|
many0(ignore_sect_contents),
|
||||||
|
tag("]]>"),
|
||||||
|
),
|
||||||
|
|ignore_sect_contents| IgnoreSect(ignore_sect_contents),
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
struct IgnoreSectContents<'s> {
|
||||||
|
// TODO: what the fuck does this mean
|
||||||
|
ignore: Ignore<'s>,
|
||||||
|
ignore_list: Vec<(IgnoreSectContents<'s>, Ignore<'s>)>,
|
||||||
|
}
|
||||||
|
/// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
|
||||||
|
pub fn ignore_sect_contents(input: &str) -> IResult<&str, IgnoreSectContents> {
|
||||||
|
map(
|
||||||
|
pair(
|
||||||
|
ignore,
|
||||||
|
many0(tuple((
|
||||||
|
delimited(tag("<!["), ignore_sect_contents, tag("]]>")),
|
||||||
|
ignore,
|
||||||
|
))),
|
||||||
|
),
|
||||||
|
|(ignore, ignore_list)| IgnoreSectContents {
|
||||||
|
ignore,
|
||||||
|
ignore_list,
|
||||||
|
},
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
type Ignore<'s> = &'s str;
|
||||||
|
/// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
|
||||||
|
pub fn ignore(input: &str) -> IResult<&str, Ignore> {
|
||||||
|
recognize(many_till(xmlchar, peek(alt((tag("<!["), tag("]]>"))))))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
enum CharRef<'s> {
|
enum CharRef<'s> {
|
||||||
Decimal(&'s str),
|
Decimal(&'s str),
|
||||||
Hexadecimal(&'s str),
|
Hexadecimal(&'s str),
|
||||||
|
@ -801,6 +1039,7 @@ pub fn char_ref(input: &str) -> IResult<&str, CharRef> {
|
||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
enum Reference<'s> {
|
enum Reference<'s> {
|
||||||
EntityRef(EntityRef<'s>),
|
EntityRef(EntityRef<'s>),
|
||||||
CharRef(CharRef<'s>),
|
CharRef(CharRef<'s>),
|
||||||
|
|
Loading…
Reference in New Issue