WIP: conditional sections
This commit is contained in:
parent
435b2af24c
commit
b9d88e5c6f
247
src/parser.rs
247
src/parser.rs
|
@ -82,6 +82,7 @@ pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> {
|
|||
recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input)
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum LiteralData<'s> {
|
||||
String(&'s str),
|
||||
PEReference(PEReference<'s>),
|
||||
|
@ -731,8 +732,8 @@ pub fn choice(input: &str) -> IResult<&str, Choice> {
|
|||
pair(cp, many1(preceded(tuple((opt(s), tag("|"), opt(s))), cp))),
|
||||
pair(opt(s), tag(")")),
|
||||
),
|
||||
|(first, rest)| {
|
||||
let choice = vec![vec![first], rest].concat();
|
||||
|(head, tail)| {
|
||||
let choice = vec![vec![head], tail].concat();
|
||||
Choice(choice)
|
||||
},
|
||||
)(input)
|
||||
|
@ -748,8 +749,8 @@ pub fn seq(input: &str) -> IResult<&str, Seq> {
|
|||
pair(cp, many0(preceded(tuple((opt(s), tag(","), opt(s))), cp))),
|
||||
pair(opt(s), tag(")")),
|
||||
),
|
||||
|(first, rest)| {
|
||||
let seq = vec![vec![first], rest].concat();
|
||||
|(head, tail)| {
|
||||
let seq = vec![vec![head], tail].concat();
|
||||
Seq(seq)
|
||||
},
|
||||
)(input)
|
||||
|
@ -776,6 +777,243 @@ pub fn mixed(input: &str) -> IResult<&str, Mixed> {
|
|||
))(input)
|
||||
}
|
||||
|
||||
struct AttlistDecl<'s> {
|
||||
element_type: Name<'s>,
|
||||
att_defs: Vec<AttDef<'s>>,
|
||||
}
|
||||
/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
|
||||
pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> {
|
||||
map(
|
||||
delimited(
|
||||
pair(tag("<!ATTLIST"), s),
|
||||
pair(name, many0(att_def)),
|
||||
pair(opt(s), tag(">")),
|
||||
),
|
||||
|(element_type, att_defs)| AttlistDecl {
|
||||
element_type,
|
||||
att_defs,
|
||||
},
|
||||
)(input)
|
||||
}
|
||||
|
||||
struct AttDef<'s> {
|
||||
name: Name<'s>,
|
||||
att_type: AttType<'s>,
|
||||
default_decl: DefaultDecl<'s>,
|
||||
}
|
||||
/// [53] AttDef ::= S Name S AttType S DefaultDecl
|
||||
pub fn att_def(input: &str) -> IResult<&str, AttDef> {
|
||||
map(
|
||||
tuple((
|
||||
preceded(s, name),
|
||||
preceded(s, att_type),
|
||||
preceded(s, default_decl),
|
||||
)),
|
||||
|(name, att_type, default_decl)| AttDef {
|
||||
name,
|
||||
att_type,
|
||||
default_decl,
|
||||
},
|
||||
)(input)
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum AttType<'s> {
|
||||
StringType,
|
||||
TokenizedType(TokenizedType),
|
||||
EnumeratedType(EnumeratedType<'s>),
|
||||
}
|
||||
/// [54] AttType ::= StringType | TokenizedType | EnumeratedType
|
||||
pub fn att_type(input: &str) -> IResult<&str, AttType> {
|
||||
alt((
|
||||
value(AttType::StringType, string_type),
|
||||
map(tokenized_type, |tokenized_type| {
|
||||
AttType::TokenizedType(tokenized_type)
|
||||
}),
|
||||
map(enumerated_type, |enumerated_type| {
|
||||
AttType::EnumeratedType(enumerated_type)
|
||||
}),
|
||||
))(input)
|
||||
}
|
||||
|
||||
/// Output of `string_type`: the slice of the input that matched the `CDATA`
/// keyword.
type StringType<'s> = &'s str;
|
||||
/// [55] StringType ::= 'CDATA'
|
||||
pub fn string_type(input: &str) -> IResult<&str, StringType> {
|
||||
tag("CDATA")(input)
|
||||
}
|
||||
|
||||
/// The tokenized attribute types of production [56].
///
/// The enum carries no data, so it also derives `Copy`, `Debug`, `PartialEq`
/// and `Eq`; this lets parser results be compared and printed directly.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum TokenizedType {
    /// The `ID` keyword.
    ID,
    /// The `IDREF` keyword.
    IDRef,
    /// The `IDREFS` keyword.
    IDRefs,
    /// The `ENTITY` keyword.
    Entity,
    /// The `ENTITIES` keyword.
    Entities,
    /// The `NMTOKEN` keyword.
    NMToken,
    /// The `NMTOKENS` keyword.
    NMTokens,
}
|
||||
/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
|
||||
pub fn tokenized_type(input: &str) -> IResult<&str, TokenizedType> {
|
||||
alt((
|
||||
value(TokenizedType::ID, tag("ID")),
|
||||
// TODO: check if this is required
|
||||
// try idrefs first to avoid losing 'S'
|
||||
value(TokenizedType::IDRefs, tag("IDREFS")),
|
||||
value(TokenizedType::IDRef, tag("IDREF")),
|
||||
value(TokenizedType::Entity, tag("ENTITY")),
|
||||
value(TokenizedType::Entities, tag("ENTITIES")),
|
||||
// same here
|
||||
value(TokenizedType::NMTokens, tag("NMTOKENS")),
|
||||
value(TokenizedType::NMToken, tag("NMTOKEN")),
|
||||
))(input)
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum EnumeratedType<'s> {
|
||||
NotationType(NotationType<'s>),
|
||||
Enumeration(Enumeration<'s>),
|
||||
}
|
||||
/// [57] EnumeratedType ::= NotationType | Enumeration
|
||||
pub fn enumerated_type(input: &str) -> IResult<&str, EnumeratedType> {
|
||||
alt((
|
||||
map(notation_type, |notation_type| {
|
||||
EnumeratedType::NotationType(notation_type)
|
||||
}),
|
||||
map(enumeration, |enumeration| {
|
||||
EnumeratedType::Enumeration(enumeration)
|
||||
}),
|
||||
))(input)
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct NotationType<'s>(Vec<Name<'s>>);
|
||||
/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
|
||||
pub fn notation_type(input: &str) -> IResult<&str, NotationType> {
|
||||
map(
|
||||
delimited(
|
||||
tuple((tag("NOTATION"), s, tag("("), opt(s))),
|
||||
pair(
|
||||
name,
|
||||
many0(preceded(tuple((opt(s), tag("|"), opt(s))), name)),
|
||||
),
|
||||
pair(opt(s), tag(")")),
|
||||
),
|
||||
|(head, tail)| {
|
||||
let notation_type = vec![vec![head], tail].concat();
|
||||
NotationType(notation_type)
|
||||
},
|
||||
)(input)
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct Enumeration<'s>(Vec<Nmtoken<'s>>);
|
||||
/// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
|
||||
pub fn enumeration(input: &str) -> IResult<&str, Enumeration> {
|
||||
map(
|
||||
delimited(
|
||||
pair(tag("("), opt(s)),
|
||||
pair(
|
||||
nmtoken,
|
||||
many0(preceded(tuple((opt(s), tag("|"), opt(s))), nmtoken)),
|
||||
),
|
||||
pair(opt(s), tag(")")),
|
||||
),
|
||||
|(head, tail)| {
|
||||
let enumeration = vec![vec![head], tail].concat();
|
||||
Enumeration(enumeration)
|
||||
},
|
||||
)(input)
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum DefaultDecl<'s> {
|
||||
Required,
|
||||
Implied,
|
||||
Fixed(AttValue<'s>),
|
||||
}
|
||||
/// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
|
||||
pub fn default_decl(input: &str) -> IResult<&str, DefaultDecl> {
|
||||
alt((
|
||||
value(DefaultDecl::Required, tag("#REQUIRED")),
|
||||
value(DefaultDecl::Implied, tag("#IMPLIED")),
|
||||
map(
|
||||
preceded(opt(pair(tag("#FIXED"), s)), att_value),
|
||||
|att_value| DefaultDecl::Fixed(att_value),
|
||||
),
|
||||
))(input)
|
||||
}
|
||||
|
||||
enum ConditionalSect<'s> {
|
||||
IncludeSect(IncludeSect<'s>),
|
||||
IgnoreSect(IgnoreSect<'s>),
|
||||
}
|
||||
/// [61] conditionalSect ::= includeSect | ignoreSect
|
||||
pub fn conditional_sect(input: &str) -> IResult<&str, ConditionalSect> {
|
||||
alt((
|
||||
map(include_sect, |include_sect| {
|
||||
ConditionalSect::IncludeSect(include_sect)
|
||||
}),
|
||||
map(ignore_sect, |ignore_sect| {
|
||||
ConditionalSect::IgnoreSect(ignore_sect)
|
||||
}),
|
||||
))(input)
|
||||
}
|
||||
|
||||
/// An `INCLUDE` conditional section (production [62]); wraps the
/// `ExtSubsetDecl` that `include_sect` parses between the opening
/// `<![INCLUDE[` and the closing `]]>`.
struct IncludeSect<'s>(ExtSubsetDecl<'s>);
|
||||
/// [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
|
||||
pub fn include_sect(input: &str) -> IResult<&str, IncludeSect> {
|
||||
map(
|
||||
delimited(
|
||||
tuple((tag("<!["), opt(s), tag("INCLUDE"), opt(s), tag("["))),
|
||||
ext_subset_decl,
|
||||
tag("]]>"),
|
||||
),
|
||||
|ext_subset_decl| IncludeSect(ext_subset_decl),
|
||||
)(input)
|
||||
}
|
||||
|
||||
/// An `IGNORE` conditional section (production [63]); holds the
/// `IgnoreSectContents` items that `ignore_sect` parses between the opening
/// `<![IGNORE[` and the closing `]]>`.
struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>);
|
||||
/// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
|
||||
pub fn ignore_sect(input: &str) -> IResult<&str, IgnoreSect> {
|
||||
map(
|
||||
delimited(
|
||||
tuple((tag("<!["), opt(s), tag("IGNORE"), opt(s), tag("["))),
|
||||
many0(ignore_sect_contents),
|
||||
tag("]]>"),
|
||||
),
|
||||
|ignore_sect_contents| IgnoreSect(ignore_sect_contents),
|
||||
)(input)
|
||||
}
|
||||
|
||||
struct IgnoreSectContents<'s> {
|
||||
// TODO: what the fuck does this mean
|
||||
ignore: Ignore<'s>,
|
||||
ignore_list: Vec<(IgnoreSectContents<'s>, Ignore<'s>)>,
|
||||
}
|
||||
/// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
|
||||
pub fn ignore_sect_contents(input: &str) -> IResult<&str, IgnoreSectContents> {
|
||||
map(
|
||||
pair(
|
||||
ignore,
|
||||
many0(tuple((
|
||||
delimited(tag("<!["), ignore_sect_contents, tag("]]>")),
|
||||
ignore,
|
||||
))),
|
||||
),
|
||||
|(ignore, ignore_list)| IgnoreSectContents {
|
||||
ignore,
|
||||
ignore_list,
|
||||
},
|
||||
)(input)
|
||||
}
|
||||
|
||||
/// Output of `ignore`: the slice of the input that was recognised as ignored
/// text.
type Ignore<'s> = &'s str;
|
||||
/// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
|
||||
pub fn ignore(input: &str) -> IResult<&str, Ignore> {
|
||||
recognize(many_till(xmlchar, peek(alt((tag("<!["), tag("]]>"))))))(input)
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum CharRef<'s> {
|
||||
Decimal(&'s str),
|
||||
Hexadecimal(&'s str),
|
||||
|
@ -801,6 +1039,7 @@ pub fn char_ref(input: &str) -> IResult<&str, CharRef> {
|
|||
))(input)
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum Reference<'s> {
|
||||
EntityRef(EntityRef<'s>),
|
||||
CharRef(CharRef<'s>),
|
||||
|
|
Loading…
Reference in New Issue