WIP: extSubset
This commit is contained in:
parent
afda87a8d7
commit
0b11cbbfd8
208
src/parser.rs
208
src/parser.rs
|
@ -2,7 +2,7 @@ use std::char;
|
|||
|
||||
use nom::{
|
||||
branch::{alt, permutation},
|
||||
bytes::streaming::{is_a, is_not, tag, tag_no_case, take, take_till, take_until},
|
||||
bytes::streaming::{is_a, is_not, tag, tag_no_case, take, take_till, take_until, take_while},
|
||||
character::{
|
||||
complete::one_of,
|
||||
streaming::{alpha1, char, digit1, none_of, satisfy},
|
||||
|
@ -16,6 +16,8 @@ use nom::{
|
|||
|
||||
// parser: parses tokens from lexer into events
|
||||
// no well formedness, validity, or data model, simple translation of input into rust types
|
||||
// output is a rust representation of the input xml
|
||||
// types could be used for xml production too?
|
||||
|
||||
enum ContentItem<'s> {
|
||||
CharData(&'s str),
|
||||
|
@ -89,37 +91,73 @@ pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> {
|
|||
recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input)
|
||||
}
|
||||
|
||||
type EntityValue<'s> = &'s str;
|
||||
enum LiteralData<'s> {
|
||||
String(&'s str),
|
||||
PEReference(PEReference<'s>),
|
||||
Reference(Reference<'s>),
|
||||
}
|
||||
|
||||
type EntityValue<'s> = Vec<LiteralData<'s>>;
|
||||
/// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
|
||||
/// | "'" ([^%&'] | PEReference | Reference)* "'"
|
||||
pub fn entity_value(input: &str) -> IResult<&str, EntityValue> {
|
||||
alt((
|
||||
delimited(
|
||||
char('"'),
|
||||
recognize(many0(alt((none_of("%&\""), pe_reference, reference)))),
|
||||
many0(alt((
|
||||
map(
|
||||
recognize(many_till(take(1usize), peek(one_of("%&\"")))),
|
||||
|string| LiteralData::String(string),
|
||||
),
|
||||
map(pe_reference, |pe_reference| {
|
||||
LiteralData::PEReference(pe_reference)
|
||||
}),
|
||||
map(reference, |reference| LiteralData::Reference(reference)),
|
||||
))),
|
||||
char('"'),
|
||||
),
|
||||
delimited(
|
||||
char('\''),
|
||||
recognize(many0(alt((none_of("%&'"), pe_reference, reference)))),
|
||||
many0(alt((
|
||||
map(
|
||||
recognize(many_till(take(1usize), peek(one_of("%&'")))),
|
||||
|string| LiteralData::String(string),
|
||||
),
|
||||
map(pe_reference, |pe_reference| {
|
||||
LiteralData::PEReference(pe_reference)
|
||||
}),
|
||||
map(reference, |reference| LiteralData::Reference(reference)),
|
||||
))),
|
||||
char('\''),
|
||||
),
|
||||
))(input)
|
||||
}
|
||||
|
||||
type AttValue<'s> = &'s str;
|
||||
/// Parsed form of an attribute value (production [10]): a sequence of `LiteralData` items.
type AttValue<'s> = Vec<LiteralData<'s>>;
|
||||
/// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
|
||||
/// | "'" ([^<&'] | Reference)* "'"
|
||||
pub fn att_value(input: &str) -> IResult<&str, AttValue> {
|
||||
alt((
|
||||
delimited(
|
||||
char('"'),
|
||||
recognize(many0(alt((none_of("<&\""), reference)))),
|
||||
many0(alt((
|
||||
map(
|
||||
recognize(many_till(take(1usize), peek(one_of("%&\"")))),
|
||||
|string| LiteralData::String(string),
|
||||
),
|
||||
map(reference, |reference| LiteralData::Reference(reference)),
|
||||
))),
|
||||
char('"'),
|
||||
),
|
||||
delimited(
|
||||
char('\''),
|
||||
recognize(many0(alt((none_of("<&'"), reference)))),
|
||||
many0(alt((
|
||||
map(
|
||||
recognize(many_till(take(1usize), peek(one_of("%&'")))),
|
||||
|string| LiteralData::String(string),
|
||||
),
|
||||
map(reference, |reference| LiteralData::Reference(reference)),
|
||||
))),
|
||||
char('\''),
|
||||
),
|
||||
))(input)
|
||||
|
@ -389,18 +427,18 @@ pub fn decl_sep(input: &str) -> IResult<&str, DeclSep> {
|
|||
))(input)
|
||||
}
|
||||
|
||||
enum IntSubsetItem<'s> {
|
||||
enum IntSubsetDeclaration<'s> {
|
||||
MarkupDecl(MarkupDecl<'s>),
|
||||
DeclSep(DeclSep<'s>),
|
||||
}
|
||||
type IntSubset<'s> = Vec<IntSubsetItem<'s>>;
|
||||
type IntSubset<'s> = Vec<IntSubsetDeclaration<'s>>;
|
||||
/// [28b] intSubset ::= (markupdecl | DeclSep)*
|
||||
pub fn int_subset(input: &str) -> IResult<&str, IntSubset> {
|
||||
many0(alt((
|
||||
map(markup_decl, |markup_decl| {
|
||||
IntSubsetItem::MarkupDecl(markup_decl)
|
||||
IntSubsetDeclaration::MarkupDecl(markup_decl)
|
||||
}),
|
||||
map(decl_sep, |decl_sep| IntSubsetItem::DeclSep(decl_sep)),
|
||||
map(decl_sep, |decl_sep| IntSubsetDeclaration::DeclSep(decl_sep)),
|
||||
)))(input)
|
||||
}
|
||||
|
||||
|
@ -432,9 +470,39 @@ pub fn markup_decl(input: &str) -> IResult<&str, MarkupDecl> {
|
|||
))(input)
|
||||
}
|
||||
|
||||
struct ExtSubset<'s> {
|
||||
text_decl: Option<TextDecl<'s>>,
|
||||
ext_subset_decl: ExtSubsetDecl<'s>,
|
||||
}
|
||||
/// [30] extSubset ::= TextDecl? extSubsetDecl
|
||||
pub fn ext_subset(input: &str) -> IResult<&str, ExtSubset> {
|
||||
map(
|
||||
pair(opt(text_decl), ext_subset_decl),
|
||||
|(text_decl, ext_subset_decl)| ExtSubset {
|
||||
text_decl,
|
||||
ext_subset_decl,
|
||||
},
|
||||
)(input)
|
||||
}
|
||||
|
||||
enum ExtSubsetDeclaration<'s> {
|
||||
MarkupDecl(MarkupDecl<'s>),
|
||||
ConditionalSect(ConditionalSect<'s>),
|
||||
DeclSep(DeclSep<'s>),
|
||||
}
|
||||
type ExtSubsetDecl<'s> = Vec<ExtSubsetDeclaration<'s>>;
|
||||
/// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
|
||||
pub fn ext_subset_decl(input: &str) -> IResult<&str, ExtSubsetDecl> {
|
||||
many0(alt((
|
||||
map(markup_decl, |markup_decl| {
|
||||
ExtSubsetDeclaration::MarkupDecl(markup_decl)
|
||||
}),
|
||||
map(conditional_sect, |conditional_sect| {
|
||||
ExtSubsetDeclaration::ConditionalSect(conditional_sect)
|
||||
}),
|
||||
map(decl_sep, |decl_sep| ExtSubsetDeclaration::DeclSep(decl_sep)),
|
||||
)))(input)
|
||||
}
|
||||
|
||||
/// Result type of production [32] (`SDDecl`), represented as a plain `bool`.
// NOTE(review): presumably `true` corresponds to 'yes' — confirm against `sd_decl`.
type SDDecl = bool;
|
||||
/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
|
||||
|
@ -458,10 +526,9 @@ pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {
|
|||
|
||||
// (Productions 33 through 38 have been removed.)
|
||||
|
||||
struct Element<'s> {
|
||||
name: &'s str,
|
||||
attributes: Vec<Attribute<'s>>,
|
||||
content: Content<'s>,
|
||||
enum Element<'s> {
|
||||
Empty(EmptyElemTag<'s>),
|
||||
NotEmpty(STag<'s>, Content<'s>, ETag<'s>),
|
||||
}
|
||||
/// [39] element ::= EmptyElemTag | STag content ETag
|
||||
pub fn element(input: &str) -> IResult<&str, Element> {
|
||||
|
@ -480,10 +547,29 @@ pub fn attribute(input: &str) -> IResult<&str, Attribute> {
|
|||
separated_pair(name, eq, att_value)(input)
|
||||
}
|
||||
|
||||
type CharRef<'s> = &'s str;
|
||||
/// A character reference (production [66]).
///
/// Only the digit text between the prefix and the `;` is stored; it is not
/// converted to a number or a character here.
enum CharRef<'s> {
    /// Digits of a `&#...;` reference.
    Decimal(&'s str),
    /// Hex digits of a `&#x...;` reference.
    Hexadecimal(&'s str),
}
|
||||
/// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
|
||||
pub fn char_ref(input: &str) -> IResult<&str, CharRef> {
|
||||
todo!()
|
||||
alt((
|
||||
delimited(
|
||||
tag("&#"),
|
||||
map(take_while(|c| matches!(c, '0'..='9')), |decimal| {
|
||||
CharRef::Decimal(decimal)
|
||||
}),
|
||||
tag(";"),
|
||||
),
|
||||
delimited(
|
||||
tag("&#x"),
|
||||
map(
|
||||
take_while(|c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F' )),
|
||||
|hexadecimal| CharRef::Hexadecimal(hexadecimal),
|
||||
),
|
||||
tag(";"),
|
||||
),
|
||||
))(input)
|
||||
}
|
||||
|
||||
enum Reference<'s> {
|
||||
|
@ -510,7 +596,86 @@ pub fn pe_reference(input: &str) -> IResult<&str, PEReference> {
|
|||
delimited(tag("%"), name, tag(";"))(input)
|
||||
}
|
||||
|
||||
/// TODO: entity declarations
|
||||
enum EntityDecl<'s> {
|
||||
GEDecl(GEDecl<'s>),
|
||||
PEDecl(PEDecl<'s>),
|
||||
}
|
||||
/// [70] EntityDecl ::= GEDecl | PEDecl
|
||||
pub fn entity_decl(input: &str) -> IResult<&str, EntityDecl> {
|
||||
alt((
|
||||
map(ge_decl, |ge_decl| EntityDecl::GEDecl(ge_decl)),
|
||||
map(pe_decl, |pe_decl| EntityDecl::PEDecl(pe_decl)),
|
||||
))(input)
|
||||
}
|
||||
|
||||
struct GEDecl<'s> {
|
||||
name: Name<'s>,
|
||||
entity_def: EntityDef<'s>,
|
||||
}
|
||||
/// [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
|
||||
pub fn ge_decl(input: &str) -> IResult<&str, GEDecl> {
|
||||
map(
|
||||
delimited(
|
||||
pair(tag("<!ENTITY"), s),
|
||||
separated_pair(name, s, entity_def),
|
||||
pair(opt(s), tag(">")),
|
||||
),
|
||||
|(name, entity_def)| GEDecl { name, entity_def },
|
||||
)(input)
|
||||
}
|
||||
|
||||
struct PEDecl<'s> {
|
||||
name: Name<'s>,
|
||||
pe_def: PEDef<'s>,
|
||||
}
|
||||
/// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
|
||||
pub fn pe_decl(input: &str) -> IResult<&str, PEDecl> {
|
||||
map(
|
||||
delimited(
|
||||
tuple((tag("<!ENTITY"), s, tag("%"), s)),
|
||||
separated_pair(name, s, pe_def),
|
||||
pair(opt(s), tag(">")),
|
||||
),
|
||||
|(name, pe_def)| PEDecl { name, pe_def },
|
||||
)(input)
|
||||
}
|
||||
|
||||
enum EntityDef<'s> {
|
||||
EntityValue(EntityValue<'s>),
|
||||
ExternalID {
|
||||
external_id: ExternalID<'s>,
|
||||
ndata_decl: Option<NDataDecl<'s>>,
|
||||
},
|
||||
}
|
||||
/// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
|
||||
pub fn entity_def(input: &str) -> IResult<&str, EntityDef> {
|
||||
alt((
|
||||
map(entity_value, |entity_value| {
|
||||
EntityDef::EntityValue(entity_value)
|
||||
}),
|
||||
map(
|
||||
pair(external_id, opt(ndata_decl)),
|
||||
|(external_id, ndata_decl)| EntityDef::ExternalID {
|
||||
external_id,
|
||||
ndata_decl,
|
||||
},
|
||||
),
|
||||
))(input)
|
||||
}
|
||||
|
||||
enum PEDef<'s> {
|
||||
EntityValue(EntityValue<'s>),
|
||||
ExternalID(ExternalID<'s>),
|
||||
}
|
||||
/// [74] PEDef ::= EntityValue | ExternalID
|
||||
pub fn pe_def(input: &str) -> IResult<&str, PEDef> {
|
||||
alt((
|
||||
map(entity_value, |entity_value| {
|
||||
PEDef::EntityValue(entity_value)
|
||||
}),
|
||||
map(external_id, |external_id| PEDef::ExternalID(external_id)),
|
||||
))(input)
|
||||
}
|
||||
|
||||
enum ExternalID<'s> {
|
||||
SYSTEM {
|
||||
|
@ -567,9 +732,12 @@ pub fn text_decl(input: &str) -> IResult<&str, TextDecl> {
|
|||
)(input)
|
||||
}
|
||||
|
||||
type extParsedEnt<'s> = (Option<TextDecl<'s>>, Content<'s>);
|
||||
struct ExtParsedEnt<'s> {
|
||||
text_decl: Option<TextDecl<'s>>,
|
||||
content: Content<'s>,
|
||||
}
|
||||
/// [78] extParsedEnt ::= TextDecl? content
|
||||
pub fn ext_parsed_ent(input: &str) -> IResult<&str, extParsedEnt> {
|
||||
pub fn ext_parsed_ent(input: &str) -> IResult<&str, ExtParsedEnt> {
|
||||
pair(opt(text_decl), content)(input)
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue