WIP: extSubset
This commit is contained in:
parent
afda87a8d7
commit
0b11cbbfd8
208
src/parser.rs
208
src/parser.rs
|
@ -2,7 +2,7 @@ use std::char;
|
||||||
|
|
||||||
use nom::{
|
use nom::{
|
||||||
branch::{alt, permutation},
|
branch::{alt, permutation},
|
||||||
bytes::streaming::{is_a, is_not, tag, tag_no_case, take, take_till, take_until},
|
bytes::streaming::{is_a, is_not, tag, tag_no_case, take, take_till, take_until, take_while},
|
||||||
character::{
|
character::{
|
||||||
complete::one_of,
|
complete::one_of,
|
||||||
streaming::{alpha1, char, digit1, none_of, satisfy},
|
streaming::{alpha1, char, digit1, none_of, satisfy},
|
||||||
|
@ -16,6 +16,8 @@ use nom::{
|
||||||
|
|
||||||
// parser: parses tokens from lexer into events
|
// parser: parses tokens from lexer into events
|
||||||
// no well formedness, validity, or data model, simple translation of input into rust types
|
// no well formedness, validity, or data model, simple translation of input into rust types
|
||||||
|
// output is a rust representation of the input xml
|
||||||
|
// types could be used for xml production too?
|
||||||
|
|
||||||
enum ContentItem<'s> {
|
enum ContentItem<'s> {
|
||||||
CharData(&'s str),
|
CharData(&'s str),
|
||||||
|
@ -89,37 +91,73 @@ pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> {
|
||||||
recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input)
|
recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
type EntityValue<'s> = &'s str;
|
enum LiteralData<'s> {
|
||||||
|
String(&'s str),
|
||||||
|
PEReference(PEReference<'s>),
|
||||||
|
Reference(Reference<'s>),
|
||||||
|
}
|
||||||
|
|
||||||
|
type EntityValue<'s> = Vec<LiteralData<'s>>;
|
||||||
/// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
|
/// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
|
||||||
/// | "'" ([^%&'] | PEReference | Reference)* "'"
|
/// | "'" ([^%&'] | PEReference | Reference)* "'"
|
||||||
pub fn entity_value(input: &str) -> IResult<&str, EntityValue> {
|
pub fn entity_value(input: &str) -> IResult<&str, EntityValue> {
|
||||||
alt((
|
alt((
|
||||||
delimited(
|
delimited(
|
||||||
char('"'),
|
char('"'),
|
||||||
recognize(many0(alt((none_of("%&\""), pe_reference, reference)))),
|
many0(alt((
|
||||||
|
map(
|
||||||
|
recognize(many_till(take(1usize), peek(one_of("%&\"")))),
|
||||||
|
|string| LiteralData::String(string),
|
||||||
|
),
|
||||||
|
map(pe_reference, |pe_reference| {
|
||||||
|
LiteralData::PEReference(pe_reference)
|
||||||
|
}),
|
||||||
|
map(reference, |reference| LiteralData::Reference(reference)),
|
||||||
|
))),
|
||||||
char('"'),
|
char('"'),
|
||||||
),
|
),
|
||||||
delimited(
|
delimited(
|
||||||
char('\''),
|
char('\''),
|
||||||
recognize(many0(alt((none_of("%&'"), pe_reference, reference)))),
|
many0(alt((
|
||||||
|
map(
|
||||||
|
recognize(many_till(take(1usize), peek(one_of("%&'")))),
|
||||||
|
|string| LiteralData::String(string),
|
||||||
|
),
|
||||||
|
map(pe_reference, |pe_reference| {
|
||||||
|
LiteralData::PEReference(pe_reference)
|
||||||
|
}),
|
||||||
|
map(reference, |reference| LiteralData::Reference(reference)),
|
||||||
|
))),
|
||||||
char('\''),
|
char('\''),
|
||||||
),
|
),
|
||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
type AttValue<'s> = &'s str;
|
type AttValue<'s> = Vec<LiteralData<'s>>;
|
||||||
/// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
|
/// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
|
||||||
/// | "'" ([^<&'] | Reference)* "'"
|
/// | "'" ([^<&'] | Reference)* "'"
|
||||||
pub fn att_value(input: &str) -> IResult<&str, AttValue> {
|
pub fn att_value(input: &str) -> IResult<&str, AttValue> {
|
||||||
alt((
|
alt((
|
||||||
delimited(
|
delimited(
|
||||||
char('"'),
|
char('"'),
|
||||||
recognize(many0(alt((none_of("<&\""), reference)))),
|
many0(alt((
|
||||||
|
map(
|
||||||
|
recognize(many_till(take(1usize), peek(one_of("%&\"")))),
|
||||||
|
|string| LiteralData::String(string),
|
||||||
|
),
|
||||||
|
map(reference, |reference| LiteralData::Reference(reference)),
|
||||||
|
))),
|
||||||
char('"'),
|
char('"'),
|
||||||
),
|
),
|
||||||
delimited(
|
delimited(
|
||||||
char('\''),
|
char('\''),
|
||||||
recognize(many0(alt((none_of("<&'"), reference)))),
|
many0(alt((
|
||||||
|
map(
|
||||||
|
recognize(many_till(take(1usize), peek(one_of("%&'")))),
|
||||||
|
|string| LiteralData::String(string),
|
||||||
|
),
|
||||||
|
map(reference, |reference| LiteralData::Reference(reference)),
|
||||||
|
))),
|
||||||
char('\''),
|
char('\''),
|
||||||
),
|
),
|
||||||
))(input)
|
))(input)
|
||||||
|
@ -389,18 +427,18 @@ pub fn decl_sep(input: &str) -> IResult<&str, DeclSep> {
|
||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
enum IntSubsetItem<'s> {
|
enum IntSubsetDeclaration<'s> {
|
||||||
MarkupDecl(MarkupDecl<'s>),
|
MarkupDecl(MarkupDecl<'s>),
|
||||||
DeclSep(DeclSep<'s>),
|
DeclSep(DeclSep<'s>),
|
||||||
}
|
}
|
||||||
type IntSubset<'s> = Vec<IntSubsetItem<'s>>;
|
type IntSubset<'s> = Vec<IntSubsetDeclaration<'s>>;
|
||||||
/// [28b] intSubset ::= (markupdecl | DeclSep)*
|
/// [28b] intSubset ::= (markupdecl | DeclSep)*
|
||||||
pub fn int_subset(input: &str) -> IResult<&str, IntSubset> {
|
pub fn int_subset(input: &str) -> IResult<&str, IntSubset> {
|
||||||
many0(alt((
|
many0(alt((
|
||||||
map(markup_decl, |markup_decl| {
|
map(markup_decl, |markup_decl| {
|
||||||
IntSubsetItem::MarkupDecl(markup_decl)
|
IntSubsetDeclaration::MarkupDecl(markup_decl)
|
||||||
}),
|
}),
|
||||||
map(decl_sep, |decl_sep| IntSubsetItem::DeclSep(decl_sep)),
|
map(decl_sep, |decl_sep| IntSubsetDeclaration::DeclSep(decl_sep)),
|
||||||
)))(input)
|
)))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -432,9 +470,39 @@ pub fn markup_decl(input: &str) -> IResult<&str, MarkupDecl> {
|
||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ExtSubset<'s> {
|
||||||
|
text_decl: Option<TextDecl<'s>>,
|
||||||
|
ext_subset_decl: ExtSubsetDecl<'s>,
|
||||||
|
}
|
||||||
/// [30] extSubset ::= TextDecl? extSubsetDecl
|
/// [30] extSubset ::= TextDecl? extSubsetDecl
|
||||||
|
pub fn ext_subset(input: &str) -> IResult<&str, ExtSubset> {
|
||||||
|
map(
|
||||||
|
pair(opt(text_decl), ext_subset_decl),
|
||||||
|
|(text_decl, ext_subset_decl)| ExtSubset {
|
||||||
|
text_decl,
|
||||||
|
ext_subset_decl,
|
||||||
|
},
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
enum ExtSubsetDeclaration<'s> {
|
||||||
|
MarkupDecl(MarkupDecl<'s>),
|
||||||
|
ConditionalSect(ConditionalSect<'s>),
|
||||||
|
DeclSep(DeclSep<'s>),
|
||||||
|
}
|
||||||
|
type ExtSubsetDecl<'s> = Vec<ExtSubsetDeclaration<'s>>;
|
||||||
/// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
|
/// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
|
||||||
|
pub fn ext_subset_decl(input: &str) -> IResult<&str, ExtSubsetDecl> {
|
||||||
|
many0(alt((
|
||||||
|
map(markup_decl, |markup_decl| {
|
||||||
|
ExtSubsetDeclaration::MarkupDecl(markup_decl)
|
||||||
|
}),
|
||||||
|
map(conditional_sect, |conditional_sect| {
|
||||||
|
ExtSubsetDeclaration::ConditionalSect(conditional_sect)
|
||||||
|
}),
|
||||||
|
map(decl_sep, |decl_sep| ExtSubsetDeclaration::DeclSep(decl_sep)),
|
||||||
|
)))(input)
|
||||||
|
}
|
||||||
|
|
||||||
type SDDecl = bool;
|
type SDDecl = bool;
|
||||||
/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
|
/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
|
||||||
|
@ -458,10 +526,9 @@ pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {
|
||||||
|
|
||||||
// (Productions 33 through 38 have been removed.)
|
// (Productions 33 through 38 have been removed.)
|
||||||
|
|
||||||
struct Element<'s> {
|
enum Element<'s> {
|
||||||
name: &'s str,
|
Empty(EmptyElemTag<'s>),
|
||||||
attributes: Vec<Attribute<'s>>,
|
NotEmpty(STag<'s>, Content<'s>, ETag<'s>),
|
||||||
content: Content<'s>,
|
|
||||||
}
|
}
|
||||||
/// [39] element ::= EmptyElemTag | STag content ETag
|
/// [39] element ::= EmptyElemTag | STag content ETag
|
||||||
pub fn element(input: &str) -> IResult<&str, Element> {
|
pub fn element(input: &str) -> IResult<&str, Element> {
|
||||||
|
@ -480,10 +547,29 @@ pub fn attribute(input: &str) -> IResult<&str, Attribute> {
|
||||||
separated_pair(name, eq, att_value)(input)
|
separated_pair(name, eq, att_value)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
type CharRef<'s> = &'s str;
|
enum CharRef<'s> {
|
||||||
|
Decimal(&'s str),
|
||||||
|
Hexadecimal(&'s str),
|
||||||
|
}
|
||||||
/// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
|
/// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
|
||||||
pub fn char_ref(input: &str) -> IResult<&str, CharRef> {
|
pub fn char_ref(input: &str) -> IResult<&str, CharRef> {
|
||||||
todo!()
|
alt((
|
||||||
|
delimited(
|
||||||
|
tag("&#"),
|
||||||
|
map(take_while(|c| matches!(c, '0'..='9')), |decimal| {
|
||||||
|
CharRef::Decimal(decimal)
|
||||||
|
}),
|
||||||
|
tag(";"),
|
||||||
|
),
|
||||||
|
delimited(
|
||||||
|
tag("&#x"),
|
||||||
|
map(
|
||||||
|
take_while(|c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F' )),
|
||||||
|
|hexadecimal| CharRef::Hexadecimal(hexadecimal),
|
||||||
|
),
|
||||||
|
tag(";"),
|
||||||
|
),
|
||||||
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
enum Reference<'s> {
|
enum Reference<'s> {
|
||||||
|
@ -510,7 +596,86 @@ pub fn pe_reference(input: &str) -> IResult<&str, PEReference> {
|
||||||
delimited(tag("%"), name, tag(";"))(input)
|
delimited(tag("%"), name, tag(";"))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// TODO: entity declarations
|
enum EntityDecl<'s> {
|
||||||
|
GEDecl(GEDecl<'s>),
|
||||||
|
PEDecl(PEDecl<'s>),
|
||||||
|
}
|
||||||
|
/// [70] EntityDecl ::= GEDecl | PEDecl
|
||||||
|
pub fn entity_decl(input: &str) -> IResult<&str, EntityDecl> {
|
||||||
|
alt((
|
||||||
|
map(ge_decl, |ge_decl| EntityDecl::GEDecl(ge_decl)),
|
||||||
|
map(pe_decl, |pe_decl| EntityDecl::PEDecl(pe_decl)),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
struct GEDecl<'s> {
|
||||||
|
name: Name<'s>,
|
||||||
|
entity_def: EntityDef<'s>,
|
||||||
|
}
|
||||||
|
/// [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
|
||||||
|
pub fn ge_decl(input: &str) -> IResult<&str, GEDecl> {
|
||||||
|
map(
|
||||||
|
delimited(
|
||||||
|
pair(tag("<!ENTITY"), s),
|
||||||
|
separated_pair(name, s, entity_def),
|
||||||
|
pair(opt(s), tag(">")),
|
||||||
|
),
|
||||||
|
|(name, entity_def)| GEDecl { name, entity_def },
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
struct PEDecl<'s> {
|
||||||
|
name: Name<'s>,
|
||||||
|
pe_def: PEDef<'s>,
|
||||||
|
}
|
||||||
|
/// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
|
||||||
|
pub fn pe_decl(input: &str) -> IResult<&str, PEDecl> {
|
||||||
|
map(
|
||||||
|
delimited(
|
||||||
|
tuple((tag("<!ENTITY"), s, tag("%"), s)),
|
||||||
|
separated_pair(name, s, pe_def),
|
||||||
|
pair(opt(s), tag(">")),
|
||||||
|
),
|
||||||
|
|(name, pe_def)| PEDecl { name, pe_def },
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
enum EntityDef<'s> {
|
||||||
|
EntityValue(EntityValue<'s>),
|
||||||
|
ExternalID {
|
||||||
|
external_id: ExternalID<'s>,
|
||||||
|
ndata_decl: Option<NDataDecl<'s>>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
/// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
|
||||||
|
pub fn entity_def(input: &str) -> IResult<&str, EntityDef> {
|
||||||
|
alt((
|
||||||
|
map(entity_value, |entity_value| {
|
||||||
|
EntityDef::EntityValue(entity_value)
|
||||||
|
}),
|
||||||
|
map(
|
||||||
|
pair(external_id, opt(ndata_decl)),
|
||||||
|
|(external_id, ndata_decl)| EntityDef::ExternalID {
|
||||||
|
external_id,
|
||||||
|
ndata_decl,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
enum PEDef<'s> {
|
||||||
|
EntityValue(EntityValue<'s>),
|
||||||
|
ExternalID(ExternalID<'s>),
|
||||||
|
}
|
||||||
|
/// [74] PEDef ::= EntityValue | ExternalID
|
||||||
|
pub fn pe_def(input: &str) -> IResult<&str, PEDef> {
|
||||||
|
alt((
|
||||||
|
map(entity_value, |entity_value| {
|
||||||
|
PEDef::EntityValue(entity_value)
|
||||||
|
}),
|
||||||
|
map(external_id, |external_id| PEDef::ExternalID(external_id)),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
enum ExternalID<'s> {
|
enum ExternalID<'s> {
|
||||||
SYSTEM {
|
SYSTEM {
|
||||||
|
@ -567,9 +732,12 @@ pub fn text_decl(input: &str) -> IResult<&str, TextDecl> {
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
type extParsedEnt<'s> = (Option<TextDecl<'s>>, Content<'s>);
|
struct ExtParsedEnt<'s> {
|
||||||
|
text_decl: Option<TextDecl<'s>>,
|
||||||
|
content: Content<'s>,
|
||||||
|
}
|
||||||
/// [78] extParsedEnt ::= TextDecl? content
|
/// [78] extParsedEnt ::= TextDecl? content
|
||||||
pub fn ext_parsed_ent(input: &str) -> IResult<&str, extParsedEnt> {
|
pub fn ext_parsed_ent(input: &str) -> IResult<&str, ExtParsedEnt> {
|
||||||
pair(opt(text_decl), content)(input)
|
pair(opt(text_decl), content)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue