WIP: dtd garbo
This commit is contained in:
parent
feb13be926
commit
afda87a8d7
280
src/parser.rs
280
src/parser.rs
|
@ -10,11 +10,12 @@ use nom::{
|
||||||
combinator::{cond, map, map_parser, map_res, not, opt, peek, recognize, value, verify},
|
combinator::{cond, map, map_parser, map_res, not, opt, peek, recognize, value, verify},
|
||||||
error::{Error, ErrorKind},
|
error::{Error, ErrorKind},
|
||||||
multi::{many0, many1, many_till},
|
multi::{many0, many1, many_till},
|
||||||
sequence::{delimited, pair, preceded, tuple},
|
sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
|
||||||
Err, IResult, Parser,
|
Err, IResult, Parser,
|
||||||
};
|
};
|
||||||
|
|
||||||
// parser: parses tokens from lexer into events
|
// parser: parses tokens from lexer into events
|
||||||
|
// no well formedness, validity, or data model, simple translation of input into rust types
|
||||||
|
|
||||||
enum ContentItem<'s> {
|
enum ContentItem<'s> {
|
||||||
CharData(&'s str),
|
CharData(&'s str),
|
||||||
|
@ -25,15 +26,6 @@ enum ContentItem<'s> {
|
||||||
|
|
||||||
type Content<'s> = Option<Vec<ContentItem<'s>>>;
|
type Content<'s> = Option<Vec<ContentItem<'s>>>;
|
||||||
|
|
||||||
struct DoctypeDecl<'s> {
|
|
||||||
name: &'s str,
|
|
||||||
// TODO: doctype declaration parsing
|
|
||||||
}
|
|
||||||
///
|
|
||||||
pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
|
type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
|
||||||
/// [1] document ::= prolog element Misc*
|
/// [1] document ::= prolog element Misc*
|
||||||
pub fn document(input: &str) -> IResult<&str, Document> {
|
pub fn document(input: &str) -> IResult<&str, Document> {
|
||||||
|
@ -211,21 +203,20 @@ struct PI<'s> {
|
||||||
}
|
}
|
||||||
/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
|
/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
|
||||||
pub fn pi(input: &str) -> IResult<&str, PI> {
|
pub fn pi(input: &str) -> IResult<&str, PI> {
|
||||||
let (rest, (target, instruction)) = delimited(
|
map(
|
||||||
tag("<?"),
|
delimited(
|
||||||
pair(
|
tag("<?"),
|
||||||
pi_target,
|
pair(
|
||||||
opt(recognize(pair(s, many_till(xmlchar, peek(tag("?>")))))),
|
pi_target,
|
||||||
|
opt(recognize(pair(s, many_till(xmlchar, peek(tag("?>")))))),
|
||||||
|
),
|
||||||
|
tag("?>"),
|
||||||
),
|
),
|
||||||
tag("?>"),
|
|(target, instruction)| PI {
|
||||||
)(input)?;
|
|
||||||
Ok((
|
|
||||||
rest,
|
|
||||||
PI {
|
|
||||||
target,
|
target,
|
||||||
instruction,
|
instruction,
|
||||||
},
|
},
|
||||||
))
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
type PITarget<'s> = &'s str;
|
type PITarget<'s> = &'s str;
|
||||||
|
@ -288,21 +279,18 @@ struct XMLDecl<'s> {
|
||||||
}
|
}
|
||||||
/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
|
/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
|
||||||
pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> {
|
pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> {
|
||||||
// (VersionInfo, Option<EncodingDecl>, Option<SDDecl>)
|
map(
|
||||||
let (leftover, (version_info, encoding_decl, sd_decl)) = delimited(
|
delimited(
|
||||||
tag("<?xml"),
|
tag("<?xml"),
|
||||||
tuple((version_info, opt(encoding_decl), opt(sd_decl))),
|
tuple((version_info, opt(encoding_decl), opt(sd_decl))),
|
||||||
pair(opt(s), tag("?>")),
|
pair(opt(s), tag("?>")),
|
||||||
)(input)?;
|
),
|
||||||
// TODO: change to map
|
|(version_info, encoding_decl, sd_decl)| XMLDecl {
|
||||||
Ok((
|
|
||||||
leftover,
|
|
||||||
XMLDecl {
|
|
||||||
version_info,
|
version_info,
|
||||||
encoding_decl,
|
encoding_decl,
|
||||||
sd_decl,
|
sd_decl,
|
||||||
},
|
},
|
||||||
))
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
type VersionInfo = VersionNum;
|
type VersionInfo = VersionNum;
|
||||||
|
@ -342,6 +330,7 @@ pub fn version_num(input: &str) -> IResult<&str, VersionNum> {
|
||||||
enum Misc<'s> {
|
enum Misc<'s> {
|
||||||
Comment(Comment<'s>),
|
Comment(Comment<'s>),
|
||||||
PI(PI<'s>),
|
PI(PI<'s>),
|
||||||
|
// TODO: how to deal with whitespace
|
||||||
S,
|
S,
|
||||||
}
|
}
|
||||||
/// [27] Misc ::= Comment | PI | S
|
/// [27] Misc ::= Comment | PI | S
|
||||||
|
@ -353,6 +342,100 @@ pub fn misc(input: &str) -> IResult<&str, Misc> {
|
||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct DoctypeDecl<'s> {
|
||||||
|
name: &'s str,
|
||||||
|
external_id: Option<ExternalID<'s>>,
|
||||||
|
int_subset: Option<IntSubset<'s>>,
|
||||||
|
}
|
||||||
|
/// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
|
||||||
|
pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> {
|
||||||
|
map(
|
||||||
|
delimited(
|
||||||
|
pair(tag("<!DOCTYPE"), s),
|
||||||
|
tuple((
|
||||||
|
name,
|
||||||
|
opt(preceded(s, external_id)),
|
||||||
|
preceded(
|
||||||
|
opt(s),
|
||||||
|
opt(terminated(
|
||||||
|
delimited(tag("["), int_subset, tag("]")),
|
||||||
|
opt(s),
|
||||||
|
)),
|
||||||
|
),
|
||||||
|
)),
|
||||||
|
tag(">"),
|
||||||
|
),
|
||||||
|
|(name, external_id, int_subset)| DoctypeDecl {
|
||||||
|
name,
|
||||||
|
external_id,
|
||||||
|
int_subset,
|
||||||
|
},
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
enum DeclSep<'s> {
|
||||||
|
PEReference(PEReference<'s>),
|
||||||
|
// TODO: tackle whitespace
|
||||||
|
S,
|
||||||
|
}
|
||||||
|
/// [28a] DeclSep ::= PEReference | S
|
||||||
|
pub fn decl_sep(input: &str) -> IResult<&str, DeclSep> {
|
||||||
|
alt((
|
||||||
|
map(pe_reference, |pe_reference| {
|
||||||
|
DeclSep::PEReference(pe_reference)
|
||||||
|
}),
|
||||||
|
value(DeclSep::S, s),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
enum IntSubsetItem<'s> {
|
||||||
|
MarkupDecl(MarkupDecl<'s>),
|
||||||
|
DeclSep(DeclSep<'s>),
|
||||||
|
}
|
||||||
|
type IntSubset<'s> = Vec<IntSubsetItem<'s>>;
|
||||||
|
/// [28b] intSubset ::= (markupdecl | DeclSep)*
|
||||||
|
pub fn int_subset(input: &str) -> IResult<&str, IntSubset> {
|
||||||
|
many0(alt((
|
||||||
|
map(markup_decl, |markup_decl| {
|
||||||
|
IntSubsetItem::MarkupDecl(markup_decl)
|
||||||
|
}),
|
||||||
|
map(decl_sep, |decl_sep| IntSubsetItem::DeclSep(decl_sep)),
|
||||||
|
)))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
enum MarkupDecl<'s> {
|
||||||
|
ElementDecl(ElementDecl<'s>),
|
||||||
|
AttlistDecl(AttlistDecl<'s>),
|
||||||
|
EntityDecl(EntityDecl<'s>),
|
||||||
|
NotationDecl(NotationDecl<'s>),
|
||||||
|
PI(PI<'s>),
|
||||||
|
Comment(Comment<'s>),
|
||||||
|
}
|
||||||
|
/// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
|
||||||
|
pub fn markup_decl(input: &str) -> IResult<&str, MarkupDecl> {
|
||||||
|
alt((
|
||||||
|
map(element_decl, |element_decl| {
|
||||||
|
MarkupDecl::ElementDecl(element_decl)
|
||||||
|
}),
|
||||||
|
map(attlist_decl, |attlist_decl| {
|
||||||
|
MarkupDecl::AttlistDecl(attlist_decl)
|
||||||
|
}),
|
||||||
|
map(entity_decl, |entity_decl| {
|
||||||
|
MarkupDecl::EntityDecl(entity_decl)
|
||||||
|
}),
|
||||||
|
map(notation_decl, |notation_decl| {
|
||||||
|
MarkupDecl::NotationDecl(notation_decl)
|
||||||
|
}),
|
||||||
|
map(pi, |pi| MarkupDecl::PI(pi)),
|
||||||
|
map(comment, |comment| MarkupDecl::Comment(comment)),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// [30] extSubset ::= TextDecl? extSubsetDecl
|
||||||
|
|
||||||
|
/// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
|
||||||
|
|
||||||
type SDDecl = bool;
|
type SDDecl = bool;
|
||||||
/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
|
/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
|
||||||
pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {
|
pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {
|
||||||
|
@ -388,18 +471,106 @@ pub fn element(input: &str) -> IResult<&str, Element> {
|
||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
let
|
// let STag<'s> = (Name<'s>, );
|
||||||
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
|
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
|
||||||
|
|
||||||
type Attribute<'s> = (&'s str, &'s str)
|
type Attribute<'s> = (Name<'s>, AttValue<'s>);
|
||||||
/// [41] Attribute ::= Name Eq AttValue
|
/// [41] Attribute ::= Name Eq AttValue
|
||||||
|
pub fn attribute(input: &str) -> IResult<&str, Attribute> {
|
||||||
|
separated_pair(name, eq, att_value)(input)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn reference(input: &str) -> IResult<&str, char> {
|
type CharRef<'s> = &'s str;
|
||||||
|
/// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
|
||||||
|
pub fn char_ref(input: &str) -> IResult<&str, CharRef> {
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn pe_reference(input: &str) -> IResult<&str, char> {
|
enum Reference<'s> {
|
||||||
todo!()
|
EntityRef(EntityRef<'s>),
|
||||||
|
CharRef(CharRef<'s>),
|
||||||
|
}
|
||||||
|
/// [67] Reference ::= EntityRef | CharRef
|
||||||
|
pub fn reference(input: &str) -> IResult<&str, Reference> {
|
||||||
|
alt((
|
||||||
|
map(entity_ref, |entity_ref| Reference::EntityRef(entity_ref)),
|
||||||
|
map(char_ref, |char_ref| Reference::CharRef(char_ref)),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
type EntityRef<'s> = &'s str;
|
||||||
|
/// [68] EntityRef ::= '&' Name ';'
|
||||||
|
pub fn entity_ref(input: &str) -> IResult<&str, EntityRef> {
|
||||||
|
delimited(tag("&"), name, tag(";"))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
type PEReference<'s> = &'s str;
|
||||||
|
/// [69] PEReference ::= '%' Name ';'
|
||||||
|
pub fn pe_reference(input: &str) -> IResult<&str, PEReference> {
|
||||||
|
delimited(tag("%"), name, tag(";"))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// TODO: entity declarations
|
||||||
|
|
||||||
|
enum ExternalID<'s> {
|
||||||
|
SYSTEM {
|
||||||
|
system_identifier: &'s str,
|
||||||
|
},
|
||||||
|
PUBLIC {
|
||||||
|
public_identifier: &'s str,
|
||||||
|
system_identifier: &'s str,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
/// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
|
||||||
|
// pub fn external_id(input: &str) -> IResult<&str, ExternalID> {
|
||||||
|
pub fn external_id(input: &str) -> IResult<&str, ExternalID> {
|
||||||
|
alt((
|
||||||
|
map(
|
||||||
|
preceded(pair(tag("SYSTEM"), s), system_literal),
|
||||||
|
|system_identifier| ExternalID::SYSTEM { system_identifier },
|
||||||
|
),
|
||||||
|
map(
|
||||||
|
preceded(
|
||||||
|
pair(tag("PUBLIC"), s),
|
||||||
|
separated_pair(pubid_literal, s, system_literal),
|
||||||
|
),
|
||||||
|
|(public_identifier, system_identifier)| ExternalID::PUBLIC {
|
||||||
|
public_identifier,
|
||||||
|
system_identifier,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
type NDataDecl<'s> = &'s str;
|
||||||
|
/// [76] NDataDecl ::= S 'NDATA' S Name
|
||||||
|
pub fn ndata_decl(input: &str) -> IResult<&str, NDataDecl> {
|
||||||
|
preceded(tuple((s, tag("NDATA"), s)), name)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
struct TextDecl<'s> {
|
||||||
|
version_info: Option<VersionInfo>,
|
||||||
|
encoding_decl: EncodingDecl<'s>,
|
||||||
|
}
|
||||||
|
/// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
|
||||||
|
pub fn text_decl(input: &str) -> IResult<&str, TextDecl> {
|
||||||
|
map(
|
||||||
|
delimited(
|
||||||
|
tag("<?xml"),
|
||||||
|
pair(opt(version_info), terminated(encoding_decl, opt(s))),
|
||||||
|
tag("?>"),
|
||||||
|
),
|
||||||
|
|(version_info, encoding_decl)| TextDecl {
|
||||||
|
version_info,
|
||||||
|
encoding_decl,
|
||||||
|
},
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
type extParsedEnt<'s> = (Option<TextDecl<'s>>, Content<'s>);
|
||||||
|
/// [78] extParsedEnt ::= TextDecl? content
|
||||||
|
pub fn ext_parsed_ent(input: &str) -> IResult<&str, extParsedEnt> {
|
||||||
|
pair(opt(text_decl), content)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
type EncodingDecl<'s> = EncName<'s>;
|
type EncodingDecl<'s> = EncName<'s>;
|
||||||
|
@ -425,6 +596,41 @@ pub fn enc_name(input: &str) -> IResult<&str, EncName> {
|
||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct NotationDecl<'s> {
|
||||||
|
name: &'s str,
|
||||||
|
id: NotationDeclID<'s>,
|
||||||
|
}
|
||||||
|
enum NotationDeclID<'s> {
|
||||||
|
External(ExternalID<'s>),
|
||||||
|
Public(PublicID<'s>),
|
||||||
|
}
|
||||||
|
/// [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
|
||||||
|
pub fn notation_decl(input: &str) -> IResult<&str, NotationDecl> {
|
||||||
|
map(
|
||||||
|
delimited(
|
||||||
|
pair(tag("<!NOTATION"), s),
|
||||||
|
separated_pair(
|
||||||
|
name,
|
||||||
|
s,
|
||||||
|
alt((
|
||||||
|
map(external_id, |external_id| {
|
||||||
|
NotationDeclID::External(external_id)
|
||||||
|
}),
|
||||||
|
map(public_id, |public_id| NotationDeclID::Public(public_id)),
|
||||||
|
)),
|
||||||
|
),
|
||||||
|
pair(opt(s), tag(">")),
|
||||||
|
),
|
||||||
|
|(name, id)| NotationDecl { name, id },
|
||||||
|
)(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
type PublicID<'s> = &'s str;
|
||||||
|
/// [83] PublicID ::= 'PUBLIC' S PubidLiteral
|
||||||
|
pub fn public_id(input: &str) -> IResult<&str, PublicID> {
|
||||||
|
preceded(pair(tag("PUBLIC"), s), pubid_literal)(input)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::num::NonZero;
|
use std::num::NonZero;
|
||||||
|
|
Loading…
Reference in New Issue