use nom::{ branch::alt, bytes::complete::{is_a, is_not, tag, take, take_while}, character::complete::{char, none_of, one_of, satisfy}, combinator::{map, not, opt, peek, recognize, value}, error::{Error, ErrorKind}, multi::{many0, many1, many_till}, sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, Err, IResult, Parser as NomParser, }; use crate::xml::NSAttName; use super::{ AttDef, AttDefName, AttType, AttValue, AttValueData, AttlistDecl, Attribute, CDEnd, CDSect, CDStart, CData, Char, CharData, CharRef, Children, ChildrenKind, Choice, Comment, ConditionalSect, Content, ContentItem, Contentspec, Cp, CpKind, DeclSep, DefaultAttName, DefaultDecl, DoctypeDecl, Document, ETag, Element, Elementdecl, EmptyElemTag, EncName, EncodingDecl, EntityDecl, EntityDef, EntityRef, EntityValue, EntityValueData, EnumeratedType, Enumeration, Eq, ExtParsedEnt, ExtSubset, ExtSubsetDecl, ExtSubsetDeclaration, ExternalID, GEDecl, Ignore, IgnoreSect, IgnoreSectContents, IncludeSect, IntSubset, IntSubsetDeclaration, LocalPart, MarkupDecl, Misc, Mixed, NCName, NDataDecl, Name, NameChar, NameStartChar, Names, Nmtoken, Nmtokens, NotationDecl, NotationDeclID, NotationType, Occurence, PEDecl, PEDef, PEReference, PITarget, Prefix, PrefixedAttName, PrefixedName, Prolog, PubidChar, PubidLiteral, PublicID, QName, Reference, SDDecl, STag, Seq, StringType, SystemLiteral, TextDecl, TokenizedType, UnprefixedName, VersionInfo, VersionNum, XMLDecl, PI, S, }; pub trait Parser<'s> { type Output; fn parse(input: &'s str) -> IResult<&'s str, Self::Output>; fn parse_full(input: &'s str) -> crate::Result { match ::parse(input) { Ok((rest, output)) => { if rest.is_empty() { return Ok(output); } else { return Err(crate::error::Error::ExtraData(rest.to_string())); } } Result::Err(e) => return Err(crate::error::Error::ParseError(e.to_string())), } } } /// [1] NSAttName ::= PrefixedAttName | DefaultAttName impl<'s> Parser<'s> for NSAttName<'s> { type Output = NSAttName<'s>; fn parse(input: 
&'s str) -> IResult<&'s str, Self::Output> {
        // The prefixed form is tried first because `xmlns` is a prefix of `xmlns:…`.
        alt((
            map(PrefixedAttName::parse, NSAttName::PrefixedAttName),
            value(NSAttName::DefaultAttName, DefaultAttName::parse),
        ))(input)
    }
}

/// [2] PrefixedAttName ::= 'xmlns:' NCName
impl<'s> Parser<'s> for PrefixedAttName<'s> {
    type Output = PrefixedAttName<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, PrefixedAttName<'s>> {
        map(preceded(tag("xmlns:"), NCName::parse), PrefixedAttName)(input)
    }
}

/// [3] DefaultAttName ::= 'xmlns';
impl Parser<'_> for DefaultAttName {
    type Output = DefaultAttName;

    fn parse(input: &str) -> IResult<&str, DefaultAttName> {
        value(DefaultAttName, tag("xmlns"))(input)
    }
}

/// [5] Name ::= NameStartChar (NameChar)*
/// [4] NCName ::= Name - (Char* ':' Char*)
impl<'s> Parser<'s> for NCName<'s> {
    type Output = NCName<'s>;

    /// Parses the longest `Name` at the start of `input` and cuts it at its first `:`.
    ///
    /// The colon and everything after it are left unconsumed. A `Name` that starts with `:`
    /// would give an empty NCName, which the production does not allow, so that case is
    /// reported as a recoverable error.
    fn parse(input: &'s str) -> IResult<&'s str, NCName<'s>> {
        let (rest, name) = recognize(Name::parse)(input)?;
        match name.find(':') {
            Some(0) => Err(Err::Error(Error {
                input,
                code: ErrorKind::Tag,
            })),
            // `find` yields a byte offset, so slice by bytes; nom's `take` counts chars and
            // would split at the wrong place for multi-byte names.
            Some(colon) => Ok((&input[colon..], NCName(&name[..colon]))),
            None => Ok((rest, NCName(name))),
        }
    }
}

/// [7] QName ::= PrefixedName | UnprefixedName
impl<'s> Parser<'s> for QName<'s> {
    type Output = QName<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, QName<'s>> {
        alt((
            map(PrefixedName::parse, QName::PrefixedName),
            map(UnprefixedName::parse, QName::UnprefixedName),
        ))(input)
    }
}

/// [8] PrefixedName ::= Prefix ':' LocalPart
impl<'s> Parser<'s> for PrefixedName<'s> {
    type Output = PrefixedName<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, PrefixedName<'s>> {
        map(
            separated_pair(Prefix::parse, char(':'), LocalPart::parse),
            |(prefix, local_part)| PrefixedName { prefix, local_part },
        )(input)
    }
}

/// [9] UnprefixedName ::= LocalPart
impl<'s> Parser<'s> for UnprefixedName<'s> {
    type Output = UnprefixedName<'s>;

    fn parse(input: &'s str) -> IResult<&'s str,
UnprefixedName<'s>> {
        map(LocalPart::parse, UnprefixedName)(input)
    }
}

/// [10] Prefix ::= NCName
impl<'s> Parser<'s> for Prefix<'s> {
    type Output = Prefix<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, Prefix<'s>> {
        map(NCName::parse, Prefix)(input)
    }
}

/// [11] LocalPart ::= NCName
impl<'s> Parser<'s> for LocalPart<'s> {
    type Output = LocalPart<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, LocalPart<'s>> {
        map(NCName::parse, LocalPart)(input)
    }
}

// xml spec

/// [1] document ::= prolog element Misc*
impl<'s> Parser<'s> for Document<'s> {
    type Output = Document<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, Document<'s>> {
        let trailing_misc = many0(Misc::parse);
        tuple((Prolog::parse, Element::parse, trailing_misc))(input)
    }
}

/// [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
/* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
impl Parser<'_> for Char {
    type Output = Char;

    fn parse(input: &str) -> IResult<&str, Char> {
        let xml_char = satisfy(
            |ch| matches!(ch, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}'),
        );
        map(xml_char, Char)(input)
    }
}

/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
impl Parser<'_> for S {
    type Output = S;

    fn parse(input: &str) -> IResult<&str, S> {
        // TODO?: whitespacing
        // map(is_a("\u{20}\u{9}\u{D}\u{A}"), |s| S(s))(input)
        let whitespace = is_a("\u{20}\u{9}\u{D}\u{A}");
        value(S, whitespace)(input)
    }
}

/// [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
impl Parser<'_> for NameStartChar {
    type Output = NameStartChar;

    fn parse(input: &str) -> IResult<&str, NameStartChar> {
        map(
            satisfy(
                |c| matches!(c, ':' | 'A'..='Z' | '_' | 'a'..='z' | '\u{C0}'..='\u{D6}' | '\u{D8}'..='\u{F6}' | '\u{F8}'..='\u{2FF}' | '\u{370}'..='\u{37D}' |
'\u{37F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | '\u{10000}'..='\u{EFFFF}'),
            ),
            NameStartChar,
        )(input)
    }
}

/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
impl Parser<'_> for NameChar {
    type Output = NameChar;

    fn parse(input: &str) -> IResult<&str, NameChar> {
        // Either a name-start character (unwrapped back to a plain `char`) or one of the
        // characters that are only legal after the first position.
        let start_char = map(NameStartChar::parse, |start| start.0);
        let extra_char = satisfy(
            |ch| matches!(ch, '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'),
        );
        map(alt((start_char, extra_char)), NameChar)(input)
    }
}

/// [5] Name ::= NameStartChar (NameChar)*
impl<'s> Parser<'s> for Name<'s> {
    type Output = Name<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, Name<'s>> {
        let name_text = recognize(pair(NameStartChar::parse, many0(NameChar::parse)));
        map(name_text, Name)(input)
    }
}

/// [6] Names ::= Name (#x20 Name)*
impl<'s> Parser<'s> for Names<'s> {
    type Output = Names<'s>;

    // TODO: fix
    fn parse(input: &'s str) -> IResult<&'s str, Names<'s>> {
        let (rest, first) = Name::parse(input)?;
        let (rest, mut names) = many0(preceded(char('\u{20}'), Name::parse))(rest)?;
        names.insert(0, first);
        Ok((rest, Names(names)))
    }
}

/// [7] Nmtoken ::= (NameChar)+
impl<'s> Parser<'s> for Nmtoken<'s> {
    type Output = Nmtoken<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, Nmtoken<'s>> {
        map(recognize(many1(NameChar::parse)), Nmtoken)(input)
    }
}

/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
impl<'s> Parser<'s> for Nmtokens<'s> {
    type Output = Nmtokens<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, Nmtokens<'s>> {
        let (rest, first) = Nmtoken::parse(input)?;
        let (rest, mut tokens) = many0(preceded(char('\u{20}'), Nmtoken::parse))(rest)?;
        tokens.insert(0, first);
        Ok((rest, Nmtokens(tokens)))
    }
}

/// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
///                   | "'" ([^%&'] | PEReference | Reference)* "'"
impl<'s> Parser<'s> for EntityValue<'s> {
    type Output = EntityValue<'s>;

    fn parse(input: &'s
str) -> IResult<&'s str, EntityValue<'s>> {
        // Literal runs are matched with `is_not`, which needs at least one character. A
        // sub-parser that can succeed on empty input would trip `many0`'s no-progress guard at
        // the closing quote and make every entity value fail to parse.
        alt((
            map(
                delimited(
                    char('"'),
                    many0(alt((
                        map(is_not("%&\""), EntityValueData::String),
                        map(PEReference::parse, EntityValueData::PEReference),
                        map(Reference::parse, EntityValueData::Reference),
                    ))),
                    char('"'),
                ),
                EntityValue::DoubleQuoted,
            ),
            map(
                delimited(
                    char('\''),
                    many0(alt((
                        map(is_not("%&'"), EntityValueData::String),
                        map(PEReference::parse, EntityValueData::PEReference),
                        map(Reference::parse, EntityValueData::Reference),
                    ))),
                    char('\''),
                ),
                EntityValue::SingleQuoted,
            ),
        ))(input)
    }
}

/// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
///                 | "'" ([^<&'] | Reference)* "'"
impl<'s> Parser<'s> for AttValue<'s> {
    type Output = AttValue<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, AttValue<'s>> {
        alt((
            map(
                delimited(
                    char('"'),
                    many0(alt((
                        map(is_not("<&\""), AttValueData::String),
                        map(Reference::parse, AttValueData::Reference),
                    ))),
                    char('"'),
                ),
                AttValue::DoubleQuoted,
            ),
            map(
                delimited(
                    char('\''),
                    many0(alt((
                        map(is_not("<&'"), AttValueData::String),
                        map(Reference::parse, AttValueData::Reference),
                    ))),
                    char('\''),
                ),
                AttValue::SingleQuoted,
            ),
        ))(input)
    }
}

/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
impl<'s> Parser<'s> for SystemLiteral<'s> {
    type Output = SystemLiteral<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, SystemLiteral<'s>> {
        alt((
            map(
                delimited(char('"'), recognize(many0(none_of("\""))), char('"')),
                |system_literal| SystemLiteral::DoubleQuoted(system_literal),
            ),
            map(
                delimited(char('\''), recognize(many0(none_of("'"))), char('\'')),
|system_literal| SystemLiteral::SingleQuoted(system_literal),
            ),
        ))(input)
    }
}

/// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
impl<'s> Parser<'s> for PubidLiteral<'s> {
    type Output = PubidLiteral<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, PubidLiteral<'s>> {
        alt((
            map(
                delimited(char('"'), recognize(many0(PubidChar::parse)), char('"')),
                PubidLiteral::DoubleQuoted,
            ),
            map(
                delimited(
                    char('\''),
                    // `'` is itself a PubidChar, so the single-quoted form must stop in front
                    // of it; `not` is a zero-width lookahead and consumes nothing.
                    recognize(many0(preceded(not(char('\'')), PubidChar::parse))),
                    char('\''),
                ),
                PubidLiteral::SingleQuoted,
            ),
        ))(input)
    }
}

/// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
impl Parser<'_> for PubidChar {
    type Output = PubidChar;

    fn parse(input: &'_ str) -> IResult<&str, PubidChar> {
        map(
            satisfy(|c| {
                matches!(
                    c,
                    '\u{20}' | '\u{D}' | '\u{A}' | 'a'..='z' | 'A'..='Z' | '0'..='9'
                        // punctuation class of the production: [-'()+,./:=?;!*#@$_%]
                        | '-' | '\'' | '(' | ')' | '+' | ',' | '.' | '/' | ':' | '=' | '?'
                        | ';' | '!' | '*' | '#' | '@' | '$' | '_' | '%'
                )
            }),
            PubidChar,
        )(input)
    }
}

/// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
impl<'s> Parser<'s> for CharData<'s> {
    type Output = CharData<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, CharData<'s>> {
        // Consumes characters until `<`, `&` or `]]>` is next; the terminator is only peeked.
        map(
            recognize(many_till(
                none_of("<&"),
                peek(alt((recognize(one_of("<&")), tag("]]>")))),
            )),
            CharData,
        )(input)
    }
}

/// Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
// NOTE(review): the `delimited(...)` arguments below are incomplete as found (parentheses do
// not balance and `tag("")` carries no text) — the comment delimiters and the inner parser
// appear to have been stripped from this copy. Restore from version control; the tokens are
// left exactly as they were.
impl<'s> Parser<'s> for Comment<'s> {
    type Output = Comment<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, Comment<'s>> {
        map(
            delimited(
                tag(""),
            ),
            |comment| Comment(comment),
        )(input)
    }
}

/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))?
///             '?>'
// NOTE(review): the arguments of `delimited(...)` below are incomplete as found (parentheses
// do not balance) — the opening `tag` literal and the target/instruction parser appear to
// have been stripped from this copy. Restore from version control; tokens left as they were.
impl<'s> Parser<'s> for PI<'s> {
    type Output = PI<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, PI<'s>> {
        map(
            delimited(
                tag(""))),
                ))),
                ),
                tag("?>"),
            ),
            |(target, instruction)| PI {
                target,
                instruction,
            },
        )(input)
    }
}

/// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
impl<'s> Parser<'s> for PITarget<'s> {
    type Output = PITarget<'s>;

    /// Parses a `Name` and rejects it when it spells `xml` in any letter case.
    fn parse(input: &'s str) -> IResult<&'s str, PITarget<'s>> {
        let (rest, name) = Name::parse(input)?;
        // Case-insensitive comparison without allocating a lowercased copy.
        if name.0.eq_ignore_ascii_case("xml") {
            Err(Err::Error(Error {
                input,
                // TODO: check if better error to return
                code: ErrorKind::Tag,
            }))
        } else {
            Ok((rest, PITarget(name)))
        }
    }
}

/// [18] CDSect ::= CDStart CData CDEnd
impl<'s> Parser<'s> for CDSect<'s> {
    type Output = CDSect<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, CDSect<'s>> {
        map(
            delimited(CDStart::parse, CData::parse, CDEnd::parse),
            |c_data| CDSect(c_data),
        )(input)
    }
}

/// [19] CDStart ::= '<![CDATA['
impl Parser<'_> for CDStart {
    type Output = CDStart;

    fn parse(input: &'_ str) -> IResult<&str, CDStart> {
        value(CDStart, tag("<![CDATA["))(input)
    }
}

/// [20] CData ::= (Char* - (Char* ']]>' Char*))
impl<'s> Parser<'s> for CData<'s> {
    type Output = CData<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, CData<'s>> {
        // Everything up to, but not including, the first `]]>`.
        map(
            recognize(many_till(Char::parse, peek(tag("]]>")))),
            |c_data| CData(c_data),
        )(input)
    }
}

/// [21] CDEnd ::= ']]>'
impl Parser<'_> for CDEnd {
    type Output = CDEnd;

    fn parse(input: &'_ str) -> IResult<&str, CDEnd> {
        value(CDEnd, tag("]]>"))(input)
    }
}

/// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
impl<'s> Parser<'s> for Prolog<'s> {
    type Output = Prolog<'s>;

    // Yields the raw tuple (optional XMLDecl, Misc list, optional (doctypedecl, Misc list)).
    fn parse(input: &'s str) -> IResult<&'s str, Prolog<'s>> {
        tuple((
            opt(XMLDecl::parse),
            many0(Misc::parse),
            opt(tuple((DoctypeDecl::parse, many0(Misc::parse)))),
        ))(input)
    }
}

/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
// NOTE(review): the arguments of `delimited(...)` below are incomplete as found (`tag("")`
// carries no text and the parentheses do not balance) — the delimiters and the inner parser
// appear to have been stripped from this copy. Restore from version control.
impl<'s> Parser<'s> for XMLDecl<'s> {
    type Output = XMLDecl<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, XMLDecl<'s>> {
        map(
            delimited(
                tag("")),
            ),
            |(version_info, encoding_decl, sd_decl)| XMLDecl {
                version_info,
                encoding_decl,
                sd_decl,
            },
        )(input)
    }
}

/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
impl Parser<'_> for VersionInfo {
    type Output = VersionInfo;

    // The variant records which quote character surrounded the version number.
    fn parse(input: &'_ str) -> IResult<&str, VersionInfo> {
        preceded(
            tuple((S::parse, tag("version"), Eq::parse)),
            alt((
                map(
                    delimited(char('\''), VersionNum::parse, char('\'')),
                    |version_info| VersionInfo::SingleQuoted(version_info),
                ),
                map(
                    delimited(char('"'), VersionNum::parse, char('"')),
                    |version_info| VersionInfo::DoubleQuoted(version_info),
                ),
            )),
        )(input)
    }
}

/// [25] Eq ::= S? '=' S?
impl Parser<'_> for Eq {
    type Output = Eq;

    fn parse(input: &'_ str) -> IResult<&str, Eq> {
        value(
            Eq,
            recognize(tuple((opt(S::parse), char('='), opt(S::parse)))),
        )(input)
    }
}

/// [26] VersionNum ::= '1.'
///                     [0-9]+
// Only the minor versions `0` and `1` are recognised here, not the full `[0-9]+`.
impl Parser<'_> for VersionNum {
    type Output = VersionNum;

    fn parse(input: &'_ str) -> IResult<&str, VersionNum> {
        preceded(
            tag("1."),
            alt((
                value(VersionNum::One, char('0')),
                value(VersionNum::OneDotOne, char('1')),
            )),
        )(input)
    }
}

/// [27] Misc ::= Comment | PI | S
impl<'s> Parser<'s> for Misc<'s> {
    type Output = Misc<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, Misc<'s>> {
        alt((
            map(Comment::parse, |comment| Misc::Comment(comment)),
            map(PI::parse, |pi| Misc::PI(pi)),
            value(Misc::S, S::parse),
        ))(input)
    }
}

/// [16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
/// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
// NOTE(review): the arguments of `delimited(...)` below are incomplete as found (`tag("")`
// carries no text and the parentheses do not balance) — the `<!DOCTYPE` literal and the
// inner parser appear to have been stripped from this copy. Restore from version control.
impl<'s> Parser<'s> for DoctypeDecl<'s> {
    type Output = DoctypeDecl<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, DoctypeDecl<'s>> {
        map(
            delimited(
                pair(tag(""),
            ),
            |(name, external_id, int_subset)| DoctypeDecl {
                name,
                external_id,
                int_subset,
            },
        )(input)
    }
}

/// [28a] DeclSep ::= PEReference | S
impl<'s> Parser<'s> for DeclSep<'s> {
    type Output = DeclSep<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, DeclSep<'s>> {
        alt((
            map(PEReference::parse, |pe_reference| {
                DeclSep::PEReference(pe_reference)
            }),
            value(DeclSep::S, S::parse),
        ))(input)
    }
}

/// [28b] intSubset ::= (markupdecl | DeclSep)*
impl<'s> Parser<'s> for IntSubset<'s> {
    type Output = IntSubset<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, IntSubset<'s>> {
        many0(alt((
            map(MarkupDecl::parse, |markup_decl| {
                IntSubsetDeclaration::MarkupDecl(markup_decl)
            }),
            map(DeclSep::parse, |decl_sep| {
                IntSubsetDeclaration::DeclSep(decl_sep)
            }),
        )))(input)
    }
}

/// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
impl<'s> Parser<'s> for MarkupDecl<'s> {
    type Output = MarkupDecl<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, MarkupDecl<'s>> {
        alt((
            map(Elementdecl::parse, |elementdecl| {
                MarkupDecl::Elementdecl(elementdecl)
            }),
            map(AttlistDecl::parse, |attlist_decl| {
                MarkupDecl::AttlistDecl(attlist_decl)
            }),
            map(EntityDecl::parse, |entity_decl| {
                MarkupDecl::EntityDecl(entity_decl)
            }),
map(NotationDecl::parse, MarkupDecl::NotationDecl),
            map(PI::parse, MarkupDecl::PI),
            map(Comment::parse, MarkupDecl::Comment),
        ))(input)
    }
}

/// [30] extSubset ::= TextDecl? extSubsetDecl
impl<'s> Parser<'s> for ExtSubset<'s> {
    type Output = ExtSubset<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, ExtSubset<'s>> {
        let (rest, text_decl) = opt(TextDecl::parse)(input)?;
        let (rest, ext_subset_decl) = ExtSubsetDecl::parse(rest)?;
        Ok((
            rest,
            ExtSubset {
                text_decl,
                ext_subset_decl,
            },
        ))
    }
}

/// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
impl<'s> Parser<'s> for ExtSubsetDecl<'s> {
    type Output = ExtSubsetDecl<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, ExtSubsetDecl<'s>> {
        let declaration = alt((
            map(MarkupDecl::parse, ExtSubsetDeclaration::MarkupDecl),
            map(ConditionalSect::parse, ExtSubsetDeclaration::ConditionalSect),
            map(DeclSep::parse, ExtSubsetDeclaration::DeclSep),
        ));
        many0(declaration)(input)
    }
}

/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
impl Parser<'_> for SDDecl {
    type Output = SDDecl;

    fn parse(input: &'_ str) -> IResult<&str, SDDecl> {
        /// `yes` -> `true`, `no` -> `false`.
        fn yes_no(text: &str) -> IResult<&str, bool> {
            alt((value(true, tag("yes")), value(false, tag("no"))))(text)
        }

        let (rest, _) = tuple((S::parse, tag("standalone"), Eq::parse))(input)?;
        // The variant records which quote character was used.
        alt((
            map(delimited(char('\''), yes_no, char('\'')), SDDecl::SingleQuoted),
            map(delimited(char('"'), yes_no, char('"')), SDDecl::DoubleQuoted),
        ))(rest)
    }
}

// (Productions 33 through 38 have been removed.)
/// [39] element ::= EmptyElemTag | STag content ETag
impl<'s> Parser<'s> for Element<'s> {
    type Output = Element<'s>;

    // The self-closing form is tried first; otherwise start tag, content and end tag are
    // parsed in sequence.
    fn parse(input: &'s str) -> IResult<&'s str, Element<'s>> {
        alt((
            map(EmptyElemTag::parse, |empty_elem_tag| {
                Element::Empty(empty_elem_tag)
            }),
            map(
                tuple((STag::parse, Content::parse, ETag::parse)),
                |(s_tag, content, e_tag)| Element::NotEmpty(s_tag, content, e_tag),
            ),
        ))(input)
    }
}

/// [12] STag ::= '<' QName (S Attribute)* S? '>'
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
impl<'s> Parser<'s> for STag<'s> {
    type Output = STag<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, STag<'s>> {
        map(
            delimited(
                tag("<"),
                pair(QName::parse, many0(preceded(S::parse, Attribute::parse))),
                pair(opt(S::parse), tag(">")),
            ),
            |(name, attributes)| STag { name, attributes },
        )(input)
    }
}

/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
impl<'s> Parser<'s> for Attribute<'s> {
    type Output = Attribute<'s>;

    // Namespace declarations (`xmlns`, `xmlns:prefix`) are tried before ordinary attributes.
    fn parse(input: &'s str) -> IResult<&'s str, Attribute<'s>> {
        alt((
            map(
                separated_pair(NSAttName::parse, Eq::parse, AttValue::parse),
                |(ns_name, value)| Attribute::NamespaceDeclaration { ns_name, value },
            ),
            map(
                separated_pair(QName::parse, Eq::parse, AttValue::parse),
                |(name, value)| Attribute::Attribute { name, value },
            ),
        ))(input)
    }
}

// pub type Attribute<'s> = (Name<'s>, AttValue<'s>);
/// [41] Attribute ::= Name Eq AttValue
// pub fn attribute(input: &str) -> IResult<&'s str, Attribute> {
//     separated_pair(name, eq, att_value)(input)
// }

/// [13] ETag ::= '</' QName S? '>'
/// [42] ETag ::= '</' Name S? '>'
// NOTE(review): the arguments of `delimited(...)` below are incomplete as found (`tag("")`
// carries no text and the parentheses do not balance) — the `</` literal and the name parser
// appear to have been stripped from this copy. Restore from version control.
impl<'s> Parser<'s> for ETag<'s> {
    type Output = ETag<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, ETag<'s>> {
        map(
            delimited(tag(""))),
            |name| ETag { name },
        )(input)
    }
}

// One item of element content: element, reference, CDATA section, PI or comment.
impl<'s> Parser<'s> for ContentItem<'s> {
    type Output = ContentItem<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, ContentItem<'s>> {
        alt((
            map(Element::parse, |element| ContentItem::Element(element)),
            map(Reference::parse, |reference| {
                ContentItem::Reference(reference)
            }),
map(CDSect::parse, |cd_sect| ContentItem::CDSect(cd_sect)),
            map(PI::parse, |pi| ContentItem::PI(pi)),
            map(Comment::parse, |comment| ContentItem::Comment(comment)),
        ))(input)
    }
}

/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
impl<'s> Parser<'s> for Content<'s> {
    type Output = Content<'s>;

    // Leading character data, then zero or more (item, trailing character data) pairs. The
    // item alternatives repeat the list used by `ContentItem::parse`.
    fn parse(input: &'s str) -> IResult<&'s str, Content<'s>> {
        map(
            pair(
                opt(CharData::parse),
                many0(pair(
                    alt((
                        map(Element::parse, |element| ContentItem::Element(element)),
                        map(Reference::parse, |reference| {
                            ContentItem::Reference(reference)
                        }),
                        map(CDSect::parse, |cd_sect| ContentItem::CDSect(cd_sect)),
                        map(PI::parse, |pi| ContentItem::PI(pi)),
                        map(Comment::parse, |comment| ContentItem::Comment(comment)),
                    )),
                    opt(CharData::parse),
                )),
            ),
            |(char_data, content)| Content { char_data, content },
        )(input)
    }
}

/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
impl<'s> Parser<'s> for EmptyElemTag<'s> {
    type Output = EmptyElemTag<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, EmptyElemTag<'s>> {
        map(
            delimited(
                tag("<"),
                pair(QName::parse, many0(preceded(S::parse, Attribute::parse))),
                pair(opt(S::parse), tag("/>")),
            ),
            |(name, attributes)| EmptyElemTag { name, attributes },
        )(input)
    }
}

/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
/// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
// NOTE(review): the arguments of `delimited(...)` below are incomplete as found (`tag("")`
// carries no text and the parentheses do not balance) — the `<!ELEMENT` literal and the
// inner parser appear to have been stripped from this copy. Restore from version control.
impl<'s> Parser<'s> for Elementdecl<'s> {
    type Output = Elementdecl<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, Elementdecl<'s>> {
        map(
            delimited(
                pair(tag("")),
            ),
            |(name, contentspec)| Elementdecl { name, contentspec },
        )(input)
    }
}

/// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
impl<'s> Parser<'s> for Contentspec<'s> {
    type Output = Contentspec<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, Contentspec<'s>> {
        alt((
            value(Contentspec::Empty, tag("EMPTY")),
            value(Contentspec::Any, tag("ANY")),
            map(Mixed::parse, |mixed| Contentspec::Mixed(mixed)),
            map(Children::parse, |children|
Contentspec::Children(children)),
        ))(input)
    }
}

/// Occurence ::= ('?' | '*' | '+')?
impl Parser<'_> for Occurence {
    type Output = Occurence;

    fn parse(input: &'_ str) -> IResult<&str, Occurence> {
        // An absent suffix means "exactly once".
        let (rest, suffix) = opt(one_of("?*+"))(input)?;
        let occurence = match suffix {
            Some('?') => Occurence::Optional,
            Some('*') => Occurence::Many0,
            Some('+') => Occurence::Many1,
            _ => Occurence::Once,
        };
        Ok((rest, occurence))
    }
}

/// [47] children ::= (choice | seq) ('?' | '*' | '+')?
impl<'s> Parser<'s> for Children<'s> {
    type Output = Children<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, Children<'s>> {
        let (rest, kind) = alt((
            map(Choice::parse, ChildrenKind::Choice),
            map(Seq::parse, ChildrenKind::Seq),
        ))(input)?;
        let (rest, occurence) = Occurence::parse(rest)?;
        Ok((rest, Children { kind, occurence }))
    }
}

/// [18] cp ::= (QName | choice | seq) ('?' | '*' | '+')?
/// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
impl<'s> Parser<'s> for Cp<'s> {
    type Output = Cp<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, Cp<'s>> {
        let (rest, kind) = alt((
            map(QName::parse, CpKind::Name),
            map(Choice::parse, CpKind::Choice),
            map(Seq::parse, CpKind::Seq),
        ))(input)?;
        let (rest, occurence) = Occurence::parse(rest)?;
        Ok((rest, Cp { kind, occurence }))
    }
}

/// [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
impl<'s> Parser<'s> for Choice<'s> {
    type Output = Choice<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, Choice<'s>> {
        let bar = tuple((opt(S::parse), tag("|"), opt(S::parse)));
        let alternatives = pair(Cp::parse, many1(preceded(bar, Cp::parse)));
        map(
            delimited(
                pair(tag("("), opt(S::parse)),
                alternatives,
                pair(opt(S::parse), tag(")")),
            ),
            |(first, mut others)| {
                others.insert(0, first);
                Choice(others)
            },
        )(input)
    }
}

/// [50] seq ::= '(' S? cp ( S? ',' S? cp )* S?
')' impl<'s> Parser<'s> for Seq<'s> { type Output = Seq<'s>; fn parse(input: &'s str) -> IResult<&'s str, Seq<'s>> { map( delimited( pair(tag("("), opt(S::parse)), pair( Cp::parse, many0(preceded( tuple((opt(S::parse), tag(","), opt(S::parse))), Cp::parse, )), ), pair(opt(S::parse), tag(")")), ), |(head, tail)| { let seq = vec![vec![head], tail].concat(); Seq(seq) }, )(input) } } /// [19] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')' /// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' impl<'s> Parser<'s> for Mixed<'s> { type Output = Mixed<'s>; fn parse(input: &'s str) -> IResult<&'s str, Mixed<'s>> { alt(( map( delimited( tuple((tag("("), S::parse, tag("#PCDATA"))), many0(preceded( tuple((opt(S::parse), tag("|"), opt(S::parse))), QName::parse, )), pair(opt(S::parse), tag(")*")), ), |names| Mixed(names), ), value( Mixed(Vec::new()), tuple(( tag("("), opt(S::parse), tag("#PCDATA"), opt(S::parse), tag(")"), )), ), ))(input) } } /// [20] AttlistDecl ::= '' /// [52] AttlistDecl ::= '' impl<'s> Parser<'s> for AttlistDecl<'s> { type Output = AttlistDecl<'s>; fn parse(input: &'s str) -> IResult<&'s str, AttlistDecl<'s>> { map( delimited( pair(tag("")), ), |(element_type, att_defs)| AttlistDecl { element_type, att_defs, }, )(input) } } /// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl /// [53] AttDef ::= S Name S AttType S DefaultDecl impl<'s> Parser<'s> for AttDef<'s> { type Output = AttDef<'s>; fn parse(input: &'s str) -> IResult<&'s str, AttDef<'s>> { map( tuple(( preceded( S::parse, alt(( map(QName::parse, |q_name| AttDefName::QName(q_name)), map(NSAttName::parse, |ns_att_name| { AttDefName::NSAttName(ns_att_name) }), )), ), preceded(S::parse, AttType::parse), preceded(S::parse, DefaultDecl::parse), )), |(name, att_type, default_decl)| AttDef { name, att_type, default_decl, }, )(input) } } /// [54] AttType ::= StringType | TokenizedType | EnumeratedType impl<'s> Parser<'s> for 
AttType<'s> {
    type Output = AttType<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, AttType<'s>> {
        alt((
            value(AttType::StringType, StringType::parse),
            map(TokenizedType::parse, AttType::TokenizedType),
            map(EnumeratedType::parse, AttType::EnumeratedType),
        ))(input)
    }
}

/// [55] StringType ::= 'CDATA'
impl Parser<'_> for StringType {
    type Output = StringType;

    fn parse(input: &'_ str) -> IResult<&str, StringType> {
        value(StringType, tag("CDATA"))(input)
    }
}

/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
impl Parser<'_> for TokenizedType {
    type Output = TokenizedType;

    /// Recognises one tokenized-type keyword.
    ///
    /// `alt` commits to the first alternative that matches, so any keyword that is a prefix
    /// of another must come after the longer one: `IDREFS` before `IDREF` before `ID`, and
    /// `NMTOKENS` before `NMTOKEN`.
    fn parse(input: &'_ str) -> IResult<&str, TokenizedType> {
        alt((
            value(TokenizedType::IDRefs, tag("IDREFS")),
            value(TokenizedType::IDRef, tag("IDREF")),
            value(TokenizedType::ID, tag("ID")),
            value(TokenizedType::Entities, tag("ENTITIES")),
            value(TokenizedType::Entity, tag("ENTITY")),
            value(TokenizedType::NMTokens, tag("NMTOKENS")),
            value(TokenizedType::NMToken, tag("NMTOKEN")),
        ))(input)
    }
}

/// [57] EnumeratedType ::= NotationType | Enumeration
impl<'s> Parser<'s> for EnumeratedType<'s> {
    type Output = EnumeratedType<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, EnumeratedType<'s>> {
        alt((
            map(NotationType::parse, EnumeratedType::NotationType),
            map(Enumeration::parse, EnumeratedType::Enumeration),
        ))(input)
    }
}

/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S?
')' impl<'s> Parser<'s> for NotationType<'s> { type Output = NotationType<'s>; fn parse(input: &'s str) -> IResult<&'s str, NotationType<'s>> { map( delimited( tuple((tag("NOTATION"), S::parse, tag("("), opt(S::parse))), pair( Name::parse, many0(preceded( tuple((opt(S::parse), tag("|"), opt(S::parse))), Name::parse, )), ), pair(opt(S::parse), tag(")")), ), |(head, tail)| { let notation_type = vec![vec![head], tail].concat(); NotationType(notation_type) }, )(input) } } /// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' impl<'s> Parser<'s> for Enumeration<'s> { type Output = Enumeration<'s>; fn parse(input: &'s str) -> IResult<&'s str, Enumeration<'s>> { map( delimited( pair(tag("("), opt(S::parse)), pair( Nmtoken::parse, many0(preceded( tuple((opt(S::parse), tag("|"), opt(S::parse))), Nmtoken::parse, )), ), pair(opt(S::parse), tag(")")), ), |(head, tail)| { let enumeration = vec![vec![head], tail].concat(); Enumeration(enumeration) }, )(input) } } /// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? 
///                      AttValue)
impl<'s> Parser<'s> for DefaultDecl<'s> {
    type Output = DefaultDecl<'s>;

    // The bool stored in `Fixed` is true when the `#FIXED` keyword was present.
    fn parse(input: &'s str) -> IResult<&'s str, DefaultDecl<'s>> {
        alt((
            value(DefaultDecl::Required, tag("#REQUIRED")),
            value(DefaultDecl::Implied, tag("#IMPLIED")),
            map(
                pair(opt(pair(tag("#FIXED"), S::parse)), AttValue::parse),
                |(must, att_value)| DefaultDecl::Fixed(must.is_some(), att_value),
            ),
        ))(input)
    }
}

/// [61] conditionalSect ::= includeSect | ignoreSect
impl<'s> Parser<'s> for ConditionalSect<'s> {
    type Output = ConditionalSect<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, ConditionalSect<'s>> {
        alt((
            map(IncludeSect::parse, |include_sect| {
                ConditionalSect::IncludeSect(include_sect)
            }),
            map(IgnoreSect::parse, |ignore_sect| {
                ConditionalSect::IgnoreSect(ignore_sect)
            }),
        ))(input)
    }
}

// NOTE(review): in the four parsers below the combinator arguments are incomplete as found
// (`tag("")` carries no text and the parentheses do not balance) — the `<![` / `]]>`
// literals and the parsers between them appear to have been stripped from this copy.
// Restore from version control.

/// [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
impl<'s> Parser<'s> for IncludeSect<'s> {
    type Output = IncludeSect<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, IncludeSect<'s>> {
        map(
            delimited(
                tuple((
                    tag(""),
            ),
            |ext_subset_decl| IncludeSect(ext_subset_decl),
        )(input)
    }
}

/// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
impl<'s> Parser<'s> for IgnoreSect<'s> {
    type Output = IgnoreSect<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, IgnoreSect<'s>> {
        map(
            delimited(
                tuple((
                    tag(""),
            ),
            |ignore_sect_contents| IgnoreSect(ignore_sect_contents),
        )(input)
    }
}

/// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
impl<'s> Parser<'s> for IgnoreSectContents<'s> {
    type Output = IgnoreSectContents<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, IgnoreSectContents<'s>> {
        map(
            pair(
                Ignore::parse,
                many0(tuple((
                    delimited(tag("")),
                    Ignore::parse,
                ))),
            ),
            |(ignore, ignore_list)| IgnoreSectContents {
                ignore,
                ignore_list,
            },
        )(input)
    }
}

/// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
impl<'s> Parser<'s> for Ignore<'s> {
    type Output = Ignore<'s>;

    fn parse(input: &'s str) -> IResult<&'s str, Ignore<'s>> {
        map(
            recognize(many_till(Char::parse, peek(alt((tag("")))))),
            |ignore| Ignore(ignore),
        )(input)
    }
}

/// [66] CharRef ::= '&#' [0-9]+ ';'
| '&#x' [0-9a-fA-F]+ ';' impl<'s> Parser<'s> for CharRef<'s> { type Output = CharRef<'s>; fn parse(input: &'s str) -> IResult<&'s str, CharRef<'s>> { alt(( delimited( tag("&#"), map(take_while(|c| matches!(c, '0'..='9')), |decimal| { CharRef::Decimal(decimal) }), tag(";"), ), delimited( tag("&#x"), map( take_while(|c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F' )), |hexadecimal| CharRef::Hexadecimal(hexadecimal), ), tag(";"), ), ))(input) } } /// [67] Reference ::= EntityRef | CharRef impl<'s> Parser<'s> for Reference<'s> { type Output = Reference<'s>; fn parse(input: &'s str) -> IResult<&'s str, Reference<'s>> { alt(( map(EntityRef::parse, |entity_ref| { Reference::EntityRef(entity_ref) }), map(CharRef::parse, |char_ref| Reference::CharRef(char_ref)), ))(input) } } /// [68] EntityRef ::= '&' Name ';' impl<'s> Parser<'s> for EntityRef<'s> { type Output = EntityRef<'s>; fn parse(input: &'s str) -> IResult<&'s str, EntityRef<'s>> { map(delimited(tag("&"), Name::parse, tag(";")), |entity_ref| { EntityRef(entity_ref) })(input) } } /// [69] PEReference ::= '%' Name ';' impl<'s> Parser<'s> for PEReference<'s> { type Output = PEReference<'s>; fn parse(input: &'s str) -> IResult<&'s str, PEReference<'s>> { map(delimited(tag("%"), Name::parse, tag(";")), |pe_reference| { PEReference(pe_reference) })(input) } } /// [70] EntityDecl ::= GEDecl | PEDecl impl<'s> Parser<'s> for EntityDecl<'s> { type Output = EntityDecl<'s>; fn parse(input: &'s str) -> IResult<&'s str, EntityDecl<'s>> { alt(( map(GEDecl::parse, |ge_decl| EntityDecl::GEDecl(ge_decl)), map(PEDecl::parse, |pe_decl| EntityDecl::PEDecl(pe_decl)), ))(input) } } /// [71] GEDecl ::= '' impl<'s> Parser<'s> for GEDecl<'s> { type Output = GEDecl<'s>; fn parse(input: &'s str) -> IResult<&'s str, GEDecl<'s>> { map( delimited( pair(tag("")), ), |(name, entity_def)| GEDecl { name, entity_def }, )(input) } } /// [72] PEDecl ::= '' impl<'s> Parser<'s> for PEDecl<'s> { type Output = PEDecl<'s>; fn parse(input: &'s str) -> 
IResult<&'s str, PEDecl<'s>> { map( delimited( tuple((tag("")), ), |(name, pe_def)| PEDecl { name, pe_def }, )(input) } } /// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) impl<'s> Parser<'s> for EntityDef<'s> { type Output = EntityDef<'s>; fn parse(input: &'s str) -> IResult<&'s str, EntityDef<'s>> { alt(( map(EntityValue::parse, |entity_value| { EntityDef::EntityValue(entity_value) }), map( pair(ExternalID::parse, opt(NDataDecl::parse)), |(external_id, n_data_decl)| EntityDef::ExternalID { external_id, n_data_decl, }, ), ))(input) } } /// [74] PEDef ::= EntityValue | ExternalID impl<'s> Parser<'s> for PEDef<'s> { type Output = PEDef<'s>; fn parse(input: &'s str) -> IResult<&'s str, PEDef<'s>> { alt(( map(EntityValue::parse, |entity_value| { PEDef::EntityValue(entity_value) }), map(ExternalID::parse, |external_id| { PEDef::ExternalID(external_id) }), ))(input) } } /// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral // pub fn external_id(input: &str) -> IResult<&'s str, ExternalID> { impl<'s> Parser<'s> for ExternalID<'s> { type Output = ExternalID<'s>; fn parse(input: &'s str) -> IResult<&'s str, ExternalID<'s>> { alt(( map( preceded(pair(tag("SYSTEM"), S::parse), SystemLiteral::parse), |system_identifier| ExternalID::SYSTEM { system_identifier }, ), map( preceded( pair(tag("PUBLIC"), S::parse), separated_pair(PubidLiteral::parse, S::parse, SystemLiteral::parse), ), |(public_identifier, system_identifier)| ExternalID::PUBLIC { public_identifier, system_identifier, }, ), ))(input) } } /// [76] NDataDecl ::= S 'NDATA' S Name impl<'s> Parser<'s> for NDataDecl<'s> { type Output = NDataDecl<'s>; fn parse(input: &'s str) -> IResult<&'s str, NDataDecl<'s>> { map( preceded(tuple((S::parse, tag("NDATA"), S::parse)), Name::parse), |n_data_decl| NDataDecl(n_data_decl), )(input) } } /// [77] TextDecl ::= '' impl<'s> Parser<'s> for TextDecl<'s> { type Output = TextDecl<'s>; fn parse(input: &'s str) -> IResult<&'s str, 
TextDecl<'s>> { map( delimited( tag(""), ), |(version_info, encoding_decl)| TextDecl { version_info, encoding_decl, }, )(input) } } /// [78] extParsedEnt ::= TextDecl? content impl<'s> Parser<'s> for ExtParsedEnt<'s> { type Output = ExtParsedEnt<'s>; fn parse(input: &'s str) -> IResult<&'s str, ExtParsedEnt<'s>> { map( pair(opt(TextDecl::parse), Content::parse), |(text_decl, content)| ExtParsedEnt { text_decl, content }, )(input) } } /// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName impl<'s> Parser<'s> for EncodingDecl<'s> { type Output = EncodingDecl<'s>; fn parse(input: &'s str) -> IResult<&'s str, EncodingDecl<'s>> { map( preceded( tuple((S::parse, tag("encoding"), Eq::parse)), alt(( delimited(char('"'), EncName::parse, char('"')), delimited(char('\''), EncName::parse, char('\'')), )), ), |encoding_decl| EncodingDecl(encoding_decl), )(input) } } /// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* impl<'s> Parser<'s> for EncName<'s> { type Output = EncName<'s>; fn parse(input: &'s str) -> IResult<&'s str, EncName<'s>> { map( recognize(pair( satisfy(|c| matches!(c, 'A'..='Z' | 'a'..='z' )), many0(satisfy( |c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' 
| '_' | '-' ), )), )), |enc_name| EncName(enc_name), )(input) } } /// [82] NotationDecl ::= '' impl<'s> Parser<'s> for NotationDecl<'s> { type Output = NotationDecl<'s>; fn parse(input: &'s str) -> IResult<&'s str, NotationDecl<'s>> { map( delimited( pair(tag("")), ), |(name, id)| NotationDecl { name, id }, )(input) } } /// [83] PublicID ::= 'PUBLIC' S PubidLiteral impl<'s> Parser<'s> for PublicID<'s> { type Output = PublicID<'s>; fn parse(input: &'s str) -> IResult<&'s str, PublicID<'s>> { map( preceded(pair(tag("PUBLIC"), S::parse), PubidLiteral::parse), |public_id| PublicID(public_id), )(input) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_char_data() { assert_eq!( Ok(("&def]]>ghi", CharData("abc"))), CharData::parse("abc&def]]>ghi") ); assert_eq!( Ok(("]]>ghi", CharData("abcdef"))), CharData::parse("abcdef]]>ghi") ); assert_eq!( Ok(("&defghi", CharData("abc"))), CharData::parse("abc&defghi") ); assert_eq!( Ok(("]]>def&ghi", CharData("abc"))), CharData::parse("abc]]>def&ghi") ); assert_eq!( Ok(("&ghi", CharData("abc]>def"))), CharData::parse("abc]>def&ghi") ); } #[test] fn test_comment() { assert_eq!(Ok(("", Comment(""))), Comment::parse("")); assert_eq!(Ok(("", Comment("asdf"))), Comment::parse("")); assert_eq!(Ok(("", Comment("as-df"))), Comment::parse("")); } #[test] fn test_pi_target() { assert_eq!(Ok((" ", PITarget(Name("asdf")))), PITarget::parse("asdf ")); assert_eq!( Ok((" ", PITarget(Name("xmlasdf")))), PITarget::parse("xmlasdf ") ); assert_eq!( Err(Err::Error(Error { input: "xml ", code: ErrorKind::Tag })), PITarget::parse("xml ") ); assert_eq!( Err(Err::Error(Error { input: "xMl ", code: ErrorKind::Tag })), PITarget::parse("xMl ") ); } #[test] fn test_cd_sect() { assert_eq!( Ok(("", CDSect(CData("Hello, world!")))), CDSect::parse("Hello, world!]]>") ) } #[test] fn test_cd_start() { assert_eq!(Ok(("asdf", CDStart)), CDStart::parse("asdf", CData("asdf"))), CData::parse("asdf]]>asdf")); assert_eq!( Ok(("]]>asdf", CData("asdf") ); assert_eq!( 
Ok(("]]>asdf", CData("Hello, world!"))), CData::parse("Hello, world!]]>asdf") ) } #[test] fn test_cd_end() { assert_eq!(Ok(("asdf", CDEnd)), CDEnd::parse("]]>asdf")) } }