From c08b4504ab326203b2c11abe566e518b6466613a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?cel=20=F0=9F=8C=B8?= Date: Thu, 27 Jun 2024 20:22:05 +0100 Subject: [PATCH] namespace parsing --- src/lib.rs | 2 +- src/{parser.rs => xml/mod.rs} | 203 +++++++++++++++++++++++++++++----- src/xml/parsers.rs | 1 + 3 files changed, 176 insertions(+), 30 deletions(-) rename src/{parser.rs => xml/mod.rs} (88%) create mode 100644 src/xml/parsers.rs diff --git a/src/lib.rs b/src/lib.rs index 8040aec..dcf14fe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ mod element; mod error; -pub mod parser; mod reader; mod writer; +pub mod xml; diff --git a/src/parser.rs b/src/xml/mod.rs similarity index 88% rename from src/parser.rs rename to src/xml/mod.rs index 266becc..47c1779 100644 --- a/src/parser.rs +++ b/src/xml/mod.rs @@ -19,6 +19,105 @@ use nom::{ // output is a rust representation of the input xml // types could be used for xml production too? +mod parsers; + +#[derive(Clone, Debug)] +pub enum NSAttName<'s> { + PrefixedAttName(PrefixedAttName<'s>), + DefaultAttName, +} +/// [1] NSAttName ::= PrefixedAttName | DefaultAttName +pub fn ns_att_name(input: &str) -> IResult<&str, NSAttName> { + alt(( + map(prefixed_att_name, |prefixed_att_name| { + NSAttName::PrefixedAttName(prefixed_att_name) + }), + value(NSAttName::DefaultAttName, default_att_name), + ))(input) +} + +#[derive(Clone, Debug)] +pub struct PrefixedAttName<'s>(NCName<'s>); +/// [2] PrefixedAttName ::= 'xmlns:' NCName +pub fn prefixed_att_name(input: &str) -> IResult<&str, PrefixedAttName> { + map(preceded(tag("xmlns:"), nc_name), |nc_name| { + PrefixedAttName(nc_name) + })(input) +} + +#[derive(Clone, Debug)] +pub struct DefaultAttName; +/// [3] DefaultAttName ::= 'xmlns'; +pub fn default_att_name(input: &str) -> IResult<&str, DefaultAttName> { + value(DefaultAttName, tag("xmlns"))(input) +} + +#[derive(Clone, Debug)] +pub struct NCName<'s>(&'s str); +/// [4] NCName ::= Name - (Char* ':' Char*) +pub fn nc_name(input: &str) -> IResult<&str, NCName> { + map( + recognize(pair( + recognize(name_start_char).and_then(satisfy(|c| c != ':')), + many_till(name_char, peek(char(':'))), + )), + |nc_name| NCName(nc_name), + )(input) +} + +#[derive(Clone, Debug)] +pub enum QName<'s> { + PrefixedName(PrefixedName<'s>), + UnprefixedName(UnprefixedName<'s>), +} +/// [7] QName ::= PrefixedName | UnprefixedName +pub fn q_name(input: &str) -> IResult<&str, QName> { + alt(( + map(prefixed_name, |prefixed_name| { + QName::PrefixedName(prefixed_name) + }), + map(unprefixed_name, |unprefixed_name| { + QName::UnprefixedName(unprefixed_name) + }), + ))(input) +} + +#[derive(Clone, Debug)] +pub struct PrefixedName<'s> { + prefix: Prefix<'s>, + local_part: LocalPart<'s>, +} +/// [8] PrefixedName ::= Prefix ':' LocalPart +pub fn prefixed_name(input: &str) -> IResult<&str, PrefixedName> { + map( + separated_pair(prefix, char(':'), local_part), + |(prefix, local_part)| PrefixedName { prefix, local_part }, + )(input) +} + +#[derive(Clone, Debug)] +pub struct UnprefixedName<'s>(LocalPart<'s>); +/// [9] UnprefixedName ::= LocalPart +pub fn unprefixed_name(input: &str) -> IResult<&str, UnprefixedName> { + map(local_part, |local_part| UnprefixedName(local_part))(input) +} + +#[derive(Clone, Debug)] +pub struct Prefix<'s>(NCName<'s>); +/// [10] Prefix ::= NCName +pub fn prefix(input: &str) -> IResult<&str, Prefix> { + map(nc_name, |nc_name| Prefix(nc_name))(input) +} + +#[derive(Clone, Debug)] +pub struct LocalPart<'s>(NCName<'s>); +/// [11] LocalPart ::= NCName +pub fn local_part(input: &str) -> IResult<&str, LocalPart> { + map(nc_name, |nc_name| LocalPart(nc_name))(input) +} + +// xml spec + pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec>); /// [1] document ::= prolog element Misc* pub fn document(input: &str) -> IResult<&str, Document> { @@ -375,17 +474,18 @@ pub fn misc(input: &str) -> IResult<&str, Misc> { #[derive(Debug)] pub struct DoctypeDecl<'s> { - name: &'s str, + name: QName<'s>, external_id: Option>, int_subset: Option>, } +/// [16] doctypedecl ::= '' /// [28] doctypedecl ::= '' pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> { map( delimited( pair(tag(" IResult<&str, SDDecl> { // (Productions 33 through 38 have been removed.) -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum Element<'s> { Empty(EmptyElemTag<'s>), NotEmpty(STag<'s>, Content<'s>, ETag<'s>), @@ -539,41 +639,67 @@ pub fn element(input: &str) -> IResult<&str, Element> { ))(input) } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct STag<'s> { - name: Name<'s>, + name: QName<'s>, attributes: Vec>, } +/// [12] STag ::= '<' QName (S Attribute)* S? '>' /// [40] STag ::= '<' Name (S Attribute)* S? '>' pub fn s_tag(input: &str) -> IResult<&str, STag> { map( delimited( tag("<"), - pair(name, many0(preceded(s, attribute))), + pair(q_name, many0(preceded(s, attribute))), pair(opt(s), tag(">")), ), |(name, attributes)| STag { name, attributes }, )(input) } -pub type Attribute<'s> = (Name<'s>, AttValue<'s>); -/// [41] Attribute ::= Name Eq AttValue +#[derive(Debug, Clone)] +pub enum Attribute<'s> { + NamespaceDeclaration { + ns_name: NSAttName<'s>, + value: AttValue<'s>, + }, + Attribute { + name: QName<'s>, + value: AttValue<'s>, + }, +} +/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue pub fn attribute(input: &str) -> IResult<&str, Attribute> { - separated_pair(name, eq, att_value)(input) + alt(( + map( + separated_pair(ns_att_name, eq, att_value), + |(ns_name, value)| Attribute::NamespaceDeclaration { ns_name, value }, + ), + map(separated_pair(q_name, eq, att_value), |(name, value)| { + Attribute::Attribute { name, value } + }), + ))(input) } +// pub type Attribute<'s> = (Name<'s>, AttValue<'s>); +/// [41] Attribute ::= Name Eq AttValue +// pub fn attribute(input: &str) -> IResult<&str, Attribute> { +// separated_pair(name, eq, att_value)(input) +// } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ETag<'s> { - name: Name<'s>, + name: QName<'s>, } +/// [13] ETag ::= '' /// [42] ETag ::= '' pub fn e_tag(input: &str) -> IResult<&str, ETag> { - map(delimited(tag(""))), |name| { - ETag { name } - })(input) + map( + delimited(tag(""))), + |name| ETag { name }, + )(input) } -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum ContentItem<'s> { // CharData(&'s str), Element(Element<'s>), @@ -582,7 +708,7 @@ pub enum ContentItem<'s> { PI(PI<'s>), Comment(Comment<'s>), } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Content<'s> { char_data: Option>, content: Vec<(ContentItem<'s>, Option>)>, @@ -607,17 +733,18 @@ pub fn content(input: &str) -> IResult<&str, Content> { )(input) } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct EmptyElemTag<'s> { - name: Name<'s>, + name: QName<'s>, attributes: Vec>, } +/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' /// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec] pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> { map( delimited( tag("<"), - pair(name, many0(preceded(s, attribute))), + pair(q_name, many0(preceded(s, attribute))), pair(opt(s), tag("/>")), ), |(name, attributes)| EmptyElemTag { name, attributes }, @@ -626,15 +753,16 @@ pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> { #[derive(Debug)] pub struct Elementdecl<'s> { - name: Name<'s>, + name: QName<'s>, contentspec: Contentspec<'s>, } +/// [17] elementdecl ::= '' /// [45] elementdecl ::= '' pub fn elementdecl(input: &str) -> IResult<&str, Elementdecl> { map( delimited( pair(tag("")), ), |(name, contentspec)| Elementdecl { name, contentspec }, @@ -709,7 +837,7 @@ pub fn children(input: &str) -> IResult<&str, Children> { #[derive(Clone, Debug)] pub enum CpKind<'s> { - Name(Name<'s>), + Name(QName<'s>), Choice(Choice<'s>), Seq(Seq<'s>), } @@ -718,12 +846,13 @@ pub struct Cp<'s> { kind: CpKind<'s>, occurence: Occurence, } +/// [18] cp ::= (QName | choice | seq) ('?' | '*' | '+')? /// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? pub fn cp(input: &str) -> IResult<&str, Cp> { map( pair( alt(( - map(name, |name| CpKind::Name(name)), + map(q_name, |name| CpKind::Name(name)), map(choice, |choice| CpKind::Choice(choice)), map(seq, |seq| CpKind::Seq(seq)), )), @@ -769,14 +898,15 @@ pub fn seq(input: &str) -> IResult<&str, Seq> { // always contains #PCDATA #[derive(Clone, Debug)] -pub struct Mixed<'s>(Vec>); +pub struct Mixed<'s>(Vec>); +/// [19] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')' /// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' pub fn mixed(input: &str) -> IResult<&str, Mixed> { alt(( map( delimited( tuple((tag("("), s, tag("#PCDATA"))), - many0(preceded(tuple((opt(s), tag("|"), opt(s))), name)), + many0(preceded(tuple((opt(s), tag("|"), opt(s))), q_name)), pair(opt(s), tag(")*")), ), |names| Mixed(names), @@ -790,15 +920,16 @@ pub fn mixed(input: &str) -> IResult<&str, Mixed> { #[derive(Debug)] pub struct AttlistDecl<'s> { - element_type: Name<'s>, + element_type: QName<'s>, att_defs: Vec>, } +/// [20] AttlistDecl ::= '' /// [52] AttlistDecl ::= '' pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> { map( delimited( pair(tag("")), ), |(element_type, att_defs)| AttlistDecl { @@ -808,17 +939,31 @@ pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> { )(input) } +#[derive(Debug)] +pub enum AttDefName<'s> { + QName(QName<'s>), + NSAttName(NSAttName<'s>), +} #[derive(Debug)] pub struct AttDef<'s> { - name: Name<'s>, + name: AttDefName<'s>, att_type: AttType<'s>, default_decl: DefaultDecl<'s>, } +/// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl /// [53] AttDef ::= S Name S AttType S DefaultDecl pub fn att_def(input: &str) -> IResult<&str, AttDef> { map( tuple(( - preceded(s, name), + preceded( + s, + alt(( + map(q_name, |q_name| AttDefName::QName(q_name)), + map(ns_att_name, |ns_att_name| { + AttDefName::NSAttName(ns_att_name) + }), + )), + ), preceded(s, att_type), preceded(s, default_decl), )), diff --git a/src/xml/parsers.rs b/src/xml/parsers.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/xml/parsers.rs @@ -0,0 +1 @@ +