namespace parsing

This commit is contained in:
cel 🌸 2024-06-27 20:22:05 +01:00
parent fa54b2dd3b
commit c08b4504ab
3 changed files with 176 additions and 30 deletions

View File

@ -1,5 +1,5 @@
mod element;
mod error;
pub mod parser;
mod reader;
mod writer;
pub mod xml;

View File

@ -19,6 +19,105 @@ use nom::{
// output is a rust representation of the input xml
// types could be used for xml production too?
mod parsers;
/// Name of an attribute that declares a namespace binding.
#[derive(Clone, Debug)]
pub enum NSAttName<'s> {
    /// A prefixed declaration, carrying the value produced by `prefixed_att_name`.
    PrefixedAttName(PrefixedAttName<'s>),
    /// The default-namespace declaration recognised by `default_att_name`.
    DefaultAttName,
}

/// [1] NSAttName ::= PrefixedAttName | DefaultAttName
///
/// The prefixed alternative is attempted first; the default alternative is
/// only tried when the prefixed one does not match.
pub fn ns_att_name(input: &str) -> IResult<&str, NSAttName> {
    let prefixed = map(prefixed_att_name, NSAttName::PrefixedAttName);
    let default = value(NSAttName::DefaultAttName, default_att_name);
    alt((prefixed, default))(input)
}
/// The `NCName` that follows the literal `xmlns:` in a prefixed namespace
/// declaration.
#[derive(Clone, Debug)]
pub struct PrefixedAttName<'s>(NCName<'s>);

/// [2] PrefixedAttName ::= 'xmlns:' NCName
///
/// Consumes the literal `xmlns:` and wraps the `NCName` that follows it.
pub fn prefixed_att_name(input: &str) -> IResult<&str, PrefixedAttName> {
    let (rest, declared_prefix) = preceded(tag("xmlns:"), nc_name)(input)?;
    Ok((rest, PrefixedAttName(declared_prefix)))
}
/// Marker for the bare `xmlns` attribute name.
#[derive(Clone, Debug)]
pub struct DefaultAttName;

/// [3] DefaultAttName ::= 'xmlns';
///
/// Matches the literal `xmlns` and discards the matched text.
pub fn default_att_name(input: &str) -> IResult<&str, DefaultAttName> {
    let (rest, _) = tag("xmlns")(input)?;
    Ok((rest, DefaultAttName))
}
#[derive(Clone, Debug)]
pub struct NCName<'s>(&'s str);
/// [4] NCName ::= Name - (Char* ':' Char*)
pub fn nc_name(input: &str) -> IResult<&str, NCName> {
map(
recognize(pair(
recognize(name_start_char).and_then(satisfy(|c| c != ':')),
many_till(name_char, peek(char(':'))),
)),
|nc_name| NCName(nc_name),
)(input)
}
/// A qualified name: either `prefix:local` or a bare local part.
#[derive(Clone, Debug)]
pub enum QName<'s> {
    /// Name with a prefix, as produced by `prefixed_name`.
    PrefixedName(PrefixedName<'s>),
    /// Name without a prefix, as produced by `unprefixed_name`.
    UnprefixedName(UnprefixedName<'s>),
}

/// [7] QName ::= PrefixedName | UnprefixedName
///
/// The prefixed alternative is attempted first; the unprefixed alternative
/// is only tried when the prefixed one does not match.
pub fn q_name(input: &str) -> IResult<&str, QName> {
    let prefixed = map(prefixed_name, QName::PrefixedName);
    let unprefixed = map(unprefixed_name, QName::UnprefixedName);
    alt((prefixed, unprefixed))(input)
}
/// The two halves of a `prefix:local` name.
#[derive(Clone, Debug)]
pub struct PrefixedName<'s> {
    /// The part before the `:`.
    prefix: Prefix<'s>,
    /// The part after the `:`.
    local_part: LocalPart<'s>,
}

/// [8] PrefixedName ::= Prefix ':' LocalPart
///
/// Parses a prefix, a single `:` separator (which is discarded) and a
/// local part.
pub fn prefixed_name(input: &str) -> IResult<&str, PrefixedName> {
    let (rest, (ns_prefix, local)) = separated_pair(prefix, char(':'), local_part)(input)?;
    Ok((
        rest,
        PrefixedName {
            prefix: ns_prefix,
            local_part: local,
        },
    ))
}
#[derive(Clone, Debug)]
pub struct UnprefixedName<'s>(LocalPart<'s>);
/// [9] UnprefixedName ::= LocalPart
pub fn unprefixed_name(input: &str) -> IResult<&str, UnprefixedName> {
map(local_part, |local_part| UnprefixedName(local_part))(input)
}
#[derive(Clone, Debug)]
pub struct Prefix<'s>(NCName<'s>);
/// [10] Prefix ::= NCName
pub fn prefix(input: &str) -> IResult<&str, Prefix> {
map(nc_name, |nc_name| Prefix(nc_name))(input)
}
#[derive(Clone, Debug)]
pub struct LocalPart<'s>(NCName<'s>);
/// [11] LocalPart ::= NCName
pub fn local_part(input: &str) -> IResult<&str, LocalPart> {
map(nc_name, |nc_name| LocalPart(nc_name))(input)
}
// xml spec
pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
/// [1] document ::= prolog element Misc*
pub fn document(input: &str) -> IResult<&str, Document> {
@ -375,17 +474,18 @@ pub fn misc(input: &str) -> IResult<&str, Misc> {
#[derive(Debug)]
pub struct DoctypeDecl<'s> {
name: &'s str,
name: QName<'s>,
external_id: Option<ExternalID<'s>>,
int_subset: Option<IntSubset<'s>>,
}
/// [16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
/// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> {
map(
delimited(
pair(tag("<!DOCTYPE"), s),
tuple((
name,
q_name,
opt(preceded(s, external_id)),
preceded(
opt(s),
@ -522,7 +622,7 @@ pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {
// (Productions 33 through 38 have been removed.)
#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum Element<'s> {
Empty(EmptyElemTag<'s>),
NotEmpty(STag<'s>, Content<'s>, ETag<'s>),
@ -539,41 +639,67 @@ pub fn element(input: &str) -> IResult<&str, Element> {
))(input)
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct STag<'s> {
name: Name<'s>,
name: QName<'s>,
attributes: Vec<Attribute<'s>>,
}
/// [12] STag ::= '<' QName (S Attribute)* S? '>'
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
pub fn s_tag(input: &str) -> IResult<&str, STag> {
map(
delimited(
tag("<"),
pair(name, many0(preceded(s, attribute))),
pair(q_name, many0(preceded(s, attribute))),
pair(opt(s), tag(">")),
),
|(name, attributes)| STag { name, attributes },
)(input)
}
pub type Attribute<'s> = (Name<'s>, AttValue<'s>);
/// [41] Attribute ::= Name Eq AttValue
#[derive(Debug, Clone)]
pub enum Attribute<'s> {
NamespaceDeclaration {
ns_name: NSAttName<'s>,
value: AttValue<'s>,
},
Attribute {
name: QName<'s>,
value: AttValue<'s>,
},
}
/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
pub fn attribute(input: &str) -> IResult<&str, Attribute> {
separated_pair(name, eq, att_value)(input)
alt((
map(
separated_pair(ns_att_name, eq, att_value),
|(ns_name, value)| Attribute::NamespaceDeclaration { ns_name, value },
),
map(separated_pair(q_name, eq, att_value), |(name, value)| {
Attribute::Attribute { name, value }
}),
))(input)
}
// pub type Attribute<'s> = (Name<'s>, AttValue<'s>);
/// [41] Attribute ::= Name Eq AttValue
// pub fn attribute(input: &str) -> IResult<&str, Attribute> {
// separated_pair(name, eq, att_value)(input)
// }
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct ETag<'s> {
name: Name<'s>,
name: QName<'s>,
}
/// [13] ETag ::= '</' QName S? '>'
/// [42] ETag ::= '</' Name S? '>'
pub fn e_tag(input: &str) -> IResult<&str, ETag> {
map(delimited(tag("</"), name, pair(opt(s), tag(">"))), |name| {
ETag { name }
})(input)
map(
delimited(tag("</"), q_name, pair(opt(s), tag(">"))),
|name| ETag { name },
)(input)
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum ContentItem<'s> {
// CharData(&'s str),
Element(Element<'s>),
@ -582,7 +708,7 @@ pub enum ContentItem<'s> {
PI(PI<'s>),
Comment(Comment<'s>),
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct Content<'s> {
char_data: Option<CharData<'s>>,
content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
@ -607,17 +733,18 @@ pub fn content(input: &str) -> IResult<&str, Content> {
)(input)
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct EmptyElemTag<'s> {
name: Name<'s>,
name: QName<'s>,
attributes: Vec<Attribute<'s>>,
}
/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> {
map(
delimited(
tag("<"),
pair(name, many0(preceded(s, attribute))),
pair(q_name, many0(preceded(s, attribute))),
pair(opt(s), tag("/>")),
),
|(name, attributes)| EmptyElemTag { name, attributes },
@ -626,15 +753,16 @@ pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> {
#[derive(Debug)]
pub struct Elementdecl<'s> {
name: Name<'s>,
name: QName<'s>,
contentspec: Contentspec<'s>,
}
/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
/// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
pub fn elementdecl(input: &str) -> IResult<&str, Elementdecl> {
map(
delimited(
pair(tag("<!ELEMENT"), s),
separated_pair(name, s, contentspec),
separated_pair(q_name, s, contentspec),
pair(opt(s), tag(">")),
),
|(name, contentspec)| Elementdecl { name, contentspec },
@ -709,7 +837,7 @@ pub fn children(input: &str) -> IResult<&str, Children> {
#[derive(Clone, Debug)]
pub enum CpKind<'s> {
Name(Name<'s>),
Name(QName<'s>),
Choice(Choice<'s>),
Seq(Seq<'s>),
}
@ -718,12 +846,13 @@ pub struct Cp<'s> {
kind: CpKind<'s>,
occurence: Occurence,
}
/// [18] cp ::= (QName | choice | seq) ('?' | '*' | '+')?
/// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
pub fn cp(input: &str) -> IResult<&str, Cp> {
map(
pair(
alt((
map(name, |name| CpKind::Name(name)),
map(q_name, |name| CpKind::Name(name)),
map(choice, |choice| CpKind::Choice(choice)),
map(seq, |seq| CpKind::Seq(seq)),
)),
@ -769,14 +898,15 @@ pub fn seq(input: &str) -> IResult<&str, Seq> {
// always contains #PCDATA
#[derive(Clone, Debug)]
pub struct Mixed<'s>(Vec<Name<'s>>);
pub struct Mixed<'s>(Vec<QName<'s>>);
/// [19] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'
/// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
pub fn mixed(input: &str) -> IResult<&str, Mixed> {
alt((
map(
delimited(
tuple((tag("("), s, tag("#PCDATA"))),
many0(preceded(tuple((opt(s), tag("|"), opt(s))), name)),
many0(preceded(tuple((opt(s), tag("|"), opt(s))), q_name)),
pair(opt(s), tag(")*")),
),
|names| Mixed(names),
@ -790,15 +920,16 @@ pub fn mixed(input: &str) -> IResult<&str, Mixed> {
#[derive(Debug)]
pub struct AttlistDecl<'s> {
element_type: Name<'s>,
element_type: QName<'s>,
att_defs: Vec<AttDef<'s>>,
}
/// [20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'
/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> {
map(
delimited(
pair(tag("<!ATTLIST"), s),
pair(name, many0(att_def)),
pair(q_name, many0(att_def)),
pair(opt(s), tag(">")),
),
|(element_type, att_defs)| AttlistDecl {
@ -808,17 +939,31 @@ pub fn attlist_decl(input: &str) -> IResult<&str, AttlistDecl> {
)(input)
}
/// Name position of an attribute definition: either an ordinary qualified
/// name or a namespace-declaration attribute name.
#[derive(Debug)]
pub enum AttDefName<'s> {
    /// An ordinary qualified attribute name.
    QName(QName<'s>),
    /// A namespace-declaration attribute name.
    NSAttName(NSAttName<'s>),
}
#[derive(Debug)]
pub struct AttDef<'s> {
name: Name<'s>,
name: AttDefName<'s>,
att_type: AttType<'s>,
default_decl: DefaultDecl<'s>,
}
/// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
/// [53] AttDef ::= S Name S AttType S DefaultDecl
pub fn att_def(input: &str) -> IResult<&str, AttDef> {
map(
tuple((
preceded(s, name),
preceded(
s,
alt((
map(q_name, |q_name| AttDefName::QName(q_name)),
map(ns_att_name, |ns_att_name| {
AttDefName::NSAttName(ns_att_name)
}),
)),
),
preceded(s, att_type),
preceded(s, default_decl),
)),

1
src/xml/parsers.rs Normal file
View File

@ -0,0 +1 @@