WIP: element parsing
This commit is contained in:
parent
0b11cbbfd8
commit
94b716753d
|
@ -19,15 +19,6 @@ use nom::{
|
|||
// Output is a Rust representation of the input XML.
|
||||
// Could these types also be used for XML production?
|
||||
|
||||
/// One item of element content: raw character data or a nested element.
// NOTE(review): a second `ContentItem` enum (with Reference, CDSect, PI and
// Comment variants and no CharData variant) is declared further down in this
// listing. This declaration looks like the superseded, pre-change version —
// confirm before relying on it.
enum ContentItem<'s> {
|
||||
CharData(&'s str),
|
||||
Element(Element<'s>),
|
||||
// Reference(Reference<'s>),
|
||||
// CDSect(CDSect<'s>),
|
||||
}
|
||||
|
||||
/// Element content as an optional list of `ContentItem`s.
// NOTE(review): a `struct Content` with the same name is declared further down
// in this listing; this alias looks like the superseded version — confirm.
type Content<'s> = Option<Vec<ContentItem<'s>>>;
|
||||
|
||||
/// A whole document as a tuple: the prolog, the root element, and a list of
/// `Misc` items, in the order given by production [1].
type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
|
||||
/// [1] document ::= prolog element Misc*
|
||||
pub fn document(input: &str) -> IResult<&str, Document> {
|
||||
|
@ -533,13 +524,30 @@ enum Element<'s> {
|
|||
/// [39] element ::= EmptyElemTag | STag content ETag
|
||||
pub fn element(input: &str) -> IResult<&str, Element> {
|
||||
alt((
|
||||
empty_elem_tag,
|
||||
map(tuple((s_tag, content, e_tag)), |(start, content, end)| {}),
|
||||
map(empty_elem_tag, |empty_elem_tag| {
|
||||
Element::Empty(empty_elem_tag)
|
||||
}),
|
||||
map(tuple((s_tag, content, e_tag)), |(s_tag, content, e_tag)| {
|
||||
Element::NotEmpty(s_tag, content, e_tag)
|
||||
}),
|
||||
))(input)
|
||||
}
|
||||
|
||||
// let STag<'s> = (Name<'s>, );
|
||||
struct STag<'s> {
|
||||
name: Name<'s>,
|
||||
attributes: Vec<Attribute<'s>>,
|
||||
}
|
||||
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
|
||||
pub fn s_tag(input: &str) -> IResult<&str, STag> {
|
||||
map(
|
||||
delimited(
|
||||
tag("<"),
|
||||
pair(name, many0(preceded(s, attribute))),
|
||||
pair(opt(s), tag(">")),
|
||||
),
|
||||
|(name, attributes)| STag { name, attributes },
|
||||
)(input)
|
||||
}
|
||||
|
||||
/// An attribute as a `(name, value)` pair.
type Attribute<'s> = (Name<'s>, AttValue<'s>);
|
||||
/// [41] Attribute ::= Name Eq AttValue
|
||||
|
@ -547,6 +555,64 @@ pub fn attribute(input: &str) -> IResult<&str, Attribute> {
|
|||
separated_pair(name, eq, att_value)(input)
|
||||
}
|
||||
|
||||
struct ETag<'s> {
|
||||
name: Name<'s>,
|
||||
}
|
||||
/// [42] ETag ::= '</' Name S? '>'
|
||||
pub fn e_tag(input: &str) -> IResult<&str, ETag> {
|
||||
map(delimited(tag("</"), name, pair(opt(s), tag(">"))), |name| {
|
||||
ETag { name }
|
||||
})(input)
|
||||
}
|
||||
|
||||
enum ContentItem<'s> {
|
||||
// CharData(&'s str),
|
||||
Element(Element<'s>),
|
||||
Reference(Reference<'s>),
|
||||
CDSect(CDSect<'s>),
|
||||
PI(PI<'s>),
|
||||
Comment(Comment<'s>),
|
||||
}
|
||||
struct Content<'s> {
|
||||
char_data: Option<CharData<'s>>,
|
||||
content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
|
||||
}
|
||||
/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
|
||||
pub fn content(input: &str) -> IResult<&str, Content> {
|
||||
map(
|
||||
pair(
|
||||
opt(char_data),
|
||||
many0(pair(
|
||||
alt((
|
||||
map(element, |element| ContentItem::Element(element)),
|
||||
map(reference, |reference| ContentItem::Reference(reference)),
|
||||
map(cd_sect, |cd_sect| ContentItem::CDSect(cd_sect)),
|
||||
map(pi, |pi| ContentItem::PI(pi)),
|
||||
map(comment, |comment| ContentItem::Comment(comment)),
|
||||
)),
|
||||
opt(char_data),
|
||||
)),
|
||||
),
|
||||
|(char_data, content)| Content { char_data, content },
|
||||
)(input)
|
||||
}
|
||||
|
||||
struct EmptyElemTag<'s> {
|
||||
name: Name<'s>,
|
||||
attributes: Vec<Attribute<'s>>,
|
||||
}
|
||||
/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
|
||||
pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> {
|
||||
map(
|
||||
delimited(
|
||||
tag("<"),
|
||||
pair(name, many0(preceded(s, attribute))),
|
||||
pair(opt(s), tag("/>")),
|
||||
),
|
||||
|(name, attributes)| EmptyElemTag { name, attributes },
|
||||
)(input)
|
||||
}
|
||||
|
||||
enum CharRef<'s> {
|
||||
Decimal(&'s str),
|
||||
Hexadecimal(&'s str),
|
||||
|
@ -738,7 +804,9 @@ struct ExtParsedEnt<'s> {
|
|||
}
|
||||
/// [78] extParsedEnt ::= TextDecl? content
|
||||
pub fn ext_parsed_ent(input: &str) -> IResult<&str, ExtParsedEnt> {
|
||||
pair(opt(text_decl), content)(input)
|
||||
map(pair(opt(text_decl), content), |(text_decl, content)| {
|
||||
ExtParsedEnt { text_decl, content }
|
||||
})(input)
|
||||
}
|
||||
|
||||
/// An encoding declaration, represented directly by its `EncName`.
type EncodingDecl<'s> = EncName<'s>;
|
||||
|
|
Loading…
Reference in New Issue