From 94b716753d08ec6fea53aa1942ca5ccfeda96fd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?cel=20=F0=9F=8C=B8?= Date: Tue, 25 Jun 2024 01:04:52 +0100 Subject: [PATCH] WIP: element parsing --- src/parser.rs | 94 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 81 insertions(+), 13 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 882ebae..d86516a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -19,15 +19,6 @@ use nom::{ // output is a rust representation of the input xml // types could be used for xml production too? -enum ContentItem<'s> { - CharData(&'s str), - Element(Element<'s>), - // Reference(Reference<'s>), - // CDSect(CDSect<'s>), -} - -type Content<'s> = Option>>; - type Document<'s> = (Prolog<'s>, Element<'s>, Vec>); /// [1] document ::= prolog element Misc* pub fn document(input: &str) -> IResult<&str, Document> { @@ -533,13 +524,30 @@ enum Element<'s> { /// [39] element ::= EmptyElemTag | STag content ETag pub fn element(input: &str) -> IResult<&str, Element> { alt(( - empty_elem_tag, - map(tuple((s_tag, content, e_tag)), |(start, content, end)| {}), + map(empty_elem_tag, |empty_elem_tag| { + Element::Empty(empty_elem_tag) + }), + map(tuple((s_tag, content, e_tag)), |(s_tag, content, e_tag)| { + Element::NotEmpty(s_tag, content, e_tag) + }), ))(input) } -// let STag<'s> = (Name<'s>, ); +struct STag<'s> { + name: Name<'s>, + attributes: Vec>, +} /// [40] STag ::= '<' Name (S Attribute)* S? '>' +pub fn s_tag(input: &str) -> IResult<&str, STag> { + map( + delimited( + tag("<"), + pair(name, many0(preceded(s, attribute))), + pair(opt(s), tag(">")), + ), + |(name, attributes)| STag { name, attributes }, + )(input) +} type Attribute<'s> = (Name<'s>, AttValue<'s>); /// [41] Attribute ::= Name Eq AttValue @@ -547,6 +555,64 @@ pub fn attribute(input: &str) -> IResult<&str, Attribute> { separated_pair(name, eq, att_value)(input) } +struct ETag<'s> { + name: Name<'s>, +} +/// [42] ETag ::= '' +pub fn e_tag(input: &str) -> IResult<&str, ETag> { + map(delimited(tag(""))), |name| { + ETag { name } + })(input) +} + +enum ContentItem<'s> { + // CharData(&'s str), + Element(Element<'s>), + Reference(Reference<'s>), + CDSect(CDSect<'s>), + PI(PI<'s>), + Comment(Comment<'s>), +} +struct Content<'s> { + char_data: Option>, + content: Vec<(ContentItem<'s>, Option>)>, +} +/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* +pub fn content(input: &str) -> IResult<&str, Content> { + map( + pair( + opt(char_data), + many0(pair( + alt(( + map(element, |element| ContentItem::Element(element)), + map(reference, |reference| ContentItem::Reference(reference)), + map(cd_sect, |cd_sect| ContentItem::CDSect(cd_sect)), + map(pi, |pi| ContentItem::PI(pi)), + map(comment, |comment| ContentItem::Comment(comment)), + )), + opt(char_data), + )), + ), + |(char_data, content)| Content { char_data, content }, + )(input) +} + +struct EmptyElemTag<'s> { + name: Name<'s>, + attributes: Vec>, +} +/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec] +pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> { + map( + delimited( + tag("<"), + pair(name, many0(preceded(s, attribute))), + pair(opt(s), tag("/>")), + ), + |(name, attributes)| EmptyElemTag { name, attributes }, + )(input) +} + enum CharRef<'s> { Decimal(&'s str), Hexadecimal(&'s str), @@ -738,7 +804,9 @@ struct ExtParsedEnt<'s> { } /// [78] extParsedEnt ::= TextDecl? content pub fn ext_parsed_ent(input: &str) -> IResult<&str, ExtParsedEnt> { - pair(opt(text_decl), content)(input) + map(pair(opt(text_decl), content), |(text_decl, content)| { + ExtParsedEnt { text_decl, content } + })(input) } type EncodingDecl<'s> = EncName<'s>;