WIP: element parsing

This commit is contained in:
cel 🌸 2024-06-25 01:04:52 +01:00
parent 0b11cbbfd8
commit 94b716753d
1 changed file with 81 additions and 13 deletions

View File

@ -19,15 +19,6 @@ use nom::{
// output is a rust representation of the input xml
// types could be used for xml production too?
/// One item of element content.
///
/// NOTE(review): a second `ContentItem` definition appears further down in this
/// listing; this one looks like the version the commit removes — confirm which
/// definition is live before relying on it.
enum ContentItem<'s> {
/// Character data, held as a borrowed string slice.
CharData(&'s str),
/// A nested element.
Element(Element<'s>),
// The two variants below are disabled in this version.
// Reference(Reference<'s>),
// CDSect(CDSect<'s>),
}
/// Element content as an optional list of `ContentItem`s.
///
/// NOTE(review): a `struct Content` with a different shape appears further down in
/// this listing; this alias looks like the pre-change form — confirm which is live.
type Content<'s> = Option<Vec<ContentItem<'s>>>;
/// A parsed document as a tuple: the prolog, the element, and the `Misc` items that
/// follow it (same order as production [1]).
type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
/// [1] document ::= prolog element Misc*
pub fn document(input: &str) -> IResult<&str, Document> {
@ -533,13 +524,30 @@ enum Element<'s> {
/// [39] element ::= EmptyElemTag | STag content ETag
///
/// Parses one element from the start of `input`, trying the parsers handed to `alt`
/// in the order they are listed.
///
/// NOTE(review): four alternatives are listed here. The first two (a bare
/// `empty_elem_tag` and a `map` whose closure body is empty) look like the
/// pre-change lines of this commit shown next to their replacements — confirm that
/// only the two `map`s building `Element::Empty` / `Element::NotEmpty` should remain.
///
/// NOTE(review): nothing in this function compares the name in the start tag with
/// the name in the end tag; if that check is required it has to happen elsewhere —
/// confirm.
pub fn element(input: &str) -> IResult<&str, Element> {
alt((
empty_elem_tag,
map(tuple((s_tag, content, e_tag)), |(start, content, end)| {}),
// Empty-element form: wrap the parsed tag as-is.
map(empty_elem_tag, |empty_elem_tag| {
Element::Empty(empty_elem_tag)
}),
// Start tag, content, end tag: keep all three parts.
map(tuple((s_tag, content, e_tag)), |(s_tag, content, e_tag)| {
Element::NotEmpty(s_tag, content, e_tag)
}),
))(input)
}
// let STag<'s> = (Name<'s>, );
/// A parsed start tag (production [40]).
struct STag<'s> {
/// The name that follows the opening `<`.
name: Name<'s>,
/// The attributes parsed after the name, in the order they were parsed.
attributes: Vec<Attribute<'s>>,
}
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
pub fn s_tag(input: &str) -> IResult<&str, STag> {
map(
delimited(
tag("<"),
pair(name, many0(preceded(s, attribute))),
pair(opt(s), tag(">")),
),
|(name, attributes)| STag { name, attributes },
)(input)
}
/// An attribute as a `(name, value)` pair.
type Attribute<'s> = (Name<'s>, AttValue<'s>);
/// [41] Attribute ::= Name Eq AttValue
@ -547,6 +555,64 @@ pub fn attribute(input: &str) -> IResult<&str, Attribute> {
separated_pair(name, eq, att_value)(input)
}
/// A parsed end tag (production [42]).
struct ETag<'s> {
/// The name that follows the opening `</`.
name: Name<'s>,
}
/// [42] ETag ::= '</' Name S? '>'
pub fn e_tag(input: &str) -> IResult<&str, ETag> {
map(delimited(tag("</"), name, pair(opt(s), tag(">"))), |name| {
ETag { name }
})(input)
}
/// One non-text item of element content; the variants mirror the grouped choice
/// `(element | Reference | CDSect | PI | Comment)` in production [43].
enum ContentItem<'s> {
// Character data is not a variant here; `Content` stores it next to the items.
// CharData(&'s str),
/// A nested element.
Element(Element<'s>),
/// A parsed `Reference`.
Reference(Reference<'s>),
/// A parsed `CDSect`.
CDSect(CDSect<'s>),
/// A parsed `PI`.
PI(PI<'s>),
/// A parsed `Comment`.
Comment(Comment<'s>),
}
/// Parsed element content, laid out like production [43]:
/// `CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*`.
struct Content<'s> {
/// Optional character data that precedes the first item.
char_data: Option<CharData<'s>>,
/// Each item paired with the optional character data that follows it.
content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
}
/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
///
/// Parses element content from the front of `input`: optional leading character
/// data, followed by zero or more items, each with optional character data after
/// it. Returns the remaining input and the assembled [`Content`].
pub fn content(input: &str) -> IResult<&str, Content> {
    // One non-text item; alternatives are tried in the order listed.
    let item = alt((
        map(element, ContentItem::Element),
        map(reference, ContentItem::Reference),
        map(cd_sect, ContentItem::CDSect),
        map(pi, ContentItem::PI),
        map(comment, ContentItem::Comment),
    ));

    let (rest, leading) = opt(char_data)(input)?;
    let (rest, items) = many0(pair(item, opt(char_data)))(rest)?;

    Ok((
        rest,
        Content {
            char_data: leading,
            content: items,
        },
    ))
}
/// A parsed empty-element tag (production [44]).
struct EmptyElemTag<'s> {
/// The name that follows the opening `<`.
name: Name<'s>,
/// The attributes parsed after the name, in the order they were parsed.
attributes: Vec<Attribute<'s>>,
}
/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> {
map(
delimited(
tag("<"),
pair(name, many0(preceded(s, attribute))),
pair(opt(s), tag("/>")),
),
|(name, attributes)| EmptyElemTag { name, attributes },
)(input)
}
enum CharRef<'s> {
Decimal(&'s str),
Hexadecimal(&'s str),
@ -738,7 +804,9 @@ struct ExtParsedEnt<'s> {
}
/// [78] extParsedEnt ::= TextDecl? content
///
/// Parses an optional `text_decl` followed by `content` from the start of `input`.
///
/// NOTE(review): the body below holds two consecutive expressions — a bare
/// `pair(...)` call and a `map(...)` that builds an `ExtParsedEnt` — which looks
/// like the old and new line of this commit shown together. Confirm that only the
/// `map(...)` form is meant to remain.
pub fn ext_parsed_ent(input: &str) -> IResult<&str, ExtParsedEnt> {
pair(opt(text_decl), content)(input)
// Pack the two parsed parts into the named struct fields.
map(pair(opt(text_decl), content), |(text_decl, content)| {
ExtParsedEnt { text_decl, content }
})(input)
}
/// An encoding declaration, represented by its `EncName` alone.
type EncodingDecl<'s> = EncName<'s>;