From 435b2af24cc6a1e8d382ca817bef6a94510127f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?cel=20=F0=9F=8C=B8?= Date: Tue, 25 Jun 2024 16:41:01 +0100 Subject: [PATCH] WIP: element type declarations --- src/parser.rs | 169 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 166 insertions(+), 3 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index d86516a..bae9737 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -434,7 +434,7 @@ pub fn int_subset(input: &str) -> IResult<&str, IntSubset> { } enum MarkupDecl<'s> { - ElementDecl(ElementDecl<'s>), + Elementdecl(Elementdecl<'s>), AttlistDecl(AttlistDecl<'s>), EntityDecl(EntityDecl<'s>), NotationDecl(NotationDecl<'s>), @@ -444,8 +444,8 @@ enum MarkupDecl<'s> { /// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment pub fn markup_decl(input: &str) -> IResult<&str, MarkupDecl> { alt(( - map(element_decl, |element_decl| { - MarkupDecl::ElementDecl(element_decl) + map(elementdecl, |elementdecl| { + MarkupDecl::Elementdecl(elementdecl) }), map(attlist_decl, |attlist_decl| { MarkupDecl::AttlistDecl(attlist_decl) @@ -613,6 +613,169 @@ pub fn empty_elem_tag(input: &str) -> IResult<&str, EmptyElemTag> { )(input) } +struct Elementdecl<'s> { + name: Name<'s>, + contentspec: Contentspec<'s>, +} +/// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' +pub fn elementdecl(input: &str) -> IResult<&str, Elementdecl> { + map( + delimited( + pair(tag("<!ELEMENT"), s), + separated_pair(name, s, contentspec), + pair(opt(s), tag(">")), + ), + |(name, contentspec)| Elementdecl { name, contentspec }, + )(input) +} + +// TODO: casings??? 
+#[derive(Clone)] +enum Contentspec<'s> { + Empty, + Any, + Mixed(Mixed<'s>), + Children(Children<'s>), +} +/// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children +pub fn contentspec(input: &str) -> IResult<&str, Contentspec> { + alt(( + value(Contentspec::Empty, tag("EMPTY")), + value(Contentspec::Any, tag("ANY")), + map(mixed, |mixed| Contentspec::Mixed(mixed)), + map(children, |children| Contentspec::Children(children)), + ))(input) +} + +#[derive(Clone)] +enum Occurence { + Once, + Optional, + Many0, + Many1, +} +/// Occurence ::= ('?' | '*' | '+')? +pub fn occurence(input: &str) -> IResult<&str, Occurence> { + map( + opt(alt((tag("?"), tag("*"), tag("+")))), + |occurence| match occurence { + Some("?") => Occurence::Optional, + Some("*") => Occurence::Many0, + Some("+") => Occurence::Many1, + _ => Occurence::Once, + }, + )(input) +} + +#[derive(Clone)] +enum ChildrenKind<'s> { + Choice(Choice<'s>), + Seq(Seq<'s>), +} +#[derive(Clone)] +struct Children<'s> { + kind: ChildrenKind<'s>, + occurence: Occurence, +} +/// [47] children ::= (choice | seq) ('?' | '*' | '+')? +pub fn children(input: &str) -> IResult<&str, Children> { + map( + pair( + alt(( + map(choice, |choice| ChildrenKind::Choice(choice)), + map(seq, |seq| ChildrenKind::Seq(seq)), + )), + occurence, + ), + |(kind, occurence)| Children { kind, occurence }, + )(input) + // alt(( + // map(pair(choice, occurence), |(choice, occurence)| Children::Choice(choice, occurence)), + // map(pair(seq, occurence), |(seq, occurence)| Children::Seq(seq, occurence)) + // ))(input) +} + +#[derive(Clone)] +enum CpKind<'s> { + Name(Name<'s>), + Choice(Choice<'s>), + Seq(Seq<'s>), +} +#[derive(Clone)] +struct Cp<'s> { + kind: CpKind<'s>, + occurence: Occurence, +} +/// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 
+pub fn cp(input: &str) -> IResult<&str, Cp> { + map( + pair( + alt(( + map(name, |name| CpKind::Name(name)), + map(choice, |choice| CpKind::Choice(choice)), + map(seq, |seq| CpKind::Seq(seq)), + )), + occurence, + ), + |(kind, occurence)| Cp { kind, occurence }, + )(input) +} + +#[derive(Clone)] +struct Choice<'s>(Vec<Cp<'s>>); +/// [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' +pub fn choice(input: &str) -> IResult<&str, Choice> { + map( + delimited( + pair(tag("("), opt(s)), + pair(cp, many1(preceded(tuple((opt(s), tag("|"), opt(s))), cp))), + pair(opt(s), tag(")")), + ), + |(first, rest)| { + let choice = vec![vec![first], rest].concat(); + Choice(choice) + }, + )(input) +} + +#[derive(Clone)] +struct Seq<'s>(Vec<Cp<'s>>); +/// [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' +pub fn seq(input: &str) -> IResult<&str, Seq> { + map( + delimited( + pair(tag("("), opt(s)), + pair(cp, many0(preceded(tuple((opt(s), tag(","), opt(s))), cp))), + pair(opt(s), tag(")")), + ), + |(first, rest)| { + let seq = vec![vec![first], rest].concat(); + Seq(seq) + }, + )(input) +} + +// always contains #PCDATA +#[derive(Clone)] +struct Mixed<'s>(Vec<Name<'s>>); +/// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' +pub fn mixed(input: &str) -> IResult<&str, Mixed> { + alt(( + map( + delimited( + tuple((tag("("), s, tag("#PCDATA"))), + many0(preceded(tuple((opt(s), tag("|"), opt(s))), name)), + pair(opt(s), tag(")*")), + ), + |names| Mixed(names), + ), + value( + Mixed(Vec::new()), + tuple((tag("("), opt(s), tag("#PCDATA"), opt(s), tag(")"))), + ), + ))(input) +} + enum CharRef<'s> { Decimal(&'s str), Hexadecimal(&'s str),