peanuts/src/parser.rs

use std::char;

use nom::{
    branch::{alt, permutation},
    bytes::streaming::{is_a, is_not, tag, take, take_till, take_until},
    character::{
        complete::one_of,
        streaming::{alpha1, char, digit1, none_of, satisfy},
    },
    combinator::{cond, map, map_parser, map_res, not, opt, peek, recognize, value, verify},
    error::{Error, ErrorKind},
    multi::{many0, many1, many_till},
    sequence::{delimited, pair, preceded, tuple},
    Err, IResult, Parser,
};

// parser: parses tokens from lexer into events

/// Comment payload as a borrowed string slice; carried by [`Misc::Comment`].
type Comment<'s> = &'s str;

/// A processing instruction; carried by [`Misc::PI`].
struct PI<'s> {
    /// Target of the instruction, borrowed as a string slice.
    target: &'s str,
    /// Instruction text, if any was supplied.
    instruction: Option<&'s str>,
}

/// One item inside an element's content (see [`Content`]).
enum ContentItem<'s> {
    /// A run of character data, borrowed as a string slice.
    CharData(&'s str),
    /// A nested child element.
    Element(Element<'s>),
    // Not modelled yet:
    // Reference(Reference<'s>),
    // CDSect(CDSect<'s>),
}

/// Optional list of content items belonging to an [`Element`].
// NOTE(review): `None` presumably stands for an empty-element tag — confirm once `element` is implemented.
type Content<'s> = Option<Vec<ContentItem<'s>>>;

/// A single key/value attribute attached to an [`Element`].
struct Attribute<'s> {
    /// Attribute name, borrowed as a string slice.
    key: &'s str,
    /// Attribute value, borrowed as a string slice.
    value: &'s str,
}

/// Encoding name, borrowed as a string slice.
///
/// Contains only latin characters or dash after first char.
// NOTE(review): no parser in this file enforces that constraint yet — confirm when `EncodingDecl` lands.
type EncName<'s> = &'s str;

/// Result type for a document type declaration; currently holds only a `name` slice.
struct DoctypeDecl<'s> {
    name: &'s str,
    // TODO: doctype declaration parsing
}
/// Placeholder for the `doctypedecl` production; not implemented yet.
///
/// # Panics
///
/// Always panics, because the body is `todo!()`.
pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> {
    todo!()
}

/// An element: its name, its attributes and its (optional) content.
struct Element<'s> {
    name: &'s str,
    attributes: Vec<Attribute<'s>>,
    content: Content<'s>,
}
/// Placeholder for the `element` production; not implemented yet.
///
/// # Panics
///
/// Always panics, because the body is `todo!()`.
pub fn element(input: &str) -> IResult<&str, Element> {
    todo!()
}

/// Miscellaneous markup accepted around the root element by [`prolog`] and [`document`].
enum Misc<'s> {
    /// A comment.
    Comment(Comment<'s>),
    /// A processing instruction.
    PI(PI<'s>),
}
/// Placeholder for the `Misc` production; not implemented yet.
///
/// # Panics
///
/// Always panics, because the body is `todo!()`.
pub fn misc(input: &str) -> IResult<&str, Misc> {
    todo!()
}

type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
/// [1]   	document	   ::=   	prolog element Misc*
pub fn document(input: &str) -> IResult<&str, Document> {
    tuple((prolog, element, many0(misc)))(input)
}

type Char = char;
/// [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
///
/// Consumes exactly one character that is legal in an XML document (any
/// Unicode character excluding the surrogate blocks, FFFE and FFFF).
pub fn xmlchar(input: &str) -> IResult<&str, Char> {
    let is_xml_char = |c: char| {
        matches!(
            c,
            '\u{9}'
                | '\u{A}'
                | '\u{D}'
                | '\u{20}'..='\u{D7FF}'
                | '\u{E000}'..='\u{FFFD}'
                | '\u{10000}'..='\u{10FFFF}'
        )
    };
    satisfy(is_xml_char)(input)
}

type S<'s> = &'s str;
/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
///
/// Consumes a non-empty run of XML white space and returns it as a slice.
pub fn s(input: &str) -> IResult<&str, S> {
    // Space, tab, carriage return, line feed.
    const WHITE_SPACE: &str = "\u{20}\u{9}\u{D}\u{A}";
    is_a(WHITE_SPACE)(input)
}

type NameStartChar = char;
/// [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
///
/// Consumes exactly one character that may begin an XML name.
pub fn name_start_char(input: &str) -> IResult<&str, NameStartChar> {
    let starts_name = |c: char| {
        matches!(
            c,
            ':' | '_'
                | 'A'..='Z'
                | 'a'..='z'
                | '\u{C0}'..='\u{D6}'
                | '\u{D8}'..='\u{F6}'
                | '\u{F8}'..='\u{2FF}'
                | '\u{370}'..='\u{37D}'
                | '\u{37F}'..='\u{1FFF}'
                | '\u{200C}'..='\u{200D}'
                | '\u{2070}'..='\u{218F}'
                | '\u{2C00}'..='\u{2FEF}'
                | '\u{3001}'..='\u{D7FF}'
                | '\u{F900}'..='\u{FDCF}'
                | '\u{FDF0}'..='\u{FFFD}'
                | '\u{10000}'..='\u{EFFFF}'
        )
    };
    satisfy(starts_name)(input)
}

type NameChar = char;
/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
///
/// Consumes exactly one character that may appear inside an XML name:
/// either a [`name_start_char`] or one of the additional name characters.
pub fn name_char(input: &str) -> IResult<&str, NameChar> {
    // Characters allowed in a name but not as its first character.
    let continuation_only = satisfy(|c: char| {
        matches!(
            c,
            '-' | '.'
                | '0'..='9'
                | '\u{B7}'
                | '\u{0300}'..='\u{036F}'
                | '\u{203F}'..='\u{2040}'
        )
    });
    alt((name_start_char, continuation_only))(input)
}

type Name<'s> = &'s str;
/// [5]   	Name	   ::=   	NameStartChar (NameChar)*
pub fn name(input: &str) -> IResult<&str, Name> {
    recognize(pair(name_start_char, many0(name_char)))(input)
}

type Names<'s> = &'s str;
/// [6]   	Names	   ::=   	Name (#x20 Name)*
pub fn names(input: &str) -> IResult<&str, Names> {
    recognize(pair(name, many0(pair(char('\u{20}'), name))))(input)
}

type Nmtoken<'s> = &'s str;
/// [7]   	Nmtoken	   ::=   	(NameChar)+
pub fn nmtoken(input: &str) -> IResult<&str, Nmtoken> {
    recognize(many1(name_char))(input)
}

type Nmtokens<'s> = &'s str;
/// [8]   	Nmtokens	   ::=   	Nmtoken (#x20 Nmtoken)*
pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> {
    recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input)
}

type EntityValue<'s> = &'s str;
/// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
///                  |  "'" ([^%&'] | PEReference | Reference)* "'"
///
/// Parses a quoted entity value and returns the text between the quotes.
/// The double-quoted form is tried first, then the single-quoted form.
pub fn entity_value(input: &str) -> IResult<&str, EntityValue> {
    let double_quoted = delimited(
        char('"'),
        recognize(many0(alt((none_of("%&\""), pe_reference, reference)))),
        char('"'),
    );
    let single_quoted = delimited(
        char('\''),
        recognize(many0(alt((none_of("%&'"), pe_reference, reference)))),
        char('\''),
    );
    alt((double_quoted, single_quoted))(input)
}

type AttValue<'s> = &'s str;
/// [10]   	AttValue	   ::=   	'"' ([^<&"] | Reference)* '"'
/// 			|  "'" ([^<&'] | Reference)* "'"
pub fn att_value(input: &str) -> IResult<&str, AttValue> {
    alt((
        delimited(
            char('"'),
            recognize(many0(alt((none_of("<&\""), reference)))),
            char('"'),
        ),
        delimited(
            char('\''),
            recognize(many0(alt((none_of("<&'"), reference)))),
            char('\''),
        ),
    ))(input)
}

type SystemLiteral<'s> = &'s str;
/// [11]   	SystemLiteral	   ::=   	('"' [^"]* '"') | ("'" [^']* "'")
pub fn system_literal(input: &str) -> IResult<&str, SystemLiteral> {
    alt((
        delimited(char('"'), recognize(many0(none_of("\""))), char('"')),
        delimited(char('\''), recognize(many0(none_of("'"))), char('\'')),
    ))(input)
}

type PubidLiteral<'s> = &'s str;
/// [12]   	PubidLiteral	   ::=   	'"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
pub fn pubid_literal(input: &str) -> IResult<&str, PubidLiteral> {
    alt((
        delimited(char('"'), recognize(many0(pubid_char)), char('"')),
        delimited(
            char('\''),
            recognize(many0(recognize(not(char('\''))).and_then(pubid_char))),
            char('\''),
        ),
    ))(input)
}

type PubidChar<'s> = char;
/// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
///
/// Consumes exactly one character that is allowed inside a public
/// identifier literal, including the punctuation class of the production.
pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> {
    satisfy(|c: char| {
        matches!(
            c,
            '\u{20}' | '\u{D}' | '\u{A}'
                | 'a'..='z'
                | 'A'..='Z'
                | '0'..='9'
                // [-'()+,./:=?;!*#@$_%]
                | '-' | '\'' | '(' | ')' | '+' | ',' | '.' | '/' | ':' | '='
                | '?' | ';' | '!' | '*' | '#' | '@' | '$' | '_' | '%'
        )
    })(input)
}

// TODO: wtf why doesn't this work how do i do thisjj
type CharData<'s> = &'s str;
/// [14]   	CharData	   ::=   	[^<&]* - ([^<&]* ']]>' [^<&]*)
pub fn char_data(input: &str) -> IResult<&str, CharData> {
    // tag(map(
    //     peek(alt((
    //         map_parser(
    //             peek(take_until("]]>")),
    //             nom::bytes::complete::take_till(|c| c == '<' || c == '&'),
    //         ),
    //         map_parser(
    //             peek(take_till(|c| c == '<' || c == '&')),
    //             nom::bytes::complete::take_until("]]>"),
    //         ),
    //     ))),
    //     |(first, _)| first,
    // ))(input)

    // map(
    //     tuple((is_not("<&]"), peek(alt((tag("<"), tag("&"), tag("]]>")))))),
    //     |(first, _)| first,
    // )(input)
    // map(
    //     tuple((recognize(many0(none_of("<&"))), opt(peek(tag("]]>"))))),
    //     |(first, _)| first,
    // )(input)
    // alt((recognize(many0(none_of("<&"))), take_until("]]>")))(input)
    let tagg: &str;
    if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) {
        if let Ok((_, tagg2)) =
            peek::<&str, &str, Error<&str>, _>(take_till(|c: char| c == '<' || c == '&'))(input)
        {
            if tagg1.len() < tagg2.len() {
                tagg = tagg1
            } else {
                tagg = tagg2
            }
        } else {
            tagg = tagg1;
        }
    } else {
        (_, tagg) = peek(take_till(|c| c == '<' || c == '&'))(input)?
    }
    tag(tagg)(input)
    // let mut len = 0;
    // let ch  = input.chars().collect::<Vec<_>>();
    // for (idx, char) in ch.as_ref().into_iter().enumerate() {
    //     match char {
    //         '<' | '&' =>                 break,
    //         ']' => {
    //             if idx <= ch.len() - 3 {}
    //         },
    //         _ => todo!(),
    //     }
    // }
    // while let Some(char) = chars.next() {
    //     if char == '<' || char == '&' {
    //         break;
    //     } else if char == ']' {
    //         if let Some(next) = chars.peek() {
    //             if next == ']' {
    //                 if let Some(next) = chars.next_if_eq() {}
    //             }
    //         }
    //     }
    //     len += 1;
    // }
    // todo!()
    // recognize(many0(permutation((none_of("<&"), not(tag("]]>"))))))(input)
    // recognize(many0(not(alt((tag("<"), tag("&"), tag("]]>"))))))(input)
    // take_till(|c| c == '<' || c == '&').and_then(take_until("]]>"))(input)
}

type Prolog<'s> = (
    Option<XMLDecl>,
    Vec<Misc<'s>>,
    Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>,
);
/// [22]   	prolog	   ::=   	XMLDecl? Misc* (doctypedecl Misc*)?
pub fn prolog(input: &str) -> IResult<&str, Prolog> {
    tuple((
        opt(xml_decl),
        many0(misc),
        opt(tuple((doctypedecl, many0(misc)))),
    ))(input)
}

struct XMLDecl {
    version_info: VersionInfo,
    // encoding_decl: Option<EncodingDecl>,
    // sd_decl: Option<SDDecl>,
}
/// [23]   	XMLDecl	   ::=   	'<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> {
    // (VersionInfo, Option<EncodingDecl>, Option<SDDecl>)
    let (leftover, (version_info /* encoding_decl, sd_decl */,)) = delimited(
        tag("<?xml"),
        tuple((version_info /* opt(encoding_decl), opt(sd_decl) */,)),
        tag("?>"),
    )(input)?;
    Ok((
        leftover,
        XMLDecl {
            version_info,
            // encoding_decl,
            // sd_decl,
        },
    ))
}

type VersionInfo = VersionNum;
/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
///
/// Parses the mandatory white space, the `version` keyword, the equals sign
/// and a quoted version number, returning only the version number. The
/// single-quoted form is tried first, then the double-quoted form.
pub fn version_info(input: &str) -> IResult<&str, VersionInfo> {
    let single_quoted = delimited(char('\''), version_num, char('\''));
    let double_quoted = delimited(char('"'), version_num, char('"'));

    let (rest, _) = tuple((s, tag("version"), eq))(input)?;
    alt((single_quoted, double_quoted))(rest)
}

/// [25] Eq ::= S? '=' S?
///
/// Parses an equals sign with optional white space on either side and
/// returns `(leading white space, '=', trailing white space)`.
pub fn eq(input: &str) -> IResult<&str, (Option<&str>, char, Option<&str>)> {
    let (rest, before) = opt(s)(input)?;
    let (rest, sign) = char('=')(rest)?;
    let (rest, after) = opt(s)(rest)?;
    Ok((rest, (before, sign, after)))
}

#[derive(Clone)]
enum VersionNum {
    One,
    OneDotOne,
}
/// [26]   	VersionNum	   ::=   	'1.' [0-9]+
pub fn version_num(input: &str) -> IResult<&str, VersionNum> {
    preceded(
        tag("1."),
        alt((
            value(VersionNum::One, char('0')),
            value(VersionNum::OneDotOne, char('1')),
        )),
    )(input)
}

/// Placeholder for the `Reference` production; used by [`entity_value`] and
/// [`att_value`] but not implemented yet.
///
/// # Panics
///
/// Always panics, because the body is `todo!()`.
pub fn reference(input: &str) -> IResult<&str, char> {
    todo!()
}

/// Placeholder for the `PEReference` production; used by [`entity_value`]
/// but not implemented yet.
///
/// # Panics
///
/// Always panics, because the body is `todo!()`.
pub fn pe_reference(input: &str) -> IResult<&str, char> {
    todo!()
}

#[cfg(test)]
mod tests {
    use std::num::NonZero;

    use super::*;

    /// `char_data` must stop at the first `<`, `&` or `]]>`, whichever comes
    /// first, and report `Incomplete` when no terminator is present.
    #[test]
    fn test_char_data() {
        // `&` comes before `]]>`.
        assert_eq!(Ok(("&def]]>ghi", "abc")), char_data("abc&def]]>ghi"));
        // Only `]]>` is present.
        assert_eq!(Ok(("]]>ghi", "abcdef")), char_data("abcdef]]>ghi"));
        // Only `&` is present.
        assert_eq!(Ok(("&defghi", "abc")), char_data("abc&defghi"));
        // `]]>` comes before `&`.
        assert_eq!(Ok(("]]>def&ghi", "abc")), char_data("abc]]>def&ghi"));
        // A lone `]>` is ordinary character data.
        assert_eq!(Ok(("&ghi", "abc]>def")), char_data("abc]>def&ghi"));
        // No terminator at all: the streaming parser asks for more input.
        assert_eq!(
            Err(Err::Incomplete(nom::Needed::Size(
                NonZero::new(1usize).unwrap()
            ))),
            char_data("abcdefghi")
        );
    }
}
WIP: parsers 2024-06-12 10:15:48 +01:00			`use std::char;`

			`use nom::{`
FML 2024-06-14 16:53:47 +01:00			`branch::{alt, permutation},`
			`bytes::streaming::{is_a, is_not, tag, take, take_till, take_until},`
WIP: parsers 2024-06-12 10:15:48 +01:00			`character::{`
			`complete::one_of,`
FML 2024-06-14 16:53:47 +01:00			`streaming::{alpha1, char, digit1, none_of, satisfy},`
WIP: parsers 2024-06-12 10:15:48 +01:00			`},`
FML 2024-06-14 16:53:47 +01:00			`combinator::{cond, map, map_parser, map_res, not, opt, peek, recognize, value, verify},`
			`error::{Error, ErrorKind},`
			`multi::{many0, many1, many_till},`
WIP: parsers 2024-06-12 10:15:48 +01:00			`sequence::{delimited, pair, preceded, tuple},`
			`Err, IResult, Parser,`
			`};`

initial commit 2024-03-04 16:14:28 +00:00			`// parser: parses tokens from lexer into events`
WIP: parsers 2024-06-12 10:15:48 +01:00
			`type Comment<'s> = &'s str;`

			`struct PI<'s> {`
			`target: &'s str,`
			`instruction: Option<&'s str>,`
			`}`

			`enum ContentItem<'s> {`
			`CharData(&'s str),`
			`Element(Element<'s>),`
actually get it to build for initial tests 2024-06-14 13:11:32 +01:00			`// Reference(Reference<'s>),`
			`// CDSect(CDSect<'s>),`
WIP: parsers 2024-06-12 10:15:48 +01:00			`}`

			`type Content<'s> = Option<Vec<ContentItem<'s>>>;`

			`struct Attribute<'s> {`
			`key: &'s str,`
			`value: &'s str,`
			`}`

			`/// Contains only latin characters or dash after first char`
			`type EncName<'s> = &'s str;`

			`struct DoctypeDecl<'s> {`
			`name: &'s str,`
actually get it to build for initial tests 2024-06-14 13:11:32 +01:00			`// TODO: doctype declaration parsing`
WIP: parsers 2024-06-12 10:15:48 +01:00			`}`
actually get it to build for initial tests 2024-06-14 13:11:32 +01:00			`///`
WIP: parsers 2024-06-12 10:15:48 +01:00			`pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> {`
			`todo!()`
			`}`

actually get it to build for initial tests 2024-06-14 13:11:32 +01:00			`struct Element<'s> {`
			`name: &'s str,`
			`attributes: Vec<Attribute<'s>>,`
			`content: Content<'s>,`
			`}`
			`/// Element`
WIP: parsers 2024-06-12 10:15:48 +01:00			`pub fn element(input: &str) -> IResult<&str, Element> {`
			`todo!()`
			`}`

actually get it to build for initial tests 2024-06-14 13:11:32 +01:00			`enum Misc<'s> {`
			`Comment(Comment<'s>),`
			`PI(PI<'s>),`
			`}`
			`/// Misc`
WIP: parsers 2024-06-12 10:15:48 +01:00			`pub fn misc(input: &str) -> IResult<&str, Misc> {`
			`todo!()`
			`}`

			`type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);`
			`/// [1] document ::= prolog element Misc*`
			`pub fn document(input: &str) -> IResult<&str, Document> {`
			`tuple((prolog, element, many0(misc)))(input)`
			`}`

			`type Char = char;`
			`/// [2] Char ::= #x9 \| #xA \| #xD \| [#x20-#xD7FF] \| [#xE000-#xFFFD] \| [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */`
			`pub fn xmlchar(input: &str) -> IResult<&str, Char> {`
			`satisfy(`
			`\|c\| matches!(c, '\u{9}' \| '\u{A}' \| '\u{D}' \| '\u{20}'..='\u{D7FF}' \| '\u{E000}'..='\u{FFFD}' \| '\u{10000}'..='\u{10FFFF}'),`
			`)(input)`
			`}`

			`type S<'s> = &'s str;`
			`/// [3] S ::= (#x20 \| #x9 \| #xD \| #xA)+`
			`pub fn s(input: &str) -> IResult<&str, S> {`
			`is_a("\u{20}\u{9}\u{D}\u{A}")(input)`
			`}`

			`type NameStartChar = char;`
			`/// [4] NameStartChar ::= ":" \| [A-Z] \| "_" \| [a-z] \| [#xC0-#xD6] \| [#xD8-#xF6] \| [#xF8-#x2FF] \| [#x370-#x37D] \| [#x37F-#x1FFF] \| [#x200C-#x200D] \| [#x2070-#x218F] \| [#x2C00-#x2FEF] \| [#x3001-#xD7FF] \| [#xF900-#xFDCF] \| [#xFDF0-#xFFFD] \| [#x10000-#xEFFFF]`
			`pub fn name_start_char(input: &str) -> IResult<&str, NameStartChar> {`
			`satisfy(`
			`\|c\| matches!(c, ':' \| 'A'..='Z' \| '_' \| 'a'..='z' \| '\u{C0}'..='\u{D6}' \| '\u{D8}'..='\u{F6}' \| '\u{F8}'..='\u{2FF}' \| '\u{370}'..='\u{37D}' \| '\u{37F}'..='\u{1FFF}' \| '\u{200C}'..='\u{200D}' \| '\u{2070}'..='\u{218F}' \| '\u{2C00}'..='\u{2FEF}' \| '\u{3001}'..='\u{D7FF}' \| '\u{F900}'..='\u{FDCF}' \| '\u{FDF0}'..='\u{FFFD}' \| '\u{10000}'..='\u{EFFFF}'),`
			`)(input)`
			`}`

			`type NameChar = char;`
			`/// [4a] NameChar ::= NameStartChar \| "-" \| "." \| [0-9] \| #xB7 \| [#x0300-#x036F] \| [#x203F-#x2040]`
			`pub fn name_char(input: &str) -> IResult<&str, NameChar> {`
			`alt((`
			`name_start_char,`
			`satisfy(`
			`\|c\| matches!(c, '-' \| '.' \| '0'..='9' \| '\u{B7}' \| '\u{0300}'..='\u{036F}' \| '\u{203F}'..='\u{2040}'),`
			`),`
			`))(input)`
			`}`

			`type Name<'s> = &'s str;`
			`/// [5] Name ::= NameStartChar (NameChar)*`
			`pub fn name(input: &str) -> IResult<&str, Name> {`
			`recognize(pair(name_start_char, many0(name_char)))(input)`
			`}`

			`type Names<'s> = &'s str;`
			`/// [6] Names ::= Name (#x20 Name)*`
			`pub fn names(input: &str) -> IResult<&str, Names> {`
			`recognize(pair(name, many0(pair(char('\u{20}'), name))))(input)`
			`}`

			`type Nmtoken<'s> = &'s str;`
			`/// [7] Nmtoken ::= (NameChar)+`
			`pub fn nmtoken(input: &str) -> IResult<&str, Nmtoken> {`
			`recognize(many1(name_char))(input)`
			`}`

			`type Nmtokens<'s> = &'s str;`
			`/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*`
			`pub fn nmtokens(input: &str) -> IResult<&str, Nmtokens> {`
			`recognize(pair(nmtoken, many0(pair(char('\u{20}'), nmtoken))))(input)`
			`}`

			`type EntityValue<'s> = &'s str;`
			`/// [9] EntityValue ::= '"' ([^%&"] \| PEReference \| Reference)* '"'`
			`/// \| "'" ([^%&'] \| PEReference \| Reference)* "'"`
			`pub fn entity_value(input: &str) -> IResult<&str, EntityValue> {`
			`alt((`
			`delimited(`
			`char('"'),`
			`recognize(many0(alt((none_of("%&\""), pe_reference, reference)))),`
			`char('"'),`
			`),`
			`delimited(`
			`char('\''),`
			`recognize(many0(alt((none_of("%&'"), pe_reference, reference)))),`
			`char('\''),`
			`),`
			`))(input)`
			`}`

			`type AttValue<'s> = &'s str;`
			`/// [10] AttValue ::= '"' ([^<&"] \| Reference)* '"'`
			`/// \| "'" ([^<&'] \| Reference)* "'"`
			`pub fn att_value(input: &str) -> IResult<&str, AttValue> {`
			`alt((`
			`delimited(`
			`char('"'),`
			`recognize(many0(alt((none_of("<&\""), reference)))),`
			`char('"'),`
			`),`
			`delimited(`
			`char('\''),`
			`recognize(many0(alt((none_of("<&'"), reference)))),`
			`char('\''),`
			`),`
			`))(input)`
			`}`

			`type SystemLiteral<'s> = &'s str;`
			`/// [11] SystemLiteral ::= ('"' [^"]* '"') \| ("'" [^']* "'")`
			`pub fn system_literal(input: &str) -> IResult<&str, SystemLiteral> {`
			`alt((`
			`delimited(char('"'), recognize(many0(none_of("\""))), char('"')),`
			`delimited(char('\''), recognize(many0(none_of("'"))), char('\'')),`
			`))(input)`
			`}`

			`type PubidLiteral<'s> = &'s str;`
			`/// [12] PubidLiteral ::= '"' PubidChar* '"' \| "'" (PubidChar - "'")* "'"`
			`pub fn pubid_literal(input: &str) -> IResult<&str, PubidLiteral> {`
			`alt((`
			`delimited(char('"'), recognize(many0(pubid_char)), char('"')),`
			`delimited(`
			`char('\''),`
			`recognize(many0(recognize(not(char('\''))).and_then(pubid_char))),`
			`char('\''),`
			`),`
			`))(input)`
			`}`

			`type PubidChar<'s> = char;`
			`/// [13] PubidChar ::= #x20 \| #xD \| #xA \| [a-zA-Z0-9] \| [-'()+,./:=?;!*#@$_%]`
			`pub fn pubid_char(input: &str) -> IResult<&str, PubidChar> {`
			`satisfy(\|c\| matches!(c, '\u{20}' \| '\u{D}' \| '\u{A}' \| 'a'..='z' \| 'A'..='Z' \| '0'..='9'))(`
			`input,`
			`)`
			`}`

FML 2024-06-14 16:53:47 +01:00			`// TODO: wtf why doesn't this work how do i do thisjj`
WIP: parsers 2024-06-12 10:15:48 +01:00			`type CharData<'s> = &'s str;`
			`/// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)`
			`pub fn char_data(input: &str) -> IResult<&str, CharData> {`
FML 2024-06-14 16:53:47 +01:00			`// tag(map(`
			`// peek(alt((`
			`// map_parser(`
			`// peek(take_until("]]>")),`
			`// nom::bytes::complete::take_till(\|c\| c == '<' \|\| c == '&'),`
			`// ),`
			`// map_parser(`
			`// peek(take_till(\|c\| c == '<' \|\| c == '&')),`
			`// nom::bytes::complete::take_until("]]>"),`
			`// ),`
			`// ))),`
			`// \|(first, _)\| first,`
			`// ))(input)`

			`// map(`
			`// tuple((is_not("<&]"), peek(alt((tag("<"), tag("&"), tag("]]>")))))),`
			`// \|(first, _)\| first,`
			`// )(input)`
			`// map(`
			`// tuple((recognize(many0(none_of("<&"))), opt(peek(tag("]]>"))))),`
			`// \|(first, _)\| first,`
			`// )(input)`
			`// alt((recognize(many0(none_of("<&"))), take_until("]]>")))(input)`
			`let tagg: &str;`
			`if let Ok((_, tagg1)) = peek(take_until::<&str, &str, Error<&str>>("]]>"))(input) {`
			`if let Ok((_, tagg2)) =`
			`peek::<&str, &str, Error<&str>, _>(take_till(\|c: char\| c == '<' \|\| c == '&'))(input)`
			`{`
			`if tagg1.len() < tagg2.len() {`
			`tagg = tagg1`
			`} else {`
			`tagg = tagg2`
			`}`
			`} else {`
			`tagg = tagg1;`
			`}`
			`} else {`
			`(_, tagg) = peek(take_till(\|c\| c == '<' \|\| c == '&'))(input)?`
			`}`
			`tag(tagg)(input)`
			`// let mut len = 0;`
			`// let ch = input.chars().collect::<Vec<_>>();`
			`// for (idx, char) in ch.as_ref().into_iter().enumerate() {`
			`// match char {`
			`// '<' \| '&' => break,`
			`// ']' => {`
			`// if idx <= ch.len() - 3 {}`
			`// },`
			`// _ => todo!(),`
			`// }`
			`// }`
			`// while let Some(char) = chars.next() {`
			`// if char == '<' \|\| char == '&' {`
			`// break;`
			`// } else if char == ']' {`
			`// if let Some(next) = chars.peek() {`
			`// if next == ']' {`
			`// if let Some(next) = chars.next_if_eq() {}`
			`// }`
			`// }`
			`// }`
			`// len += 1;`
			`// }`
			`// todo!()`
			`// recognize(many0(permutation((none_of("<&"), not(tag("]]>"))))))(input)`
			`// recognize(many0(not(alt((tag("<"), tag("&"), tag("]]>"))))))(input)`
			`// take_till(\|c\| c == '<' \|\| c == '&').and_then(take_until("]]>"))(input)`
WIP: parsers 2024-06-12 10:15:48 +01:00			`}`

			`type Prolog<'s> = (`
			`Option<XMLDecl>,`
			`Vec<Misc<'s>>,`
			`Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>,`
			`);`
			`/// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?`
			`pub fn prolog(input: &str) -> IResult<&str, Prolog> {`
			`tuple((`
			`opt(xml_decl),`
			`many0(misc),`
			`opt(tuple((doctypedecl, many0(misc)))),`
			`))(input)`
			`}`

			`struct XMLDecl {`
			`version_info: VersionInfo,`
actually get it to build for initial tests 2024-06-14 13:11:32 +01:00			`// encoding_decl: Option<EncodingDecl>,`
			`// sd_decl: Option<SDDecl>,`
WIP: parsers 2024-06-12 10:15:48 +01:00			`}`
			`/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'`
			`pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> {`
			`// (VersionInfo, Option<EncodingDecl>, Option<SDDecl>)`
actually get it to build for initial tests 2024-06-14 13:11:32 +01:00			`let (leftover, (version_info /* encoding_decl, sd_decl */,)) = delimited(`
WIP: parsers 2024-06-12 10:15:48 +01:00			`tag("<?xml"),`
actually get it to build for initial tests 2024-06-14 13:11:32 +01:00			`tuple((version_info /* opt(encoding_decl), opt(sd_decl) */,)),`
WIP: parsers 2024-06-12 10:15:48 +01:00			`tag("?>"),`
			`)(input)?;`
			`Ok((`
			`leftover,`
			`XMLDecl {`
			`version_info,`
actually get it to build for initial tests 2024-06-14 13:11:32 +01:00			`// encoding_decl,`
			`// sd_decl,`
WIP: parsers 2024-06-12 10:15:48 +01:00			`},`
			`))`
			`}`

			`type VersionInfo = VersionNum;`
			`/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" \| '"' VersionNum '"')`
			`pub fn version_info(input: &str) -> IResult<&str, VersionInfo> {`
			`preceded(`
			`tuple((s, tag("version"), eq)),`
			`alt((`
			`delimited(char('\''), version_num, char('\'')),`
			`delimited(char('"'), version_num, char('"')),`
			`)),`
			`)(input)`
			`}`

			`/// [25] Eq ::= S? '=' S?`
			`pub fn eq(input: &str) -> IResult<&str, (Option<&str>, char, Option<&str>)> {`
			`tuple((opt(s), char('='), opt(s)))(input)`
			`}`

			`#[derive(Clone)]`
			`enum VersionNum {`
			`One,`
			`OneDotOne,`
			`}`
			`/// [26] VersionNum ::= '1.' [0-9]+`
			`pub fn version_num(input: &str) -> IResult<&str, VersionNum> {`
			`preceded(`
			`tag("1."),`
			`alt((`
			`value(VersionNum::One, char('0')),`
			`value(VersionNum::OneDotOne, char('1')),`
			`)),`
			`)(input)`
			`}`
actually get it to build for initial tests 2024-06-14 13:11:32 +01:00
			`pub fn reference(input: &str) -> IResult<&str, char> {`
			`todo!()`
			`}`

			`pub fn pe_reference(input: &str) -> IResult<&str, char> {`
			`todo!()`
			`}`

			`#[cfg(test)]`
			`mod tests {`
FML 2024-06-14 16:53:47 +01:00			`use std::num::NonZero;`

actually get it to build for initial tests 2024-06-14 13:11:32 +01:00			`use super::*;`

			`#[test]`
			`fn test_char_data() {`
			`assert_eq!(Ok(("&def]]>ghi", "abc")), char_data("abc&def]]>ghi"));`
			`assert_eq!(Ok(("]]>ghi", "abcdef")), char_data("abcdef]]>ghi"));`
			`assert_eq!(Ok(("&defghi", "abc")), char_data("abc&defghi"));`
FML 2024-06-14 16:53:47 +01:00			`assert_eq!(Ok(("]]>def&ghi", "abc")), char_data("abc]]>def&ghi"));`
			`assert_eq!(Ok(("&ghi", "abc]>def")), char_data("abc]>def&ghi"));`
			`assert_eq!(`
			`Err(Err::Incomplete(nom::Needed::Size(`
			`NonZero::new(1usize).unwrap()`
			`))),`
			`char_data("abcdefghi")`
			`);`
actually get it to build for initial tests 2024-06-14 13:11:32 +01:00			`}`
			`}`