WIP: dtd garbo
This commit is contained in:
		
							parent
							
								
									feb13be926
								
							
						
					
					
						commit
						afda87a8d7
					
				
							
								
								
									
										282
									
								
								src/parser.rs
								
								
								
								
							
							
						
						
									
										282
									
								
								src/parser.rs
								
								
								
								
							| 
						 | 
				
			
			@ -10,11 +10,12 @@ use nom::{
 | 
			
		|||
    combinator::{cond, map, map_parser, map_res, not, opt, peek, recognize, value, verify},
 | 
			
		||||
    error::{Error, ErrorKind},
 | 
			
		||||
    multi::{many0, many1, many_till},
 | 
			
		||||
    sequence::{delimited, pair, preceded, tuple},
 | 
			
		||||
    sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
 | 
			
		||||
    Err, IResult, Parser,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// parser: parses tokens from lexer into events
 | 
			
		||||
// performs no well-formedness, validity, or data-model checks: a simple translation of the input into Rust types
 | 
			
		||||
 | 
			
		||||
enum ContentItem<'s> {
 | 
			
		||||
    CharData(&'s str),
 | 
			
		||||
| 
						 | 
				
			
			@ -25,15 +26,6 @@ enum ContentItem<'s> {
 | 
			
		|||
 | 
			
		||||
type Content<'s> = Option<Vec<ContentItem<'s>>>;
 | 
			
		||||
 | 
			
		||||
struct DoctypeDecl<'s> {
 | 
			
		||||
    name: &'s str,
 | 
			
		||||
    // TODO: doctype declaration parsing
 | 
			
		||||
}
 | 
			
		||||
///
 | 
			
		||||
pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> {
 | 
			
		||||
    todo!()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
 | 
			
		||||
/// [1]   	document	   ::=   	prolog element Misc*
 | 
			
		||||
pub fn document(input: &str) -> IResult<&str, Document> {
 | 
			
		||||
| 
						 | 
				
			
			@ -211,21 +203,20 @@ struct PI<'s> {
 | 
			
		|||
}
 | 
			
		||||
/// [16]   	PI	   ::=   	'<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
 | 
			
		||||
pub fn pi(input: &str) -> IResult<&str, PI> {
 | 
			
		||||
    let (rest, (target, instruction)) = delimited(
 | 
			
		||||
        tag("<?"),
 | 
			
		||||
        pair(
 | 
			
		||||
            pi_target,
 | 
			
		||||
            opt(recognize(pair(s, many_till(xmlchar, peek(tag("?>")))))),
 | 
			
		||||
    map(
 | 
			
		||||
        delimited(
 | 
			
		||||
            tag("<?"),
 | 
			
		||||
            pair(
 | 
			
		||||
                pi_target,
 | 
			
		||||
                opt(recognize(pair(s, many_till(xmlchar, peek(tag("?>")))))),
 | 
			
		||||
            ),
 | 
			
		||||
            tag("?>"),
 | 
			
		||||
        ),
 | 
			
		||||
        tag("?>"),
 | 
			
		||||
    )(input)?;
 | 
			
		||||
    Ok((
 | 
			
		||||
        rest,
 | 
			
		||||
        PI {
 | 
			
		||||
        |(target, instruction)| PI {
 | 
			
		||||
            target,
 | 
			
		||||
            instruction,
 | 
			
		||||
        },
 | 
			
		||||
    ))
 | 
			
		||||
    )(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type PITarget<'s> = &'s str;
 | 
			
		||||
| 
						 | 
				
			
			@ -288,21 +279,18 @@ struct XMLDecl<'s> {
 | 
			
		|||
}
 | 
			
		||||
/// [23]   	XMLDecl	   ::=   	'<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 | 
			
		||||
pub fn xml_decl(input: &str) -> IResult<&str, XMLDecl> {
 | 
			
		||||
    // (VersionInfo, Option<EncodingDecl>, Option<SDDecl>)
 | 
			
		||||
    let (leftover, (version_info, encoding_decl, sd_decl)) = delimited(
 | 
			
		||||
        tag("<?xml"),
 | 
			
		||||
        tuple((version_info, opt(encoding_decl), opt(sd_decl))),
 | 
			
		||||
        pair(opt(s), tag("?>")),
 | 
			
		||||
    )(input)?;
 | 
			
		||||
    // TODO: change to map
 | 
			
		||||
    Ok((
 | 
			
		||||
        leftover,
 | 
			
		||||
        XMLDecl {
 | 
			
		||||
    map(
 | 
			
		||||
        delimited(
 | 
			
		||||
            tag("<?xml"),
 | 
			
		||||
            tuple((version_info, opt(encoding_decl), opt(sd_decl))),
 | 
			
		||||
            pair(opt(s), tag("?>")),
 | 
			
		||||
        ),
 | 
			
		||||
        |(version_info, encoding_decl, sd_decl)| XMLDecl {
 | 
			
		||||
            version_info,
 | 
			
		||||
            encoding_decl,
 | 
			
		||||
            sd_decl,
 | 
			
		||||
        },
 | 
			
		||||
    ))
 | 
			
		||||
    )(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type VersionInfo = VersionNum;
 | 
			
		||||
| 
						 | 
				
			
			@ -342,6 +330,7 @@ pub fn version_num(input: &str) -> IResult<&str, VersionNum> {
 | 
			
		|||
enum Misc<'s> {
 | 
			
		||||
    Comment(Comment<'s>),
 | 
			
		||||
    PI(PI<'s>),
 | 
			
		||||
    // TODO: how to deal with whitespace
 | 
			
		||||
    S,
 | 
			
		||||
}
 | 
			
		||||
/// [27]   	Misc	   ::=   	Comment | PI | S
 | 
			
		||||
| 
						 | 
				
			
			@ -353,6 +342,100 @@ pub fn misc(input: &str) -> IResult<&str, Misc> {
 | 
			
		|||
    ))(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct DoctypeDecl<'s> {
 | 
			
		||||
    name: &'s str,
 | 
			
		||||
    external_id: Option<ExternalID<'s>>,
 | 
			
		||||
    int_subset: Option<IntSubset<'s>>,
 | 
			
		||||
}
 | 
			
		||||
/// [28]   	doctypedecl	   ::=   	'<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
 | 
			
		||||
pub fn doctypedecl(input: &str) -> IResult<&str, DoctypeDecl> {
 | 
			
		||||
    map(
 | 
			
		||||
        delimited(
 | 
			
		||||
            pair(tag("<!DOCTYPE"), s),
 | 
			
		||||
            tuple((
 | 
			
		||||
                name,
 | 
			
		||||
                opt(preceded(s, external_id)),
 | 
			
		||||
                preceded(
 | 
			
		||||
                    opt(s),
 | 
			
		||||
                    opt(terminated(
 | 
			
		||||
                        delimited(tag("["), int_subset, tag("]")),
 | 
			
		||||
                        opt(s),
 | 
			
		||||
                    )),
 | 
			
		||||
                ),
 | 
			
		||||
            )),
 | 
			
		||||
            tag(">"),
 | 
			
		||||
        ),
 | 
			
		||||
        |(name, external_id, int_subset)| DoctypeDecl {
 | 
			
		||||
            name,
 | 
			
		||||
            external_id,
 | 
			
		||||
            int_subset,
 | 
			
		||||
        },
 | 
			
		||||
    )(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone)]
 | 
			
		||||
enum DeclSep<'s> {
 | 
			
		||||
    PEReference(PEReference<'s>),
 | 
			
		||||
    // TODO: tackle whitespace
 | 
			
		||||
    S,
 | 
			
		||||
}
 | 
			
		||||
/// [28a]   	DeclSep	   ::=   	PEReference | S
 | 
			
		||||
pub fn decl_sep(input: &str) -> IResult<&str, DeclSep> {
 | 
			
		||||
    alt((
 | 
			
		||||
        map(pe_reference, |pe_reference| {
 | 
			
		||||
            DeclSep::PEReference(pe_reference)
 | 
			
		||||
        }),
 | 
			
		||||
        value(DeclSep::S, s),
 | 
			
		||||
    ))(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
enum IntSubsetItem<'s> {
 | 
			
		||||
    MarkupDecl(MarkupDecl<'s>),
 | 
			
		||||
    DeclSep(DeclSep<'s>),
 | 
			
		||||
}
 | 
			
		||||
type IntSubset<'s> = Vec<IntSubsetItem<'s>>;
 | 
			
		||||
/// [28b]   	intSubset	   ::=   	(markupdecl | DeclSep)*
 | 
			
		||||
pub fn int_subset(input: &str) -> IResult<&str, IntSubset> {
 | 
			
		||||
    many0(alt((
 | 
			
		||||
        map(markup_decl, |markup_decl| {
 | 
			
		||||
            IntSubsetItem::MarkupDecl(markup_decl)
 | 
			
		||||
        }),
 | 
			
		||||
        map(decl_sep, |decl_sep| IntSubsetItem::DeclSep(decl_sep)),
 | 
			
		||||
    )))(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
enum MarkupDecl<'s> {
 | 
			
		||||
    ElementDecl(ElementDecl<'s>),
 | 
			
		||||
    AttlistDecl(AttlistDecl<'s>),
 | 
			
		||||
    EntityDecl(EntityDecl<'s>),
 | 
			
		||||
    NotationDecl(NotationDecl<'s>),
 | 
			
		||||
    PI(PI<'s>),
 | 
			
		||||
    Comment(Comment<'s>),
 | 
			
		||||
}
 | 
			
		||||
/// [29]   	markupdecl	   ::=   	elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
 | 
			
		||||
pub fn markup_decl(input: &str) -> IResult<&str, MarkupDecl> {
 | 
			
		||||
    alt((
 | 
			
		||||
        map(element_decl, |element_decl| {
 | 
			
		||||
            MarkupDecl::ElementDecl(element_decl)
 | 
			
		||||
        }),
 | 
			
		||||
        map(attlist_decl, |attlist_decl| {
 | 
			
		||||
            MarkupDecl::AttlistDecl(attlist_decl)
 | 
			
		||||
        }),
 | 
			
		||||
        map(entity_decl, |entity_decl| {
 | 
			
		||||
            MarkupDecl::EntityDecl(entity_decl)
 | 
			
		||||
        }),
 | 
			
		||||
        map(notation_decl, |notation_decl| {
 | 
			
		||||
            MarkupDecl::NotationDecl(notation_decl)
 | 
			
		||||
        }),
 | 
			
		||||
        map(pi, |pi| MarkupDecl::PI(pi)),
 | 
			
		||||
        map(comment, |comment| MarkupDecl::Comment(comment)),
 | 
			
		||||
    ))(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// [30]   extSubset   ::=   TextDecl? extSubsetDecl
 | 
			
		||||
 | 
			
		||||
// [31]   extSubsetDecl   ::=   ( markupdecl | conditionalSect | DeclSep)*
 | 
			
		||||
 | 
			
		||||
type SDDecl = bool;
 | 
			
		||||
/// [32]   	SDDecl	   ::=   	S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
 | 
			
		||||
pub fn sd_decl(input: &str) -> IResult<&str, SDDecl> {
 | 
			
		||||
| 
						 | 
				
			
			@ -388,18 +471,106 @@ pub fn element(input: &str) -> IResult<&str, Element> {
 | 
			
		|||
    ))(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
let 
 | 
			
		||||
// let STag<'s> = (Name<'s>, );
 | 
			
		||||
/// [40]   	STag	   ::=   	'<' Name (S Attribute)* S? '>'
 | 
			
		||||
 | 
			
		||||
type Attribute<'s> = (&'s str, &'s str)
 | 
			
		||||
/// [41]   	Attribute	   ::=   	Name Eq AttValue 
 | 
			
		||||
type Attribute<'s> = (Name<'s>, AttValue<'s>);
 | 
			
		||||
/// [41]   	Attribute	   ::=   	Name Eq AttValue
 | 
			
		||||
pub fn attribute(input: &str) -> IResult<&str, Attribute> {
 | 
			
		||||
    separated_pair(name, eq, att_value)(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn reference(input: &str) -> IResult<&str, char> {
 | 
			
		||||
type CharRef<'s> = &'s str;
 | 
			
		||||
/// [66]   	CharRef	   ::=   	'&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
 | 
			
		||||
pub fn char_ref(input: &str) -> IResult<&str, CharRef> {
 | 
			
		||||
    todo!()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn pe_reference(input: &str) -> IResult<&str, char> {
 | 
			
		||||
    todo!()
 | 
			
		||||
enum Reference<'s> {
 | 
			
		||||
    EntityRef(EntityRef<'s>),
 | 
			
		||||
    CharRef(CharRef<'s>),
 | 
			
		||||
}
 | 
			
		||||
/// [67]   	Reference	   ::=   	EntityRef | CharRef
 | 
			
		||||
pub fn reference(input: &str) -> IResult<&str, Reference> {
 | 
			
		||||
    alt((
 | 
			
		||||
        map(entity_ref, |entity_ref| Reference::EntityRef(entity_ref)),
 | 
			
		||||
        map(char_ref, |char_ref| Reference::CharRef(char_ref)),
 | 
			
		||||
    ))(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type EntityRef<'s> = &'s str;
 | 
			
		||||
/// [68]   	EntityRef	   ::=   	'&' Name ';'
 | 
			
		||||
pub fn entity_ref(input: &str) -> IResult<&str, EntityRef> {
 | 
			
		||||
    delimited(tag("&"), name, tag(";"))(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type PEReference<'s> = &'s str;
 | 
			
		||||
/// [69]   	PEReference	   ::=   	'%' Name ';'
 | 
			
		||||
pub fn pe_reference(input: &str) -> IResult<&str, PEReference> {
 | 
			
		||||
    delimited(tag("%"), name, tag(";"))(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// TODO: entity declarations
 | 
			
		||||
 | 
			
		||||
/// An external identifier (production [75]).
enum ExternalID<'s> {
    /// The `SYSTEM` form, which carries only a system literal.
    SYSTEM {
        /// The system literal that follows `SYSTEM`.
        system_identifier: &'s str,
    },
    /// The `PUBLIC` form, which carries a public-id literal and a system literal.
    PUBLIC {
        /// The public-id literal that follows `PUBLIC`.
        public_identifier: &'s str,
        /// The system literal that follows the public-id literal.
        system_identifier: &'s str,
    },
}
 | 
			
		||||
/// [75]   	ExternalID	   ::=   	'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
 | 
			
		||||
// pub fn external_id(input: &str) -> IResult<&str, ExternalID> {
 | 
			
		||||
pub fn external_id(input: &str) -> IResult<&str, ExternalID> {
 | 
			
		||||
    alt((
 | 
			
		||||
        map(
 | 
			
		||||
            preceded(pair(tag("SYSTEM"), s), system_literal),
 | 
			
		||||
            |system_identifier| ExternalID::SYSTEM { system_identifier },
 | 
			
		||||
        ),
 | 
			
		||||
        map(
 | 
			
		||||
            preceded(
 | 
			
		||||
                pair(tag("PUBLIC"), s),
 | 
			
		||||
                separated_pair(pubid_literal, s, system_literal),
 | 
			
		||||
            ),
 | 
			
		||||
            |(public_identifier, system_identifier)| ExternalID::PUBLIC {
 | 
			
		||||
                public_identifier,
 | 
			
		||||
                system_identifier,
 | 
			
		||||
            },
 | 
			
		||||
        ),
 | 
			
		||||
    ))(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type NDataDecl<'s> = &'s str;
 | 
			
		||||
/// [76]   	NDataDecl	   ::=   	S 'NDATA' S Name
 | 
			
		||||
pub fn ndata_decl(input: &str) -> IResult<&str, NDataDecl> {
 | 
			
		||||
    preceded(tuple((s, tag("NDATA"), s)), name)(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct TextDecl<'s> {
 | 
			
		||||
    version_info: Option<VersionInfo>,
 | 
			
		||||
    encoding_decl: EncodingDecl<'s>,
 | 
			
		||||
}
 | 
			
		||||
/// [77]   	TextDecl	   ::=   	'<?xml' VersionInfo? EncodingDecl S? '?>'
 | 
			
		||||
pub fn text_decl(input: &str) -> IResult<&str, TextDecl> {
 | 
			
		||||
    map(
 | 
			
		||||
        delimited(
 | 
			
		||||
            tag("<?xml"),
 | 
			
		||||
            pair(opt(version_info), terminated(encoding_decl, opt(s))),
 | 
			
		||||
            tag("?>"),
 | 
			
		||||
        ),
 | 
			
		||||
        |(version_info, encoding_decl)| TextDecl {
 | 
			
		||||
            version_info,
 | 
			
		||||
            encoding_decl,
 | 
			
		||||
        },
 | 
			
		||||
    )(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type extParsedEnt<'s> = (Option<TextDecl<'s>>, Content<'s>);
 | 
			
		||||
/// [78]   	extParsedEnt	   ::=   	TextDecl? content
 | 
			
		||||
pub fn ext_parsed_ent(input: &str) -> IResult<&str, extParsedEnt> {
 | 
			
		||||
    pair(opt(text_decl), content)(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type EncodingDecl<'s> = EncName<'s>;
 | 
			
		||||
| 
						 | 
				
			
			@ -425,6 +596,41 @@ pub fn enc_name(input: &str) -> IResult<&str, EncName> {
 | 
			
		|||
    ))(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct NotationDecl<'s> {
 | 
			
		||||
    name: &'s str,
 | 
			
		||||
    id: NotationDeclID<'s>,
 | 
			
		||||
}
 | 
			
		||||
enum NotationDeclID<'s> {
 | 
			
		||||
    External(ExternalID<'s>),
 | 
			
		||||
    Public(PublicID<'s>),
 | 
			
		||||
}
 | 
			
		||||
/// [82]   	NotationDecl	   ::=   	'<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
 | 
			
		||||
pub fn notation_decl(input: &str) -> IResult<&str, NotationDecl> {
 | 
			
		||||
    map(
 | 
			
		||||
        delimited(
 | 
			
		||||
            pair(tag("<!NOTATION"), s),
 | 
			
		||||
            separated_pair(
 | 
			
		||||
                name,
 | 
			
		||||
                s,
 | 
			
		||||
                alt((
 | 
			
		||||
                    map(external_id, |external_id| {
 | 
			
		||||
                        NotationDeclID::External(external_id)
 | 
			
		||||
                    }),
 | 
			
		||||
                    map(public_id, |public_id| NotationDeclID::Public(public_id)),
 | 
			
		||||
                )),
 | 
			
		||||
            ),
 | 
			
		||||
            pair(opt(s), tag(">")),
 | 
			
		||||
        ),
 | 
			
		||||
        |(name, id)| NotationDecl { name, id },
 | 
			
		||||
    )(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type PublicID<'s> = &'s str;
 | 
			
		||||
/// [83]   	PublicID	   ::=   	'PUBLIC' S PubidLiteral
 | 
			
		||||
pub fn public_id(input: &str) -> IResult<&str, PublicID> {
 | 
			
		||||
    preceded(pair(tag("PUBLIC"), s), pubid_literal)(input)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
 | 
			
		||||
mod tests {
 | 
			
		||||
    use std::num::NonZero;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue