diff --git a/src/writer.rs b/src/writer.rs index 456a5a1..08be8c2 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -5,8 +5,9 @@ use crate::{ error::Error, }; +// pub struct Writer { pub struct Writer { - stream: W, + writer: W, depth: Vec, namespaces: Vec<(usize, Namespace)>, } diff --git a/src/xml/composers.rs b/src/xml/composers.rs new file mode 100644 index 0000000..3313a56 --- /dev/null +++ b/src/xml/composers.rs @@ -0,0 +1,735 @@ +use std::io; + +use tokio::io::{AsyncWrite, AsyncWriteExt}; + +use super::{ + AttValue, AttValueData, CDEnd, CDSect, CDStart, CData, Char, CharData, Comment, DeclSep, + DefaultAttName, DoctypeDecl, Document, Element, EntityValue, EntityValueData, Eq, ExtSubset, + ExtSubsetDecl, IntSubset, LocalPart, MarkupDecl, Misc, NCName, NSAttName, Name, NameChar, + NameStartChar, Names, Nmtoken, Nmtokens, PITarget, Prefix, PrefixedAttName, PrefixedName, + Prolog, PubidChar, PubidLiteral, QName, SDDecl, SystemLiteral, UnprefixedName, VersionInfo, + VersionNum, XMLDecl, PI, S, +}; + +/// Compact Composer trait, can create different trait later for pretty composition +pub trait Composer<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite; +} + +// namespaces in xml + +/// [1] NSAttName ::= PrefixedAttName | DefaultAttName +impl<'s> Composer<'s> for NSAttName<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: AsyncWrite + Unpin, + { + match self { + NSAttName::PrefixedAttName(prefixed_att_name) => { + prefixed_att_name.write(writer).await? + } + NSAttName::DefaultAttName => DefaultAttName.write(writer).await?, + } + Ok(()) + } +} + +/// [2] PrefixedAttName ::= 'xmlns:' NCName +impl<'s> Composer<'s> for PrefixedAttName<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: AsyncWrite + Unpin, + { + writer.write_all("xmlns:".as_bytes()).await?; + self.0.write(writer).await?; + Ok(()) + } +} + +/// [3] DefaultAttName ::= 'xmlns'; +impl Composer<'_> for DefaultAttName { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: AsyncWrite + Unpin, + { + writer.write_all("xmlns".as_bytes()).await?; + Ok(()) + } +} + +/// [4] NCName ::= Name - (Char* ':' Char*) +impl<'s> Composer<'s> for NCName<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: AsyncWrite + Unpin, + { + writer.write_all(self.0.as_bytes()).await?; + Ok(()) + } +} + +/// [7] QName ::= PrefixedName | UnprefixedName +impl<'s> Composer<'s> for QName<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: AsyncWrite + Unpin, + { + match self { + QName::PrefixedName(prefixed_name) => prefixed_name.write(writer).await?, + QName::UnprefixedName(unprefixed_name) => unprefixed_name.write(writer).await?, + } + Ok(()) + } +} + +/// [8] PrefixedName ::= Prefix ':' LocalPart +impl<'s> Composer<'s> for PrefixedName<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: AsyncWrite + Unpin, + { + self.prefix.write(writer).await?; + writer.write_all(":".as_bytes()).await?; + self.local_part.write(writer).await?; + Ok(()) + } +} + +/// [9] UnprefixedName ::= LocalPart +impl<'s> Composer<'s> for UnprefixedName<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + self.0.write(writer).await?; + Ok(()) + } +} + +/// [10] Prefix ::= NCName +impl<'s> Composer<'s> for Prefix<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + self.0.write(writer).await?; + Ok(()) + } +} + +/// [11] LocalPart ::= NCName +impl<'s> Composer<'s> for LocalPart<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + self.0.write(writer).await?; + Ok(()) + } +} + +// xml spec + +/// [1] document ::= prolog element Misc* +impl<'s> Composer<'s> for Document<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + self.0.write(writer).await?; + self.1.write(writer).await?; + for misc in &self.2 { + misc.write(writer).await? + } + Ok(()) + } +} + +/// [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ +impl Composer<'_> for Char { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all(self.0.to_string().as_bytes()).await?; + Ok(()) + } +} + +/// [3] S ::= (#x20 | #x9 | #xD | #xA)+ +impl<'s> Composer<'s> for S { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all("\u{20}".as_bytes()).await?; + Ok(()) + } +} + +/// [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] +impl Composer<'_> for NameStartChar { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all(self.0.to_string().as_bytes()).await?; + Ok(()) + } +} + +/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] +impl Composer<'_> for NameChar { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all(self.0.to_string().as_bytes()).await?; + Ok(()) + } +} + +/// [5] Name ::= NameStartChar (NameChar)* +impl<'s> Composer<'s> for Name<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all(self.0.as_bytes()).await?; + Ok(()) + } +} + +/// [6] Names ::= Name (#x20 Name)* +impl<'s> Composer<'s> for Names<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + let mut first = true; + for name in &self.0 { + if !first { + writer.write_all("\u{20}".as_bytes()).await?; + } + name.write(writer).await?; + if first { + first = false + } + } + Ok(()) + } +} + +/// [7] Nmtoken ::= (NameChar)+ +impl<'s> Composer<'s> for Nmtoken<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all(self.0.as_bytes()).await?; + Ok(()) + } +} + +/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* +impl<'s> Composer<'s> for Nmtokens<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + let mut first = true; + for nmtoken in &self.0 { + if !first { + writer.write_all("\u{20}".as_bytes()).await?; + } + nmtoken.write(writer).await?; + if first { + first = false + } + } + Ok(()) + } +} + +/// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' +/// | "'" ([^%&'] | PEReference | Reference)* "'" +impl<'s> Composer<'s> for EntityValue<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + match self { + EntityValue::DoubleQuoted(entity_value_data) => { + writer.write_all("\"".as_bytes()).await?; + for entity_value_data in entity_value_data { + match entity_value_data { + EntityValueData::String(s) => writer.write_all(s.as_bytes()).await?, + EntityValueData::PEReference(pe_reference) => { + pe_reference.write(writer).await? + } + EntityValueData::Reference(reference) => reference.write(writer).await?, + } + } + writer.write_all("\"".as_bytes()).await?; + } + EntityValue::SingleQuoted(entity_value_data) => { + writer.write_all("'".as_bytes()).await?; + for entity_value_data in entity_value_data { + match entity_value_data { + EntityValueData::String(s) => writer.write_all(s.as_bytes()).await?, + EntityValueData::PEReference(pe_reference) => { + pe_reference.write(writer).await? + } + EntityValueData::Reference(reference) => reference.write(writer).await?, + } + } + writer.write_all("'".as_bytes()).await?; + } + } + Ok(()) + } +} + +/// [10] AttValue ::= '"' ([^<&"] | Reference)* '"' +/// | "'" ([^<&'] | Reference)* "'" +impl<'s> Composer<'s> for AttValue<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + match self { + AttValue::DoubleQuoted(att_value_data) => { + writer.write_all("\"".as_bytes()).await?; + for att_value_data in att_value_data { + match att_value_data { + AttValueData::String(s) => writer.write_all(s.as_bytes()).await?, + AttValueData::Reference(reference) => reference.write(writer).await?, + } + } + writer.write_all("\"".as_bytes()).await?; + } + AttValue::SingleQuoted(att_value_data) => { + writer.write_all("'".as_bytes()).await?; + for att_value_data in att_value_data { + match att_value_data { + AttValueData::String(s) => writer.write_all(s.as_bytes()).await?, + AttValueData::Reference(reference) => reference.write(writer).await?, + } + } + writer.write_all("'".as_bytes()).await?; + } + } + Ok(()) + } +} + +/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") +impl<'s> Composer<'s> for SystemLiteral<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + match self { + SystemLiteral::DoubleQuoted(s) => { + writer.write_all("\"".as_bytes()).await?; + writer.write_all(s.as_bytes()).await?; + writer.write_all("\"".as_bytes()).await?; + } + SystemLiteral::SingleQuoted(s) => { + writer.write_all("'".as_bytes()).await?; + writer.write_all(s.as_bytes()).await?; + writer.write_all("'".as_bytes()).await?; + } + } + Ok(()) + } +} + +/// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" +impl<'s> Composer<'s> for PubidLiteral<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + match self { + PubidLiteral::DoubleQuoted(s) => { + writer.write_all("\"".as_bytes()).await?; + writer.write_all(s.as_bytes()).await?; + writer.write_all("\"".as_bytes()).await?; + } + PubidLiteral::SingleQuoted(s) => { + writer.write_all("'".as_bytes()).await?; + writer.write_all(s.as_bytes()).await?; + writer.write_all("'".as_bytes()).await?; + } + } + Ok(()) + } +} + +/// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] +impl Composer<'_> for PubidChar { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all(self.0.to_string().as_bytes()).await?; + Ok(()) + } +} + +/// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) +impl<'s> Composer<'s> for CharData<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all(self.0.as_bytes()).await?; + Ok(()) + } +} + +/// [15] Comment ::= '' +impl<'s> Composer<'s> for Comment<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all("".as_bytes()).await?; + Ok(()) + } +} + +/// [16] PI ::= '' Char*)))? '?>' +impl<'s> Composer<'s> for PI<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all("".as_bytes()).await?; + Ok(()) + } +} + +/// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) +impl<'s> Composer<'s> for PITarget<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + self.0.write(writer).await?; + Ok(()) + } +} + +/// [18] CDSect ::= CDStart CData CDEnd +impl<'s> Composer<'s> for CDSect<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + CDStart.write(writer).await?; + self.0.write(writer).await?; + CDEnd.write(writer).await?; + Ok(()) + } +} + +/// [19] CDStart ::= ' for CDStart { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all("' Char*)) +impl<'s> Composer<'s> for CData<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all(self.0.as_bytes()).await?; + Ok(()) + } +} + +/// [21] CDEnd ::= ']]>' +impl Composer<'_> for CDEnd { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all("]]>".as_bytes()).await?; + Ok(()) + } +} + +/// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? +impl<'s> Composer<'s> for Prolog<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + if let Some(xml_decl) = &self.0 { + xml_decl.write(writer).await?; + } + for misc in &self.1 { + misc.write(writer).await?; + } + if let Some((doctype_decl, miscs)) = &self.2 { + doctype_decl.write(writer).await?; + for misc in miscs { + misc.write(writer).await?; + } + } + Ok(()) + } +} + +/// [23] XMLDecl ::= '' +impl<'s> Composer<'s> for XMLDecl<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all("".as_bytes()).await?; + Ok(()) + } +} + +/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') +impl Composer<'_> for VersionInfo { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + S.write(writer).await?; + writer.write_all("version".as_bytes()).await?; + Eq.write(writer).await?; + match self { + VersionInfo::SingleQuoted(version_num) => { + writer.write_all("'".as_bytes()).await?; + version_num.write(writer).await?; + writer.write_all("'".as_bytes()).await?; + } + VersionInfo::DoubleQuoted(version_num) => { + writer.write_all("\"".as_bytes()).await?; + version_num.write(writer).await?; + writer.write_all("\"".as_bytes()).await?; + } + } + Ok(()) + } +} + +/// [25] Eq ::= S? '=' S? +impl Composer<'_> for Eq { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all("=".as_bytes()).await?; + Ok(()) + } +} + +/// [26] VersionNum ::= '1.' [0-9]+ +impl Composer<'_> for VersionNum { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + match self { + VersionNum::One => writer.write_all("1.0".as_bytes()).await?, + VersionNum::OneDotOne => writer.write_all("1.1".as_bytes()).await?, + } + Ok(()) + } +} + +/// [27] Misc ::= Comment | PI | S +impl<'s> Composer<'s> for Misc<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + match self { + Misc::Comment(comment) => comment.write(writer).await?, + Misc::PI(pi) => pi.write(writer).await?, + Misc::S => {} + } + Ok(()) + } +} + +/// [16] doctypedecl ::= '' +/// [28] doctypedecl ::= '' +impl<'s> Composer<'s> for DoctypeDecl<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + writer.write_all("".as_bytes()).await?; + Ok(()) + } +} + +/// [28a] DeclSep ::= PEReference | S +impl<'s> Composer<'s> for DeclSep<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + match self { + DeclSep::PEReference(pe_reference) => pe_reference.write(writer).await?, + DeclSep::S => S.write(writer).await?, + } + Ok(()) + } +} + +/// [28b] intSubset ::= (markupdecl | DeclSep)* +impl<'s> Composer<'s> for IntSubset<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + for declaration in self { + match declaration { + super::IntSubsetDeclaration::MarkupDecl(markup_decl) => { + markup_decl.write(writer).await? + } + super::IntSubsetDeclaration::DeclSep(decl_sep) => decl_sep.write(writer).await?, + } + } + Ok(()) + } +} + +/// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment +impl<'s> Composer<'s> for MarkupDecl<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + match self { + MarkupDecl::Elementdecl(elementdecl) => elementdecl.write(writer).await?, + MarkupDecl::AttlistDecl(attlist_decl) => attlist_decl.write(writer).await?, + MarkupDecl::EntityDecl(entity_decl) => entity_decl.write(writer).await?, + MarkupDecl::NotationDecl(notation_decl) => notation_decl.write(writer).await?, + MarkupDecl::PI(pi) => pi.write(writer).await?, + MarkupDecl::Comment(comment) => comment.write(writer).await?, + } + Ok(()) + } +} + +/// [30] extSubset ::= TextDecl? extSubsetDecl +impl<'s> Composer<'s> for ExtSubset<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + if let Some(text_decl) = self.text_decl { + text_decl.write(writer).await? + } + self.ext_subset_decl.write(writer).await?; + Ok(()) + } +} + +/// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)* +impl<'s> Composer<'s> for ExtSubsetDecl<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + for declaration in self { + match declaration { + super::ExtSubsetDeclaration::MarkupDecl(markup_decl) => { + markup_decl.write(writer).await? + } + super::ExtSubsetDeclaration::ConditionalSect(conditional_sect) => { + conditional_sect.write(writer).await? + } + super::ExtSubsetDeclaration::DeclSep(decl_sep) => decl_sep.write(writer).await?, + } + } + Ok(()) + } +} + +/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) +impl Composer<'_> for SDDecl { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + S.write(writer).await?; + writer.write_all("standalone".as_bytes()).await?; + Eq.write(writer).await?; + match self { + SDDecl::SingleQuoted(sd_decl) => { + writer.write_all("'".as_bytes()).await?; + match sd_decl { + true => writer.write_all("yes".as_bytes()).await?, + false => writer.write_all("no".as_bytes()).await?, + } + writer.write_all("'".as_bytes()).await?; + } + SDDecl::DoubleQuoted(sd_decl) => { + writer.write_all("\"".as_bytes()).await?; + match sd_decl { + true => writer.write_all("yes".as_bytes()).await?, + false => writer.write_all("no".as_bytes()).await?, + } + writer.write_all("\"".as_bytes()).await?; + } + } + Ok(()) + } +} + +/// [39] element ::= EmptyElemTag | STag content ETag +impl<'s> Composer<'s> for Element<'s> { + async fn write(&self, writer: &mut W) -> io::Result<()> + where + W: Unpin + AsyncWrite, + { + todo!() + } +} diff --git a/src/xml/mod.rs b/src/xml/mod.rs index d22abd6..8df2f41 100644 --- a/src/xml/mod.rs +++ b/src/xml/mod.rs @@ -58,8 +58,9 @@ pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec>); pub struct Char(char); /// [3] S ::= (#x20 | #x9 | #xD | #xA)+ +#[derive(Clone)] #[repr(transparent)] -pub struct S<'s>(&'s str); +pub struct S; /// [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] #[repr(transparent)] @@ -88,7 +89,7 @@ pub struct Nmtoken<'s>(&'s str); pub struct Nmtokens<'s>(Vec>); #[derive(Clone, Debug)] -pub enum LiteralData<'s> { +pub enum EntityValueData<'s> { String(&'s str), PEReference(PEReference<'s>), Reference(Reference<'s>), @@ -96,24 +97,37 @@ pub enum LiteralData<'s> { /// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' /// | "'" ([^%&'] | PEReference | Reference)* "'" #[derive(Debug)] -#[repr(transparent)] -pub struct EntityValue<'s>(Vec>); +pub enum EntityValue<'s> { + DoubleQuoted(Vec>), + SingleQuoted(Vec>), +} +#[derive(Clone, Debug)] +pub enum AttValueData<'s> { + String(&'s str), + Reference(Reference<'s>), +} /// [10] AttValue ::= '"' ([^<&"] | Reference)* '"' /// | "'" ([^<&'] | Reference)* "'" #[derive(Clone, Debug)] -#[repr(transparent)] -pub struct AttValue<'s>(Vec>); +pub enum AttValue<'s> { + DoubleQuoted(Vec>), + SingleQuoted(Vec>), +} /// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") #[derive(Debug)] -#[repr(transparent)] -pub struct SystemLiteral<'s>(&'s str); +pub enum SystemLiteral<'s> { + DoubleQuoted(&'s str), + SingleQuoted(&'s str), +} /// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" #[derive(Debug)] -#[repr(transparent)] -pub struct PubidLiteral<'s>(&'s str); +pub enum PubidLiteral<'s> { + DoubleQuoted(&'s str), + SingleQuoted(&'s str), +} /// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] #[repr(transparent)] @@ -176,7 +190,10 @@ pub struct XMLDecl<'s> { /// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') #[derive(Debug)] -pub struct VersionInfo(VersionNum); +pub enum VersionInfo { + SingleQuoted(VersionNum), + DoubleQuoted(VersionNum), +} /// [25] Eq ::= S? '=' S? #[derive(Clone)] @@ -220,6 +237,7 @@ pub enum IntSubsetDeclaration<'s> { MarkupDecl(MarkupDecl<'s>), DeclSep(DeclSep<'s>), } +/// from [16] intSubset ::= (markupdecl | PEReference | S)* /// [28b] intSubset ::= (markupdecl | DeclSep)* pub type IntSubset<'s> = Vec>; @@ -249,7 +267,11 @@ pub enum ExtSubsetDeclaration<'s> { type ExtSubsetDecl<'s> = Vec>; /// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) -pub type SDDecl = bool; +#[derive(Debug, Clone)] +pub enum SDDecl { + SingleQuoted(bool), + DoubleQuoted(bool), +} // (Productions 33 through 38 have been removed.) diff --git a/src/xml/parsers.rs b/src/xml/parsers.rs index 6326593..93ff5b1 100644 --- a/src/xml/parsers.rs +++ b/src/xml/parsers.rs @@ -15,18 +15,18 @@ use nom::{ use crate::xml::NSAttName; use super::{ - AttDef, AttDefName, AttType, AttValue, AttlistDecl, Attribute, CDEnd, CDSect, CDStart, CData, - Char, CharData, CharRef, Children, ChildrenKind, Choice, Comment, ConditionalSect, Content, - ContentItem, Contentspec, Cp, CpKind, DeclSep, DefaultAttName, DefaultDecl, DoctypeDecl, - Document, ETag, Element, Elementdecl, EmptyElemTag, EncName, EncodingDecl, EntityDecl, - EntityDef, EntityRef, EntityValue, EnumeratedType, Enumeration, Eq, ExtParsedEnt, ExtSubset, - ExtSubsetDecl, ExtSubsetDeclaration, ExternalID, GEDecl, Ignore, IgnoreSect, - IgnoreSectContents, IncludeSect, IntSubset, IntSubsetDeclaration, LiteralData, LocalPart, - MarkupDecl, Misc, Mixed, NCName, NDataDecl, Name, NameChar, NameStartChar, Names, Nmtoken, - Nmtokens, NotationDecl, NotationDeclID, NotationType, Occurence, PEDecl, PEDef, PEReference, - PITarget, Prefix, PrefixedAttName, PrefixedName, Prolog, PubidChar, PubidLiteral, PublicID, - QName, Reference, SDDecl, STag, Seq, StringType, SystemLiteral, TextDecl, TokenizedType, - UnprefixedName, VersionInfo, VersionNum, XMLDecl, PI, S, + AttDef, AttDefName, AttType, AttValue, AttValueData, AttlistDecl, Attribute, CDEnd, CDSect, + CDStart, CData, Char, CharData, CharRef, Children, ChildrenKind, Choice, Comment, + ConditionalSect, Content, ContentItem, Contentspec, Cp, CpKind, DeclSep, DefaultAttName, + DefaultDecl, DoctypeDecl, Document, ETag, Element, Elementdecl, EmptyElemTag, EncName, + EncodingDecl, EntityDecl, EntityDef, EntityRef, EntityValue, EntityValueData, EnumeratedType, + Enumeration, Eq, ExtParsedEnt, ExtSubset, ExtSubsetDecl, ExtSubsetDeclaration, ExternalID, + GEDecl, Ignore, IgnoreSect, IgnoreSectContents, IncludeSect, IntSubset, IntSubsetDeclaration, + LocalPart, MarkupDecl, Misc, Mixed, NCName, NDataDecl, Name, NameChar, NameStartChar, Names, + Nmtoken, Nmtokens, NotationDecl, NotationDeclID, NotationType, Occurence, PEDecl, PEDef, + PEReference, PITarget, Prefix, PrefixedAttName, PrefixedName, Prolog, PubidChar, PubidLiteral, + PublicID, QName, Reference, SDDecl, STag, Seq, StringType, SystemLiteral, TextDecl, + TokenizedType, UnprefixedName, VersionInfo, VersionNum, XMLDecl, PI, S, }; pub trait Parser<'s, T> { @@ -141,9 +141,11 @@ impl Parser<'_, Char> for Char { } /// [3] S ::= (#x20 | #x9 | #xD | #xA)+ -impl<'s> Parser<'s, S<'s>> for S<'s> { - fn parse(input: &'s str) -> IResult<&str, S<'s>> { - map(is_a("\u{20}\u{9}\u{D}\u{A}"), |s| S(s))(input) +impl Parser<'_, S> for S { + fn parse(input: &str) -> IResult<&str, S> { + // TODO?: whitespacing + // map(is_a("\u{20}\u{9}\u{D}\u{A}"), |s| S(s))(input) + value(S, is_a("\u{20}\u{9}\u{D}\u{A}"))(input) } } @@ -221,43 +223,46 @@ impl<'s> Parser<'s, Nmtokens<'s>> for Nmtokens<'s> { /// | "'" ([^%&'] | PEReference | Reference)* "'" impl<'s> Parser<'s, EntityValue<'s>> for EntityValue<'s> { fn parse(input: &'s str) -> IResult<&str, EntityValue<'s>> { - map( - alt(( + alt(( + map( delimited( char('"'), many0(alt(( map( recognize(many_till(take(1usize), peek(one_of("%&\"")))), - |string| LiteralData::String(string), + |string| EntityValueData::String(string), ), map(PEReference::parse, |pe_reference| { - LiteralData::PEReference(pe_reference) + EntityValueData::PEReference(pe_reference) }), map(Reference::parse, |reference| { - LiteralData::Reference(reference) + EntityValueData::Reference(reference) }), ))), char('"'), ), + |entity_value| EntityValue::DoubleQuoted(entity_value), + ), + map( delimited( char('\''), many0(alt(( map( recognize(many_till(take(1usize), peek(one_of("%&'")))), - |string| LiteralData::String(string), + |string| EntityValueData::String(string), ), map(PEReference::parse, |pe_reference| { - LiteralData::PEReference(pe_reference) + EntityValueData::PEReference(pe_reference) }), map(Reference::parse, |reference| { - LiteralData::Reference(reference) + EntityValueData::Reference(reference) }), ))), char('\''), ), - )), - |entity_value| EntityValue(entity_value), - )(input) + |entity_value| EntityValue::SingleQuoted(entity_value), + ), + ))(input) } } @@ -265,67 +270,76 @@ impl<'s> Parser<'s, EntityValue<'s>> for EntityValue<'s> { /// | "'" ([^<&'] | Reference)* "'" impl<'s> Parser<'s, AttValue<'s>> for AttValue<'s> { fn parse(input: &'s str) -> IResult<&str, AttValue<'s>> { - map( - alt(( + alt(( + map( delimited( char('"'), many0(alt(( map( recognize(many_till(take(1usize), peek(one_of("%&\"")))), - |string| LiteralData::String(string), + |string| AttValueData::String(string), ), map(Reference::parse, |reference| { - LiteralData::Reference(reference) + AttValueData::Reference(reference) }), ))), char('"'), ), + |att_value| AttValue::DoubleQuoted(att_value), + ), + map( delimited( char('\''), many0(alt(( map( recognize(many_till(take(1usize), peek(one_of("%&'")))), - |string| LiteralData::String(string), + |string| AttValueData::String(string), ), map(Reference::parse, |reference| { - LiteralData::Reference(reference) + AttValueData::Reference(reference) }), ))), char('\''), ), - )), - |att_value| AttValue(att_value), - )(input) + |att_value| AttValue::SingleQuoted(att_value), + ), + ))(input) } } /// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") impl<'s> Parser<'s, SystemLiteral<'s>> for SystemLiteral<'s> { fn parse(input: &'s str) -> IResult<&str, SystemLiteral<'s>> { - map( - alt(( + alt(( + map( delimited(char('"'), recognize(many0(none_of("\""))), char('"')), + |system_literal| SystemLiteral::DoubleQuoted(system_literal), + ), + map( delimited(char('\''), recognize(many0(none_of("'"))), char('\'')), - )), - |system_literal| SystemLiteral(system_literal), - )(input) + |system_literal| SystemLiteral::SingleQuoted(system_literal), + ), + ))(input) } } /// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" impl<'s> Parser<'s, PubidLiteral<'s>> for PubidLiteral<'s> { fn parse(input: &'s str) -> IResult<&str, PubidLiteral<'s>> { - map( - alt(( + alt(( + map( delimited(char('"'), recognize(many0(PubidChar::parse)), char('"')), + |pubid_literal| PubidLiteral::DoubleQuoted(pubid_literal), + ), + map( delimited( char('\''), recognize(many0(recognize(not(char('\''))).and_then(PubidChar::parse))), char('\''), ), - )), - |pubid_literal| PubidLiteral(pubid_literal), - )(input) + |pubid_literal| PubidLiteral::SingleQuoted(pubid_literal), + ), + ))(input) } } @@ -477,15 +491,18 @@ impl<'s> Parser<'s, XMLDecl<'s>> for XMLDecl<'s> { /// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') impl Parser<'_, VersionInfo> for VersionInfo { fn parse(input: &'_ str) -> IResult<&str, VersionInfo> { - map( - preceded( - tuple((S::parse, tag("version"), Eq::parse)), - alt(( + preceded( + tuple((S::parse, tag("version"), Eq::parse)), + alt(( + map( delimited(char('\''), VersionNum::parse, char('\'')), + |version_info| VersionInfo::SingleQuoted(version_info), + ), + map( delimited(char('"'), VersionNum::parse, char('"')), - )), - ), - |version_num| VersionInfo(version_num), + |version_info| VersionInfo::DoubleQuoted(version_info), + ), + )), )(input) } } @@ -639,12 +656,18 @@ impl Parser<'_, SDDecl> for SDDecl { alt(( delimited( char('\''), - alt((value(true, tag("yes")), value(false, tag("no")))), + alt(( + value(SDDecl::SingleQuoted(true), tag("yes")), + value(SDDecl::SingleQuoted(false), tag("no")), + )), char('\''), ), delimited( char('"'), - alt((value(true, tag("yes")), value(false, tag("no")))), + alt(( + value(SDDecl::DoubleQuoted(true), tag("yes")), + value(SDDecl::DoubleQuoted(false), tag("no")), + )), char('"'), ), )),