WIP: XML composers

This commit is contained in:
cel 🌸 2024-11-01 18:36:11 +00:00
parent c6c3c1b403
commit d0a8d25d25
4 changed files with 848 additions and 67 deletions

View File

@ -5,8 +5,9 @@ use crate::{
error::Error,
};
// pub struct Writer<W, C = Composer> {
pub struct Writer<W> {
stream: W,
writer: W,
depth: Vec<Name>,
namespaces: Vec<(usize, Namespace)>,
}

735
src/xml/composers.rs Normal file
View File

@ -0,0 +1,735 @@
use std::io;
use tokio::io::{AsyncWrite, AsyncWriteExt};
use super::{
AttValue, AttValueData, CDEnd, CDSect, CDStart, CData, Char, CharData, Comment, DeclSep,
DefaultAttName, DoctypeDecl, Document, Element, EntityValue, EntityValueData, Eq, ExtSubset,
ExtSubsetDecl, IntSubset, LocalPart, MarkupDecl, Misc, NCName, NSAttName, Name, NameChar,
NameStartChar, Names, Nmtoken, Nmtokens, PITarget, Prefix, PrefixedAttName, PrefixedName,
Prolog, PubidChar, PubidLiteral, QName, SDDecl, SystemLiteral, UnprefixedName, VersionInfo,
VersionNum, XMLDecl, PI, S,
};
/// Compact Composer trait, can create different trait later for pretty composition.
///
/// A type implementing this trait serialises itself as XML text by writing
/// bytes into an asynchronous writer.
pub trait Composer<'s> {
    /// Writes the textual form of `self` into `writer`.
    ///
    /// The result is an [`io::Result`]; errors reported by the writer are
    /// handed back to the caller.
    async fn write<W>(&self, writer: &mut W) -> io::Result<()>
    where
        W: AsyncWrite + Unpin;
}
// namespaces in xml
/// [1] NSAttName ::= PrefixedAttName | DefaultAttName
///
/// Delegates to the composer of whichever alternative is held: a prefixed
/// name writes itself, the default alternative goes through the unit
/// `DefaultAttName` composer.
impl<'s> Composer<'s> for NSAttName<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        match self {
            Self::PrefixedAttName(name) => name.write(writer).await,
            Self::DefaultAttName => DefaultAttName.write(writer).await,
        }
    }
}
/// [2] PrefixedAttName ::= 'xmlns:' NCName
///
/// Emits the literal `xmlns:` and then lets the wrapped value compose itself.
impl<'s> Composer<'s> for PrefixedAttName<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_all(b"xmlns:").await?;
        self.0.write(writer).await
    }
}
/// [3] DefaultAttName ::= 'xmlns'
///
/// Always emits the fixed literal `xmlns`.
impl Composer<'_> for DefaultAttName {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_all(b"xmlns").await
    }
}
/// [4] NCName ::= Name - (Char* ':' Char*)
///
/// Writes the wrapped text verbatim; no validation or escaping happens here.
impl<'s> Composer<'s> for NCName<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let text = self.0.as_bytes();
        writer.write_all(text).await
    }
}
/// [7] QName ::= PrefixedName | UnprefixedName
///
/// Forwards to the composer of the contained alternative.
impl<'s> Composer<'s> for QName<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        match self {
            Self::PrefixedName(name) => name.write(writer).await,
            Self::UnprefixedName(name) => name.write(writer).await,
        }
    }
}
/// [8] PrefixedName ::= Prefix ':' LocalPart
///
/// Emits the prefix, a single `:` and the local part, in that order.
impl<'s> Composer<'s> for PrefixedName<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let Self {
            prefix, local_part, ..
        } = self;
        prefix.write(writer).await?;
        writer.write_all(b":").await?;
        local_part.write(writer).await
    }
}
/// [9] UnprefixedName ::= LocalPart
///
/// Thin wrapper: composition is entirely delegated to the wrapped value.
impl<'s> Composer<'s> for UnprefixedName<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let local_part = &self.0;
        local_part.write(writer).await
    }
}
/// [10] Prefix ::= NCName
///
/// Thin wrapper: composition is entirely delegated to the wrapped value.
impl<'s> Composer<'s> for Prefix<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let nc_name = &self.0;
        nc_name.write(writer).await
    }
}
/// [11] LocalPart ::= NCName
///
/// Thin wrapper: composition is entirely delegated to the wrapped value.
impl<'s> Composer<'s> for LocalPart<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let nc_name = &self.0;
        nc_name.write(writer).await
    }
}
// xml spec
/// [1] document ::= prolog element Misc*
///
/// Writes the three components in grammar order: field `0`, then field `1`,
/// then every item of field `2`.
impl<'s> Composer<'s> for Document<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        self.0.write(writer).await?;
        self.1.write(writer).await?;

        let trailing = &self.2;
        for item in trailing {
            item.write(writer).await?;
        }
        Ok(())
    }
}
/// [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
///
/// Writes the UTF-8 encoding of the wrapped character.
impl Composer<'_> for Char {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        // A `char` needs at most four bytes in UTF-8, so encode into a
        // fixed-size buffer.
        let mut utf8 = [0u8; 4];
        let encoded = self.0.encode_utf8(&mut utf8);
        writer.write_all(encoded.as_bytes()).await
    }
}
/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
///
/// The composed form is always exactly one space character (#x20).
impl<'s> Composer<'s> for S {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_all(b"\x20").await
    }
}
/// [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
///
/// Writes the string rendering of the wrapped value.
impl Composer<'_> for NameStartChar {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let rendered = self.0.to_string();
        writer.write_all(rendered.as_bytes()).await
    }
}
/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
///
/// Writes the string rendering of the wrapped value.
impl Composer<'_> for NameChar {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let rendered = self.0.to_string();
        writer.write_all(rendered.as_bytes()).await
    }
}
/// [5] Name ::= NameStartChar (NameChar)*
///
/// Writes the wrapped text verbatim.
impl<'s> Composer<'s> for Name<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let text = self.0.as_bytes();
        writer.write_all(text).await
    }
}
/// [6] Names ::= Name (#x20 Name)*
///
/// Writes every contained name, putting exactly one space (#x20) between
/// neighbours and none before the first or after the last.
impl<'s> Composer<'s> for Names<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        // Becomes true once the first name is out, so only later names get
        // a leading separator.
        let mut needs_separator = false;
        for name in &self.0 {
            if needs_separator {
                writer.write_all(b"\x20").await?;
            }
            name.write(writer).await?;
            needs_separator = true;
        }
        Ok(())
    }
}
/// [7] Nmtoken ::= (NameChar)+
///
/// Writes the wrapped text verbatim.
impl<'s> Composer<'s> for Nmtoken<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let text = self.0.as_bytes();
        writer.write_all(text).await
    }
}
/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
///
/// Writes every contained token, putting exactly one space (#x20) between
/// neighbours and none before the first or after the last.
impl<'s> Composer<'s> for Nmtokens<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        // Becomes true once the first token is out, so only later tokens get
        // a leading separator.
        let mut needs_separator = false;
        for token in &self.0 {
            if needs_separator {
                writer.write_all(b"\x20").await?;
            }
            token.write(writer).await?;
            needs_separator = true;
        }
        Ok(())
    }
}
/// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
/// | "'" ([^%&'] | PEReference | Reference)* "'"
///
/// Re-emits the value with the quote character recorded by the variant.
/// String pieces are written verbatim; references compose themselves.
impl<'s> Composer<'s> for EntityValue<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        // Both variants share the same body; only the delimiter differs.
        let (quote, pieces) = match self {
            Self::DoubleQuoted(pieces) => ("\"", pieces),
            Self::SingleQuoted(pieces) => ("'", pieces),
        };

        writer.write_all(quote.as_bytes()).await?;
        for piece in pieces {
            match piece {
                EntityValueData::String(text) => writer.write_all(text.as_bytes()).await?,
                EntityValueData::PEReference(pe_reference) => pe_reference.write(writer).await?,
                EntityValueData::Reference(reference) => reference.write(writer).await?,
            }
        }
        writer.write_all(quote.as_bytes()).await
    }
}
/// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
/// | "'" ([^<&'] | Reference)* "'"
///
/// Re-emits the value with the quote character recorded by the variant.
/// String pieces are written verbatim; references compose themselves.
impl<'s> Composer<'s> for AttValue<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        // Both variants share the same body; only the delimiter differs.
        let (quote, pieces) = match self {
            Self::DoubleQuoted(pieces) => ("\"", pieces),
            Self::SingleQuoted(pieces) => ("'", pieces),
        };

        writer.write_all(quote.as_bytes()).await?;
        for piece in pieces {
            match piece {
                AttValueData::String(text) => writer.write_all(text.as_bytes()).await?,
                AttValueData::Reference(reference) => reference.write(writer).await?,
            }
        }
        writer.write_all(quote.as_bytes()).await
    }
}
/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
///
/// Writes the literal text verbatim, wrapped in the quote character recorded
/// by the variant.
impl<'s> Composer<'s> for SystemLiteral<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let (quote, literal) = match self {
            Self::DoubleQuoted(literal) => ("\"", literal),
            Self::SingleQuoted(literal) => ("'", literal),
        };

        writer.write_all(quote.as_bytes()).await?;
        writer.write_all(literal.as_bytes()).await?;
        writer.write_all(quote.as_bytes()).await
    }
}
/// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
///
/// Writes the literal text verbatim, wrapped in the quote character recorded
/// by the variant.
impl<'s> Composer<'s> for PubidLiteral<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let (quote, literal) = match self {
            Self::DoubleQuoted(literal) => ("\"", literal),
            Self::SingleQuoted(literal) => ("'", literal),
        };

        writer.write_all(quote.as_bytes()).await?;
        writer.write_all(literal.as_bytes()).await?;
        writer.write_all(quote.as_bytes()).await
    }
}
/// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
///
/// Writes the string rendering of the wrapped value.
impl Composer<'_> for PubidChar {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let rendered = self.0.to_string();
        writer.write_all(rendered.as_bytes()).await
    }
}
/// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
///
/// Writes the wrapped text verbatim; nothing is escaped here.
impl<'s> Composer<'s> for CharData<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let text = self.0.as_bytes();
        writer.write_all(text).await
    }
}
/// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
///
/// Wraps the stored comment text, written verbatim, in `<!--` and `-->`.
impl<'s> Composer<'s> for Comment<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let body = self.0.as_bytes();
        writer.write_all(b"<!--").await?;
        writer.write_all(body).await?;
        writer.write_all(b"-->").await
    }
}
/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
///
/// Emits `<?`, the target and, only when an instruction is present, a
/// separating `S` followed by the instruction text; then closes with `?>`.
impl<'s> Composer<'s> for PI<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_all(b"<?").await?;
        self.target.write(writer).await?;
        match self.instruction {
            Some(instruction) => {
                S.write(writer).await?;
                writer.write_all(instruction.as_bytes()).await?;
            }
            None => {}
        }
        writer.write_all(b"?>").await
    }
}
/// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
///
/// Thin wrapper: composition is entirely delegated to the wrapped value.
impl<'s> Composer<'s> for PITarget<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let name = &self.0;
        name.write(writer).await
    }
}
/// [18] CDSect ::= CDStart CData CDEnd
///
/// Brackets the contained data with the `CDStart` and `CDEnd` markers.
impl<'s> Composer<'s> for CDSect<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let data = &self.0;
        CDStart.write(writer).await?;
        data.write(writer).await?;
        CDEnd.write(writer).await
    }
}
/// [19] CDStart ::= '<![CDATA['
///
/// Always emits the fixed opening marker.
impl Composer<'_> for CDStart {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_all(b"<![CDATA[").await
    }
}
/// [20] CData ::= (Char* - (Char* ']]>' Char*))
///
/// Writes the wrapped text verbatim.
impl<'s> Composer<'s> for CData<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let text = self.0.as_bytes();
        writer.write_all(text).await
    }
}
/// [21] CDEnd ::= ']]>'
///
/// Always emits the fixed closing marker.
impl Composer<'_> for CDEnd {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_all(b"]]>").await
    }
}
/// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
///
/// Writes, in order: the optional XML declaration (field `0`), the leading
/// misc items (field `1`) and, when present, the doctype declaration with the
/// misc items that follow it (field `2`).
impl<'s> Composer<'s> for Prolog<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        match &self.0 {
            Some(xml_decl) => xml_decl.write(writer).await?,
            None => {}
        }

        for leading in &self.1 {
            leading.write(writer).await?;
        }

        // Nothing more to emit when there is no doctype declaration.
        let Some((doctype_decl, trailing)) = &self.2 else {
            return Ok(());
        };
        doctype_decl.write(writer).await?;
        for item in trailing {
            item.write(writer).await?;
        }
        Ok(())
    }
}
/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
///
/// Emits `<?xml`, the version info, then the encoding and standalone
/// declarations when they are present, and finally `?>`. The optional
/// trailing `S` of the grammar is not emitted.
impl<'s> Composer<'s> for XMLDecl<'s> {
    async fn write<W>(&self, writer: &mut W) -> io::Result<()>
    where
        W: Unpin + AsyncWrite,
    {
        writer.write_all(b"<?xml").await?;
        self.version_info.write(writer).await?;
        // `self` is only a shared reference, so the optional parts are
        // borrowed instead of being moved out of it.
        if let Some(encoding_decl) = &self.encoding_decl {
            encoding_decl.write(writer).await?;
        }
        if let Some(sd_decl) = &self.sd_decl {
            sd_decl.write(writer).await?;
        }
        writer.write_all(b"?>").await?;
        Ok(())
    }
}
/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
///
/// Emits `S`, the keyword `version`, `Eq` and the version number wrapped in
/// the quote character recorded by the variant.
impl Composer<'_> for VersionInfo {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        S.write(writer).await?;
        writer.write_all(b"version").await?;
        Eq.write(writer).await?;

        // Both variants carry the same payload; only the delimiter differs.
        let (quote, version_num) = match self {
            Self::SingleQuoted(version_num) => ("'", version_num),
            Self::DoubleQuoted(version_num) => ("\"", version_num),
        };
        writer.write_all(quote.as_bytes()).await?;
        version_num.write(writer).await?;
        writer.write_all(quote.as_bytes()).await
    }
}
/// [25] Eq ::= S? '=' S?
///
/// Emits a bare `=`; the optional surrounding whitespace is left out.
impl Composer<'_> for Eq {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_all(b"=").await
    }
}
/// [26] VersionNum ::= '1.' [0-9]+
///
/// Maps each variant to its fixed text: `One` is `1.0`, `OneDotOne` is `1.1`.
impl Composer<'_> for VersionNum {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        let text = match self {
            Self::One => "1.0",
            Self::OneDotOne => "1.1",
        };
        writer.write_all(text.as_bytes()).await
    }
}
/// [27] Misc ::= Comment | PI | S
///
/// Comments and processing instructions compose themselves. The `S` variant
/// produces no output at all.
impl<'s> Composer<'s> for Misc<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        match self {
            Self::Comment(comment) => comment.write(writer).await,
            Self::PI(pi) => pi.write(writer).await,
            // NOTE(review): whitespace is dropped here, whereas `DeclSep::S`
            // does emit an `S`; confirm the difference is intended.
            Self::S => Ok(()),
        }
    }
}
/// [16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
/// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
///
/// Emits `<!DOCTYPE`, an `S`, the name, then `S` plus the external ID when
/// one is present, then the internal subset wrapped in `[` `]` when one is
/// present, and finally `>`. The optional `S?` positions of the grammar are
/// not emitted.
impl<'s> Composer<'s> for DoctypeDecl<'s> {
    async fn write<W>(&self, writer: &mut W) -> io::Result<()>
    where
        W: Unpin + AsyncWrite,
    {
        writer.write_all(b"<!DOCTYPE").await?;
        S.write(writer).await?;
        self.name.write(writer).await?;
        // `self` is only a shared reference, so the optional parts are
        // borrowed instead of being moved out of it.
        if let Some(external_id) = &self.external_id {
            S.write(writer).await?;
            external_id.write(writer).await?;
        }
        if let Some(int_subset) = &self.int_subset {
            writer.write_all(b"[").await?;
            int_subset.write(writer).await?;
            writer.write_all(b"]").await?;
        }
        writer.write_all(b">").await?;
        Ok(())
    }
}
/// [28a] DeclSep ::= PEReference | S
///
/// A parameter-entity reference composes itself; the `S` variant is emitted
/// through the unit `S` composer.
impl<'s> Composer<'s> for DeclSep<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        match self {
            Self::PEReference(pe_reference) => pe_reference.write(writer).await,
            Self::S => S.write(writer).await,
        }
    }
}
/// [28b] intSubset ::= (markupdecl | DeclSep)*
///
/// Writes every declaration in stored order, stopping at the first error.
impl<'s> Composer<'s> for IntSubset<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        for declaration in self {
            let outcome = match declaration {
                super::IntSubsetDeclaration::MarkupDecl(markup_decl) => {
                    markup_decl.write(writer).await
                }
                super::IntSubsetDeclaration::DeclSep(decl_sep) => decl_sep.write(writer).await,
            };
            outcome?;
        }
        Ok(())
    }
}
/// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
///
/// Forwards to the composer of whichever declaration kind is held.
impl<'s> Composer<'s> for MarkupDecl<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        match self {
            Self::Elementdecl(decl) => decl.write(writer).await,
            Self::AttlistDecl(decl) => decl.write(writer).await,
            Self::EntityDecl(decl) => decl.write(writer).await,
            Self::NotationDecl(decl) => decl.write(writer).await,
            Self::PI(pi) => pi.write(writer).await,
            Self::Comment(comment) => comment.write(writer).await,
        }
    }
}
/// [30] extSubset ::= TextDecl? extSubsetDecl
///
/// Emits the text declaration when one is present, followed by the external
/// subset declarations.
impl<'s> Composer<'s> for ExtSubset<'s> {
    async fn write<W>(&self, writer: &mut W) -> io::Result<()>
    where
        W: Unpin + AsyncWrite,
    {
        // `self` is only a shared reference, so the optional text declaration
        // is borrowed instead of being moved out of it.
        if let Some(text_decl) = &self.text_decl {
            text_decl.write(writer).await?;
        }
        self.ext_subset_decl.write(writer).await?;
        Ok(())
    }
}
/// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
///
/// Writes every declaration in stored order, stopping at the first error.
impl<'s> Composer<'s> for ExtSubsetDecl<'s> {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        for declaration in self {
            let outcome = match declaration {
                super::ExtSubsetDeclaration::MarkupDecl(markup_decl) => {
                    markup_decl.write(writer).await
                }
                super::ExtSubsetDeclaration::ConditionalSect(conditional_sect) => {
                    conditional_sect.write(writer).await
                }
                super::ExtSubsetDeclaration::DeclSep(decl_sep) => decl_sep.write(writer).await,
            };
            outcome?;
        }
        Ok(())
    }
}
/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
///
/// Emits `S`, the keyword `standalone`, `Eq` and then `yes` (for `true`) or
/// `no` (for `false`) wrapped in the quote character recorded by the variant.
impl Composer<'_> for SDDecl {
    async fn write<W: AsyncWrite + Unpin>(&self, writer: &mut W) -> io::Result<()> {
        S.write(writer).await?;
        writer.write_all(b"standalone").await?;
        Eq.write(writer).await?;

        // Both variants carry the same flag; only the delimiter differs.
        let (quote, standalone) = match self {
            Self::SingleQuoted(flag) => ("'", *flag),
            Self::DoubleQuoted(flag) => ("\"", *flag),
        };
        let answer = if standalone { "yes" } else { "no" };

        writer.write_all(quote.as_bytes()).await?;
        writer.write_all(answer.as_bytes()).await?;
        writer.write_all(quote.as_bytes()).await
    }
}
/// [39] element ::= EmptyElemTag | STag content ETag
///
/// Placeholder implementation: element composition is not written yet, so
/// calling `write` on an `Element` panics through `todo!()`.
impl<'s> Composer<'s> for Element<'s> {
// `writer` is currently unused because the body is only a stub.
async fn write<W>(&self, writer: &mut W) -> io::Result<()>
where
W: Unpin + AsyncWrite,
{
// Not implemented yet; panics when reached.
todo!()
}
}

View File

@ -58,8 +58,9 @@ pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
pub struct Char(char);
/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
#[derive(Clone)]
#[repr(transparent)]
pub struct S<'s>(&'s str);
pub struct S;
/// [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
#[repr(transparent)]
@ -88,7 +89,7 @@ pub struct Nmtoken<'s>(&'s str);
pub struct Nmtokens<'s>(Vec<Nmtoken<'s>>);
#[derive(Clone, Debug)]
pub enum LiteralData<'s> {
pub enum EntityValueData<'s> {
String(&'s str),
PEReference(PEReference<'s>),
Reference(Reference<'s>),
@ -96,24 +97,37 @@ pub enum LiteralData<'s> {
/// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
/// | "'" ([^%&'] | PEReference | Reference)* "'"
#[derive(Debug)]
#[repr(transparent)]
pub struct EntityValue<'s>(Vec<LiteralData<'s>>);
pub enum EntityValue<'s> {
DoubleQuoted(Vec<EntityValueData<'s>>),
SingleQuoted(Vec<EntityValueData<'s>>),
}
#[derive(Clone, Debug)]
pub enum AttValueData<'s> {
String(&'s str),
Reference(Reference<'s>),
}
/// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
/// | "'" ([^<&'] | Reference)* "'"
#[derive(Clone, Debug)]
#[repr(transparent)]
pub struct AttValue<'s>(Vec<LiteralData<'s>>);
pub enum AttValue<'s> {
DoubleQuoted(Vec<AttValueData<'s>>),
SingleQuoted(Vec<AttValueData<'s>>),
}
/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
#[derive(Debug)]
#[repr(transparent)]
pub struct SystemLiteral<'s>(&'s str);
pub enum SystemLiteral<'s> {
DoubleQuoted(&'s str),
SingleQuoted(&'s str),
}
/// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
#[derive(Debug)]
#[repr(transparent)]
pub struct PubidLiteral<'s>(&'s str);
pub enum PubidLiteral<'s> {
DoubleQuoted(&'s str),
SingleQuoted(&'s str),
}
/// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
#[repr(transparent)]
@ -176,7 +190,10 @@ pub struct XMLDecl<'s> {
/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
#[derive(Debug)]
pub struct VersionInfo(VersionNum);
pub enum VersionInfo {
SingleQuoted(VersionNum),
DoubleQuoted(VersionNum),
}
/// [25] Eq ::= S? '=' S?
#[derive(Clone)]
@ -220,6 +237,7 @@ pub enum IntSubsetDeclaration<'s> {
MarkupDecl(MarkupDecl<'s>),
DeclSep(DeclSep<'s>),
}
/// from [16] intSubset ::= (markupdecl | PEReference | S)*
/// [28b] intSubset ::= (markupdecl | DeclSep)*
pub type IntSubset<'s> = Vec<IntSubsetDeclaration<'s>>;
@ -249,7 +267,11 @@ pub enum ExtSubsetDeclaration<'s> {
type ExtSubsetDecl<'s> = Vec<ExtSubsetDeclaration<'s>>;
/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
pub type SDDecl = bool;
#[derive(Debug, Clone)]
pub enum SDDecl {
SingleQuoted(bool),
DoubleQuoted(bool),
}
// (Productions 33 through 38 have been removed.)

View File

@ -15,18 +15,18 @@ use nom::{
use crate::xml::NSAttName;
use super::{
AttDef, AttDefName, AttType, AttValue, AttlistDecl, Attribute, CDEnd, CDSect, CDStart, CData,
Char, CharData, CharRef, Children, ChildrenKind, Choice, Comment, ConditionalSect, Content,
ContentItem, Contentspec, Cp, CpKind, DeclSep, DefaultAttName, DefaultDecl, DoctypeDecl,
Document, ETag, Element, Elementdecl, EmptyElemTag, EncName, EncodingDecl, EntityDecl,
EntityDef, EntityRef, EntityValue, EnumeratedType, Enumeration, Eq, ExtParsedEnt, ExtSubset,
ExtSubsetDecl, ExtSubsetDeclaration, ExternalID, GEDecl, Ignore, IgnoreSect,
IgnoreSectContents, IncludeSect, IntSubset, IntSubsetDeclaration, LiteralData, LocalPart,
MarkupDecl, Misc, Mixed, NCName, NDataDecl, Name, NameChar, NameStartChar, Names, Nmtoken,
Nmtokens, NotationDecl, NotationDeclID, NotationType, Occurence, PEDecl, PEDef, PEReference,
PITarget, Prefix, PrefixedAttName, PrefixedName, Prolog, PubidChar, PubidLiteral, PublicID,
QName, Reference, SDDecl, STag, Seq, StringType, SystemLiteral, TextDecl, TokenizedType,
UnprefixedName, VersionInfo, VersionNum, XMLDecl, PI, S,
AttDef, AttDefName, AttType, AttValue, AttValueData, AttlistDecl, Attribute, CDEnd, CDSect,
CDStart, CData, Char, CharData, CharRef, Children, ChildrenKind, Choice, Comment,
ConditionalSect, Content, ContentItem, Contentspec, Cp, CpKind, DeclSep, DefaultAttName,
DefaultDecl, DoctypeDecl, Document, ETag, Element, Elementdecl, EmptyElemTag, EncName,
EncodingDecl, EntityDecl, EntityDef, EntityRef, EntityValue, EntityValueData, EnumeratedType,
Enumeration, Eq, ExtParsedEnt, ExtSubset, ExtSubsetDecl, ExtSubsetDeclaration, ExternalID,
GEDecl, Ignore, IgnoreSect, IgnoreSectContents, IncludeSect, IntSubset, IntSubsetDeclaration,
LocalPart, MarkupDecl, Misc, Mixed, NCName, NDataDecl, Name, NameChar, NameStartChar, Names,
Nmtoken, Nmtokens, NotationDecl, NotationDeclID, NotationType, Occurence, PEDecl, PEDef,
PEReference, PITarget, Prefix, PrefixedAttName, PrefixedName, Prolog, PubidChar, PubidLiteral,
PublicID, QName, Reference, SDDecl, STag, Seq, StringType, SystemLiteral, TextDecl,
TokenizedType, UnprefixedName, VersionInfo, VersionNum, XMLDecl, PI, S,
};
pub trait Parser<'s, T> {
@ -141,9 +141,11 @@ impl Parser<'_, Char> for Char {
}
/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
impl<'s> Parser<'s, S<'s>> for S<'s> {
fn parse(input: &'s str) -> IResult<&str, S<'s>> {
map(is_a("\u{20}\u{9}\u{D}\u{A}"), |s| S(s))(input)
impl Parser<'_, S> for S {
fn parse(input: &str) -> IResult<&str, S> {
// TODO?: whitespacing
// map(is_a("\u{20}\u{9}\u{D}\u{A}"), |s| S(s))(input)
value(S, is_a("\u{20}\u{9}\u{D}\u{A}"))(input)
}
}
@ -221,43 +223,46 @@ impl<'s> Parser<'s, Nmtokens<'s>> for Nmtokens<'s> {
/// | "'" ([^%&'] | PEReference | Reference)* "'"
impl<'s> Parser<'s, EntityValue<'s>> for EntityValue<'s> {
fn parse(input: &'s str) -> IResult<&str, EntityValue<'s>> {
map(
alt((
alt((
map(
delimited(
char('"'),
many0(alt((
map(
recognize(many_till(take(1usize), peek(one_of("%&\"")))),
|string| LiteralData::String(string),
|string| EntityValueData::String(string),
),
map(PEReference::parse, |pe_reference| {
LiteralData::PEReference(pe_reference)
EntityValueData::PEReference(pe_reference)
}),
map(Reference::parse, |reference| {
LiteralData::Reference(reference)
EntityValueData::Reference(reference)
}),
))),
char('"'),
),
|entity_value| EntityValue::DoubleQuoted(entity_value),
),
map(
delimited(
char('\''),
many0(alt((
map(
recognize(many_till(take(1usize), peek(one_of("%&'")))),
|string| LiteralData::String(string),
|string| EntityValueData::String(string),
),
map(PEReference::parse, |pe_reference| {
LiteralData::PEReference(pe_reference)
EntityValueData::PEReference(pe_reference)
}),
map(Reference::parse, |reference| {
LiteralData::Reference(reference)
EntityValueData::Reference(reference)
}),
))),
char('\''),
),
)),
|entity_value| EntityValue(entity_value),
)(input)
|entity_value| EntityValue::SingleQuoted(entity_value),
),
))(input)
}
}
@ -265,67 +270,76 @@ impl<'s> Parser<'s, EntityValue<'s>> for EntityValue<'s> {
/// | "'" ([^<&'] | Reference)* "'"
impl<'s> Parser<'s, AttValue<'s>> for AttValue<'s> {
fn parse(input: &'s str) -> IResult<&str, AttValue<'s>> {
map(
alt((
alt((
map(
delimited(
char('"'),
many0(alt((
map(
recognize(many_till(take(1usize), peek(one_of("%&\"")))),
|string| LiteralData::String(string),
|string| AttValueData::String(string),
),
map(Reference::parse, |reference| {
LiteralData::Reference(reference)
AttValueData::Reference(reference)
}),
))),
char('"'),
),
|att_value| AttValue::DoubleQuoted(att_value),
),
map(
delimited(
char('\''),
many0(alt((
map(
recognize(many_till(take(1usize), peek(one_of("%&'")))),
|string| LiteralData::String(string),
|string| AttValueData::String(string),
),
map(Reference::parse, |reference| {
LiteralData::Reference(reference)
AttValueData::Reference(reference)
}),
))),
char('\''),
),
)),
|att_value| AttValue(att_value),
)(input)
|att_value| AttValue::SingleQuoted(att_value),
),
))(input)
}
}
/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
impl<'s> Parser<'s, SystemLiteral<'s>> for SystemLiteral<'s> {
fn parse(input: &'s str) -> IResult<&str, SystemLiteral<'s>> {
map(
alt((
alt((
map(
delimited(char('"'), recognize(many0(none_of("\""))), char('"')),
|system_literal| SystemLiteral::DoubleQuoted(system_literal),
),
map(
delimited(char('\''), recognize(many0(none_of("'"))), char('\'')),
)),
|system_literal| SystemLiteral(system_literal),
)(input)
|system_literal| SystemLiteral::SingleQuoted(system_literal),
),
))(input)
}
}
/// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
impl<'s> Parser<'s, PubidLiteral<'s>> for PubidLiteral<'s> {
fn parse(input: &'s str) -> IResult<&str, PubidLiteral<'s>> {
map(
alt((
alt((
map(
delimited(char('"'), recognize(many0(PubidChar::parse)), char('"')),
|pubid_literal| PubidLiteral::DoubleQuoted(pubid_literal),
),
map(
delimited(
char('\''),
recognize(many0(recognize(not(char('\''))).and_then(PubidChar::parse))),
char('\''),
),
)),
|pubid_literal| PubidLiteral(pubid_literal),
)(input)
|pubid_literal| PubidLiteral::SingleQuoted(pubid_literal),
),
))(input)
}
}
@ -477,15 +491,18 @@ impl<'s> Parser<'s, XMLDecl<'s>> for XMLDecl<'s> {
/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
impl Parser<'_, VersionInfo> for VersionInfo {
fn parse(input: &'_ str) -> IResult<&str, VersionInfo> {
map(
preceded(
tuple((S::parse, tag("version"), Eq::parse)),
alt((
preceded(
tuple((S::parse, tag("version"), Eq::parse)),
alt((
map(
delimited(char('\''), VersionNum::parse, char('\'')),
|version_info| VersionInfo::SingleQuoted(version_info),
),
map(
delimited(char('"'), VersionNum::parse, char('"')),
)),
),
|version_num| VersionInfo(version_num),
|version_info| VersionInfo::DoubleQuoted(version_info),
),
)),
)(input)
}
}
@ -639,12 +656,18 @@ impl Parser<'_, SDDecl> for SDDecl {
alt((
delimited(
char('\''),
alt((value(true, tag("yes")), value(false, tag("no")))),
alt((
value(SDDecl::SingleQuoted(true), tag("yes")),
value(SDDecl::SingleQuoted(false), tag("no")),
)),
char('\''),
),
delimited(
char('"'),
alt((value(true, tag("yes")), value(false, tag("no")))),
alt((
value(SDDecl::DoubleQuoted(true), tag("yes")),
value(SDDecl::DoubleQuoted(false), tag("no")),
)),
char('"'),
),
)),