implement element reading

This commit is contained in:
cel 🌸 2024-11-10 14:31:43 +00:00
parent 593cad573b
commit be50ab4890
8 changed files with 692 additions and 37 deletions

12
Cargo.lock generated
View File

@ -46,9 +46,9 @@ checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
[[package]]
name = "bytes"
version = "1.6.0"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da"
[[package]]
name = "cc"
@ -62,6 +62,13 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "circular"
version = "0.3.0"
dependencies = [
"bytes",
]
[[package]]
name = "futures"
version = "0.3.30"
@ -265,6 +272,7 @@ dependencies = [
name = "peanuts"
version = "0.1.0"
dependencies = [
"circular",
"futures",
"nom",
"tokio",

View File

@ -6,6 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
circular = { version = "0.3.0", path = "../circular" }
futures = "0.3.30"
nom = "7.1.3"
tokio = { version = "1.36.0", features = ["io-util", "net", "io-std", "full"] }

View File

@ -1,23 +1,32 @@
// elements resemble a final tree, including inherited namespace information
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use crate::{
error::Error,
xml::{self, Attribute},
};
// when are namespaces names chosen then if they are automatically calculated
// namespaces are held by readers and writers.
#[derive(PartialEq, Eq, Hash, Clone)]
pub struct Namespace {
prefix: Option<String>,
namespace: String,
pub prefix: Option<String>,
pub namespace: String,
}
// names are qualified, they contain a reference to the namespace (held within the reader/writer)
#[derive(PartialEq, Eq, Hash, Clone)]
pub struct Name {
namespace: String,
name: String,
pub namespace: Namespace,
pub name: String,
}
pub enum Node {
pub enum Content {
Element(Element),
Text(String),
PI(String),
Comment(String),
}
// should this be a trait?
@ -29,16 +38,35 @@ pub struct Element {
// namespace: String,
// hashmap of explicit namespace declarations on the element itself only
// possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader.
pub namespace_decl: HashMap<Option<String>, String>,
pub namespace_decl: HashSet<Namespace>,
// attributes can be in a different namespace than the element. how to make sure they are valid?
// maybe include the namespace instead of or with the prefix
// you can calculate the prefix from the namespaced name and the current writer context
// you can validate the prefix and calculate the namespace from the current reader context
// this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.
pub attributes: HashMap<Name, String>,
pub children: Option<Vec<Node>>,
pub content: Vec<Content>,
}
// impl<'s> TryFrom<xml::Element<'s>> for Element<'s> {
// type Error = Error;
// fn try_from(xml_element: xml::Element) -> Result<Self, Self::Error> {
// match &xml_element {
// xml::Element::Empty(empty_elem_tag) => {
// let namespace_decl;
// let attributes;
// empty_elem_tag
// .attributes
// .into_iter()
// .filter(|attribute| matches!(attribute, Attribute::NamespaceDeclaration(_)));
// todo!()
// }
// xml::Element::NotEmpty(stag, content, etag) => todo!(),
// }
// }
// }
// example of deriving an element:
// #[derive(XMLWrite, XMLRead)]

View File

@ -1,9 +1,18 @@
use std::str::Utf8Error;
use std::{num::ParseIntError, str::Utf8Error};
use crate::element::{Name, Namespace};
/// Every failure the reader and the XML processing helpers can report.
pub enum Error {
/// The underlying byte source returned an I/O error.
ReadError(std::io::Error),
/// Buffered bytes were not valid UTF-8.
Utf8Error(Utf8Error),
/// The XML parser rejected the input; carries the parser error rendered as text.
ParseError(String),
/// An entity reference named an entity that is not one of the predefined ones; carries the entity name.
EntityProcessError(String),
// TODO: better choice for failures than string
/// A character reference could not be parsed as a number or does not denote a valid XML character.
InvalidCharRef(String),
/// A namespace declaration was repeated on a single element.
DuplicateNameSpace(Namespace),
/// Two attributes on one element resolved to the same qualified name; carries the name as written.
DuplicateAttribute(String),
/// No namespace in scope matches the prefix (or the default) of a name; carries the name as written.
UnqualifiedNamespace(String),
/// Start tag and end tag names differ; carries (start tag name, end tag name).
MismatchedEndTag(String, String),
}
impl From<std::io::Error> for Error {
@ -17,3 +26,9 @@ impl From<Utf8Error> for Error {
Self::Utf8Error(e)
}
}
impl From<ParseIntError> for Error {
fn from(e: ParseIntError) -> Self {
Self::InvalidCharRef(e.to_string())
}
}

View File

@ -3,3 +3,5 @@ mod error;
mod reader;
mod writer;
pub mod xml;
pub type Result<T> = std::result::Result<T, error::Error>;

View File

@ -5,12 +5,87 @@ use peanuts::xml::Document;
#[tokio::main]
async fn main() {
let (rest, document) = Document::parse(
"<?xml version=\"1.0\"?>
<TEST>
<block1>Background Mark 1</block1>
<block2>Background Mark 2</block2>
<block3>Background Mark 3</block3>
</TEST>ahsdkjlfhasdlkjfhkljh
"<?xml version='1.0' encoding='UTF-8'?>
<xs:schema
xmlns:xs='http://www.w3.org/2001/XMLSchema'
targetNamespace='http://etherx.jabber.org/streams'
xmlns='http://etherx.jabber.org/streams'
elementFormDefault='unqualified'>
<xs:import namespace='jabber:client'/>
<xs:import namespace='jabber:server'/>
<xs:import namespace='urn:ietf:params:xml:ns:xmpp-sasl'/>
<xs:import namespace='urn:ietf:params:xml:ns:xmpp-streams'/>
<xs:import namespace='urn:ietf:params:xml:ns:xmpp-tls'/>
<xs:element name='stream'>
<xs:complexType>
<xs:sequence xmlns:client='jabber:client'
xmlns:server='jabber:server'>
<xs:element ref='features'
minOccurs='0'
maxOccurs='1'/>
<xs:any namespace='urn:ietf:params:xml:ns:xmpp-tls'
minOccurs='0'
maxOccurs='1'/>
<xs:any namespace='urn:ietf:params:xml:ns:xmpp-sasl'
minOccurs='0'
maxOccurs='1'/>
<xs:any namespace='##other'
minOccurs='0'
maxOccurs='unbounded'
processContents='lax'/>
<xs:choice minOccurs='0' maxOccurs='1'>
<xs:choice minOccurs='0' maxOccurs='unbounded'>
<xs:element ref='client:message'/>
<xs:element ref='client:presence'/>
<xs:element ref='client:iq'/>
</xs:choice>
<xs:choice minOccurs='0' maxOccurs='unbounded'>
<xs:element ref='server:message'/>
<xs:element ref='server:presence'/>
<xs:element ref='server:iq'/>
</xs:choice>
</xs:choice>
<xs:element ref='error' minOccurs='0' maxOccurs='1'/>
</xs:sequence>
<xs:attribute name='from' type='xs:string' use='optional'/>
<xs:attribute name='id' type='xs:string' use='optional'/>
<xs:attribute name='to' type='xs:string' use='optional'/>
<xs:attribute name='version' type='xs:decimal' use='optional'/>
<xs:attribute ref='xml:lang' use='optional'/>
<xs:anyAttribute namespace='##other' processContents='lax'/>
</xs:complexType>
</xs:element>
<xs:element name='features'>
<xs:complexType>
<xs:sequence>
<xs:any namespace='##other'
minOccurs='0'
maxOccurs='unbounded'
processContents='lax'/>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name='error'>
<xs:complexType>
<xs:sequence xmlns:err='urn:ietf:params:xml:ns:xmpp-streams'>
<xs:group ref='err:streamErrorGroup'/>
<xs:element ref='err:text'
minOccurs='0'
maxOccurs='1'/>
<xs:any namespace='##other'
minOccurs='0'
maxOccurs='1'
processContents='lax'/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>asdf
",
)
.unwrap();

View File

@ -1,31 +1,348 @@
use circular::Buffer;
use futures::Stream;
use nom::Err;
use std::{collections::BTreeMap, str};
use tokio::io::AsyncBufReadExt;
use std::{
collections::{BTreeMap, HashMap, HashSet},
path::Prefix,
str::{self, FromStr},
};
use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
static MAX_STANZA_SIZE: usize = 65536;
use crate::{
element::{Element, Name, Namespace},
element::{Content, Element, Name, Namespace},
error::Error,
xml::{self, parsers::Parser},
Result,
};
/// streaming reader that tracks depth and available namespaces at current depth
pub struct Reader<R> {
inner: R,
buffer: Buffer,
// holds which tags we are in atm over depth
// to have names reference namespaces could
depth: Vec<Name>,
namespaces: Vec<(usize, Namespace)>,
namespaces: Vec<HashSet<Namespace>>,
}
impl<R> Reader<R> {
    /// Wraps `reader` in a new `Reader` with an empty element stack, no
    /// namespaces in scope and a buffer of `MAX_STANZA_SIZE` capacity.
    pub fn new(reader: R) -> Self {
        let buffer = Buffer::with_capacity(MAX_STANZA_SIZE);
        let depth = Vec::new();
        let namespaces = Vec::new();
        Self {
            inner: reader,
            buffer,
            depth,
            namespaces,
        }
    }
}
impl<R> Reader<R>
where
R: AsyncRead + Unpin,
{
async fn read_buf(&mut self) -> Result<usize> {
Ok(self.inner.read_buf(&mut self.buffer).await?)
}
async fn read_element<'s>(&'s mut self) -> Result<Element> {
self.read_buf().await?;
let mut input = str::from_utf8(self.buffer.data())?;
loop {
match xml::Element::parse(input) {
Ok((rest, e)) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
let element = Reader::<R>::element_from_xml(&mut self.namespaces, e)?;
self.buffer.consume(len);
return Ok(element);
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {
self.read_buf().await?;
input = str::from_utf8(self.buffer.data())?;
}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
},
}
}
}
}
impl<R> Reader<R> {
fn element_from_xml(
namespaces: &mut Vec<HashSet<Namespace>>,
element: xml::Element,
) -> Result<Element> {
match element {
xml::Element::Empty(empty_elem_tag) => {
let mut namespace_declarations = HashSet::new();
for (prefix, namespace) in
empty_elem_tag.attributes.iter().filter_map(|attribute| {
if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute {
Some((ns_name, value))
} else {
None
}
})
{
let prefix = match prefix {
xml::NSAttName::PrefixedAttName(prefixed_att_name) => {
Some(prefixed_att_name.to_string())
}
xml::NSAttName::DefaultAttName => None,
};
let namespace = Namespace {
prefix,
namespace: namespace.process()?,
};
if !namespace_declarations.insert(namespace.clone()) {
return Err(Error::DuplicateNameSpace(namespace));
}
}
// all namespaces available to the element (from both parent elements and element itself)
let namespace_stack: Vec<&Namespace> = namespaces
.iter()
.flatten()
.chain(namespace_declarations.iter())
.collect();
let mut attributes = HashMap::new();
for (q_name, value) in empty_elem_tag.attributes.iter().filter_map(|attribute| {
if let xml::Attribute::Attribute { name, value } = attribute {
Some((name, value))
} else {
None
}
}) {
let namespace;
let attribute_name;
match q_name {
xml::QName::PrefixedName(prefixed_name) => {
namespace = namespace_stack.iter().rfind(|namespace| {
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
});
attribute_name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
attribute_name = unprefixed_name.to_string();
}
}
if let Some(namespace) = namespace {
let namespace = (*namespace).clone();
let name = Name {
namespace,
name: attribute_name,
};
let value = value.process()?;
if let Some(_value) = attributes.insert(name, value) {
return Err(Error::DuplicateAttribute(q_name.to_string()));
}
} else {
return Err(Error::UnqualifiedNamespace(q_name.to_string()));
}
}
let name;
let namespace;
match &empty_elem_tag.name {
xml::QName::PrefixedName(prefixed_name) => {
namespace = namespace_stack.iter().rfind(|namespace| {
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
});
name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
name = unprefixed_name.to_string();
}
}
let namespace = (*namespace
.ok_or_else(|| Error::UnqualifiedNamespace(empty_elem_tag.name.to_string()))?)
.clone();
let name = Name { namespace, name };
return Ok(Element {
name,
namespace_decl: namespace_declarations,
attributes,
content: Vec::new(),
});
}
xml::Element::NotEmpty(s_tag, content, e_tag) => {
if s_tag.name != e_tag.name {
return Err(Error::MismatchedEndTag(
s_tag.name.to_string(),
e_tag.name.to_string(),
));
}
let mut namespace_declarations = HashSet::new();
for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| {
if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute {
Some((ns_name, value))
} else {
None
}
}) {
let prefix = match prefix {
xml::NSAttName::PrefixedAttName(prefixed_att_name) => {
Some(prefixed_att_name.to_string())
}
xml::NSAttName::DefaultAttName => None,
};
let namespace = Namespace {
prefix,
namespace: namespace.process()?,
};
if !namespace_declarations.insert(namespace.clone()) {
return Err(Error::DuplicateNameSpace(namespace));
}
}
// all namespaces available to the element (from both parent elements and element itself)
let namespace_stack: Vec<&Namespace> = namespaces
.iter()
.flatten()
.chain(namespace_declarations.iter())
.collect();
let mut attributes = HashMap::new();
for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| {
if let xml::Attribute::Attribute { name, value } = attribute {
Some((name, value))
} else {
None
}
}) {
let namespace;
let attribute_name;
match q_name {
xml::QName::PrefixedName(prefixed_name) => {
namespace = namespace_stack.iter().rfind(|namespace| {
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
});
attribute_name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
attribute_name = unprefixed_name.to_string();
}
}
if let Some(namespace) = namespace {
let namespace = (*namespace).clone();
let name = Name {
namespace,
name: attribute_name,
};
let value = value.process()?;
if let Some(_value) = attributes.insert(name, value) {
return Err(Error::DuplicateAttribute(q_name.to_string()));
}
} else {
return Err(Error::UnqualifiedNamespace(q_name.to_string()));
}
}
let name;
let namespace;
match &s_tag.name {
xml::QName::PrefixedName(prefixed_name) => {
namespace = namespace_stack.iter().rfind(|namespace| {
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
});
name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
name = unprefixed_name.to_string();
}
}
let namespace = (*namespace
.ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?)
.clone();
let name = Name { namespace, name };
namespaces.push(namespace_declarations.clone());
let content = Self::content_from_xml(namespaces, content)?;
namespaces.pop();
return Ok(Element {
name,
namespace_decl: namespace_declarations,
attributes,
content,
});
}
}
}
fn content_from_xml(
namespaces: &mut Vec<HashSet<Namespace>>,
element: xml::Content,
) -> Result<Vec<Content>> {
let mut content = Vec::new();
let mut text = element.char_data.map(|str| String::from(*str));
for (content_item, char_data) in element.content {
match content_item {
xml::ContentItem::Element(element) => {
text.map(|text| content.push(Content::Text(text)));
content.push(Content::Element(Self::element_from_xml(
namespaces, element,
)?));
text = char_data.map(|str| String::from(*str));
}
xml::ContentItem::Reference(reference) => {
let data = reference.process()?;
if let Some(text) = &mut text {
text.push(data)
} else {
text = Some(String::from(data))
}
char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data)));
}
xml::ContentItem::CDSect(cd_sect) => {
if let Some(text) = &mut text {
text.push_str(**cd_sect)
} else {
text = Some(String::from(**cd_sect))
}
char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data)));
}
// TODO: is this important?
xml::ContentItem::PI(pi) => {
char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data)));
}
// TODO: comments?
xml::ContentItem::Comment(comment) => {
char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data)));
}
}
}
text.map(|text| content.push(Content::Text(text)));
todo!()
}
}
// impl<R> Reader<R>
// where
// R: AsyncBufReadExt + Unpin,

View File

@ -1,4 +1,6 @@
use std::char;
use std::{char, ops::Deref};
use crate::error::Error;
pub mod composers;
pub mod parsers;
@ -14,40 +16,91 @@ pub enum NSAttName<'s> {
#[derive(Clone, Debug)]
pub struct PrefixedAttName<'s>(NCName<'s>);
impl<'s> Deref for PrefixedAttName<'s> {
type Target = NCName<'s>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [3] DefaultAttName ::= 'xmlns';
#[derive(Clone, Debug)]
pub struct DefaultAttName;
/// [4] NCName ::= Name - (Char* ':' Char*)
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct NCName<'s>(&'s str);
impl<'s> Deref for NCName<'s> {
type Target = &'s str;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [7] QName ::= PrefixedName | UnprefixedName
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum QName<'s> {
PrefixedName(PrefixedName<'s>),
UnprefixedName(UnprefixedName<'s>),
}
/// Renders the qualified name as written in the document: `prefix:local` for
/// a prefixed name, or just the local part for an unprefixed one.
///
/// Implemented as `Display` rather than `ToString` so the name can be used in
/// format strings; `to_string()` remains available through the standard
/// blanket implementation.
impl<'s> std::fmt::Display for QName<'s> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            QName::PrefixedName(prefixed_name) => {
                write!(
                    f,
                    "{}:{}",
                    **prefixed_name.prefix, **prefixed_name.local_part
                )
            }
            QName::UnprefixedName(unprefixed_name) => {
                // &UnprefixedName -> UnprefixedName -> LocalPart -> NCName -> &str
                let local: &str = ****unprefixed_name;
                f.write_str(local)
            }
        }
    }
}
/// [8] PrefixedName ::= Prefix ':' LocalPart
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PrefixedName<'s> {
prefix: Prefix<'s>,
local_part: LocalPart<'s>,
pub(crate) prefix: Prefix<'s>,
pub(crate) local_part: LocalPart<'s>,
}
/// [9] UnprefixedName ::= LocalPart
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct UnprefixedName<'s>(LocalPart<'s>);
impl<'s> Deref for UnprefixedName<'s> {
type Target = LocalPart<'s>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [10] Prefix ::= NCName
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Prefix<'s>(NCName<'s>);
impl<'s> Deref for Prefix<'s> {
type Target = NCName<'s>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [11] LocalPart ::= NCName
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LocalPart<'s>(NCName<'s>);
impl<'s> Deref for LocalPart<'s> {
type Target = NCName<'s>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
// xml spec
/// [1] document ::= prolog element Misc*
@ -57,6 +110,14 @@ pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
#[repr(transparent)]
pub struct Char(char);
impl Deref for Char {
type Target = char;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
#[derive(Clone)]
#[repr(transparent)]
@ -66,28 +127,76 @@ pub struct S;
#[repr(transparent)]
pub struct NameStartChar(char);
impl Deref for NameStartChar {
type Target = char;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
#[repr(transparent)]
pub struct NameChar(char);
impl Deref for NameChar {
type Target = char;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [5] Name ::= NameStartChar (NameChar)*
#[derive(Debug, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct Name<'s>(&'s str);
impl<'s> Deref for Name<'s> {
type Target = &'s str;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [6] Names ::= Name (#x20 Name)*
#[repr(transparent)]
pub struct Names<'s>(Vec<Name<'s>>);
impl<'s> Deref for Names<'s> {
type Target = Vec<Name<'s>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [7] Nmtoken ::= (NameChar)+
#[derive(Debug, Clone)]
#[repr(transparent)]
pub struct Nmtoken<'s>(&'s str);
impl<'s> Deref for Nmtoken<'s> {
type Target = &'s str;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
#[repr(transparent)]
pub struct Nmtokens<'s>(Vec<Nmtoken<'s>>);
impl<'s> Deref for Nmtokens<'s> {
type Target = Vec<Nmtoken<'s>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[derive(Clone, Debug)]
pub enum EntityValueData<'s> {
String(&'s str),
@ -115,6 +224,24 @@ pub enum AttValue<'s> {
SingleQuoted(Vec<AttValueData<'s>>),
}
impl<'s> AttValue<'s> {
    /// Builds the attribute's text by concatenating its literal pieces and
    /// the characters its references stand for, in document order.
    ///
    /// # Errors
    ///
    /// Returns the first error produced while processing a reference.
    pub fn process(&self) -> crate::Result<String> {
        // The quoting style does not affect the value; both variants carry
        // the same kind of payload.
        let (AttValue::DoubleQuoted(parts) | AttValue::SingleQuoted(parts)) = self;
        parts.iter().try_fold(
            String::new(),
            |mut processed, part| -> crate::Result<String> {
                match part {
                    AttValueData::String(literal) => processed.push_str(literal),
                    AttValueData::Reference(reference) => processed.push(reference.process()?),
                }
                Ok(processed)
            },
        )
    }
}
/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
#[derive(Debug)]
pub enum SystemLiteral<'s> {
@ -138,11 +265,27 @@ pub struct PubidChar(char);
#[repr(transparent)]
pub struct CharData<'s>(&'s str);
impl<'s> Deref for CharData<'s> {
type Target = &'s str;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Comment<'s>(&'s str);
impl<'s> Deref for Comment<'s> {
type Target = &'s str;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
#[derive(Clone, Debug)]
pub struct PI<'s> {
@ -160,6 +303,14 @@ pub struct PITarget<'s>(Name<'s>);
#[repr(transparent)]
pub struct CDSect<'s>(CData<'s>);
impl<'s> Deref for CDSect<'s> {
type Target = CData<'s>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [19] CDStart ::= '<![CDATA['
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDStart;
@ -169,6 +320,14 @@ pub struct CDStart;
#[repr(transparent)]
pub struct CData<'s>(&'s str);
impl<'s> Deref for CData<'s> {
type Target = &'s str;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [21] CDEnd ::= ']]>'
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDEnd;
@ -286,8 +445,8 @@ pub enum Element<'s> {
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
#[derive(Debug, Clone)]
pub struct STag<'s> {
name: QName<'s>,
attributes: Vec<Attribute<'s>>,
pub(crate) name: QName<'s>,
pub(crate) attributes: Vec<Attribute<'s>>,
}
/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
@ -309,7 +468,7 @@ pub enum Attribute<'s> {
/// [42] ETag ::= '</' Name S? '>'
#[derive(Debug, Clone)]
pub struct ETag<'s> {
name: QName<'s>,
pub(crate) name: QName<'s>,
}
#[derive(Debug, Clone)]
@ -324,16 +483,16 @@ pub enum ContentItem<'s> {
/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
#[derive(Debug, Clone)]
pub struct Content<'s> {
char_data: Option<CharData<'s>>,
content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
pub(crate) char_data: Option<CharData<'s>>,
pub(crate) content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
}
/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
#[derive(Debug, Clone)]
pub struct EmptyElemTag<'s> {
name: QName<'s>,
attributes: Vec<Attribute<'s>>,
pub(crate) name: QName<'s>,
pub(crate) attributes: Vec<Attribute<'s>>,
}
/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
@ -503,6 +662,32 @@ pub enum CharRef<'s> {
Hexadecimal(&'s str),
}
impl<'s> CharRef<'s> {
pub fn process(&self) -> crate::Result<char> {
let int: u32;
match self {
CharRef::Decimal(dec) => {
int = dec.parse()?;
}
CharRef::Hexadecimal(hex) => {
int = <u32>::from_str_radix(hex, 16)?;
}
}
let c = std::char::from_u32(int);
let c = c.ok_or_else(|| Error::InvalidCharRef(int.to_string()))?;
if matches!(c, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}')
{
return Ok(c);
} else {
return Err(Error::InvalidCharRef(format!(
"{} is not a valid xml char",
c
)));
};
}
}
/// [67] Reference ::= EntityRef | CharRef
#[derive(Clone, Debug)]
pub enum Reference<'s> {
@ -510,10 +695,34 @@ pub enum Reference<'s> {
CharRef(CharRef<'s>),
}
impl<'s> Reference<'s> {
pub fn process(&self) -> crate::Result<char> {
match self {
Reference::EntityRef(entity_ref) => match *entity_ref.deref().deref() {
"amp" => Ok('&'),
"lt" => Ok('<'),
"gt" => Ok('>'),
"apos" => Ok('\''),
"quot" => Ok('"'),
e => return Err(Error::EntityProcessError(e.to_string())),
},
Reference::CharRef(char_ref) => Ok(char_ref.process()?),
}
}
}
/// [68] EntityRef ::= '&' Name ';'
#[derive(Clone, Debug)]
pub struct EntityRef<'s>(Name<'s>);
impl<'s> Deref for EntityRef<'s> {
type Target = Name<'s>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [69] PEReference ::= '%' Name ';'
#[derive(Clone, Debug)]
#[repr(transparent)]