implement element reading
This commit is contained in:
parent
593cad573b
commit
be50ab4890
|
@ -46,9 +46,9 @@ checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bytes"
|
name = "bytes"
|
||||||
version = "1.6.0"
|
version = "1.8.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
|
checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cc"
|
name = "cc"
|
||||||
|
@ -62,6 +62,13 @@ version = "1.0.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "circular"
|
||||||
|
version = "0.3.0"
|
||||||
|
dependencies = [
|
||||||
|
"bytes",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures"
|
name = "futures"
|
||||||
version = "0.3.30"
|
version = "0.3.30"
|
||||||
|
@ -265,6 +272,7 @@ dependencies = [
|
||||||
name = "peanuts"
|
name = "peanuts"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"circular",
|
||||||
"futures",
|
"futures",
|
||||||
"nom",
|
"nom",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
|
|
@ -6,6 +6,7 @@ edition = "2021"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
circular = { version = "0.3.0", path = "../circular" }
|
||||||
futures = "0.3.30"
|
futures = "0.3.30"
|
||||||
nom = "7.1.3"
|
nom = "7.1.3"
|
||||||
tokio = { version = "1.36.0", features = ["io-util", "net", "io-std", "full"] }
|
tokio = { version = "1.36.0", features = ["io-util", "net", "io-std", "full"] }
|
||||||
|
|
|
@ -1,23 +1,32 @@
|
||||||
// elements resemble a final tree, including inherited namespace information
|
// elements resemble a final tree, including inherited namespace information
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::{HashMap, HashSet};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
error::Error,
|
||||||
|
xml::{self, Attribute},
|
||||||
|
};
|
||||||
|
|
||||||
// when are namespaces names chosen then if they are automatically calculated
|
// when are namespaces names chosen then if they are automatically calculated
|
||||||
// namespaces are held by readers and writers.
|
// namespaces are held by readers and writers.
|
||||||
|
#[derive(PartialEq, Eq, Hash, Clone)]
|
||||||
pub struct Namespace {
|
pub struct Namespace {
|
||||||
prefix: Option<String>,
|
pub prefix: Option<String>,
|
||||||
namespace: String,
|
pub namespace: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
// names are qualified, they contain a reference to the namespace (held within the reader/writer)
|
// names are qualified, they contain a reference to the namespace (held within the reader/writer)
|
||||||
|
#[derive(PartialEq, Eq, Hash, Clone)]
|
||||||
pub struct Name {
|
pub struct Name {
|
||||||
namespace: String,
|
pub namespace: Namespace,
|
||||||
name: String,
|
pub name: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum Node {
|
pub enum Content {
|
||||||
Element(Element),
|
Element(Element),
|
||||||
Text(String),
|
Text(String),
|
||||||
|
PI(String),
|
||||||
|
Comment(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
// should this be a trait?
|
// should this be a trait?
|
||||||
|
@ -29,16 +38,35 @@ pub struct Element {
|
||||||
// namespace: String,
|
// namespace: String,
|
||||||
// hashmap of explicit namespace declarations on the element itself only
|
// hashmap of explicit namespace declarations on the element itself only
|
||||||
// possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader.
|
// possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader.
|
||||||
pub namespace_decl: HashMap<Option<String>, String>,
|
pub namespace_decl: HashSet<Namespace>,
|
||||||
// attributes can be in a different namespace than the element. how to make sure they are valid?
|
// attributes can be in a different namespace than the element. how to make sure they are valid?
|
||||||
// maybe include the namespace instead of or with the prefix
|
// maybe include the namespace instead of or with the prefix
|
||||||
// you can calculate the prefix from the namespaced name and the current writer context
|
// you can calculate the prefix from the namespaced name and the current writer context
|
||||||
// you can validate the prefix and calculate the namespace from the current reader context
|
// you can validate the prefix and calculate the namespace from the current reader context
|
||||||
// this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.
|
// this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.
|
||||||
pub attributes: HashMap<Name, String>,
|
pub attributes: HashMap<Name, String>,
|
||||||
pub children: Option<Vec<Node>>,
|
pub content: Vec<Content>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// impl<'s> TryFrom<xml::Element<'s>> for Element<'s> {
|
||||||
|
// type Error = Error;
|
||||||
|
|
||||||
|
// fn try_from(xml_element: xml::Element) -> Result<Self, Self::Error> {
|
||||||
|
// match &xml_element {
|
||||||
|
// xml::Element::Empty(empty_elem_tag) => {
|
||||||
|
// let namespace_decl;
|
||||||
|
// let attributes;
|
||||||
|
// empty_elem_tag
|
||||||
|
// .attributes
|
||||||
|
// .into_iter()
|
||||||
|
// .filter(|attribute| matches!(attribute, Attribute::NamespaceDeclaration(_)));
|
||||||
|
// todo!()
|
||||||
|
// }
|
||||||
|
// xml::Element::NotEmpty(stag, content, etag) => todo!(),
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
// example of deriving an element:
|
// example of deriving an element:
|
||||||
|
|
||||||
// #[derive(XMLWrite, XMLRead)]
|
// #[derive(XMLWrite, XMLRead)]
|
||||||
|
|
17
src/error.rs
17
src/error.rs
|
@ -1,9 +1,18 @@
|
||||||
use std::str::Utf8Error;
|
use std::{num::ParseIntError, str::Utf8Error};
|
||||||
|
|
||||||
|
use crate::element::{Name, Namespace};
|
||||||
|
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
ReadError(std::io::Error),
|
ReadError(std::io::Error),
|
||||||
Utf8Error(Utf8Error),
|
Utf8Error(Utf8Error),
|
||||||
ParseError(String),
|
ParseError(String),
|
||||||
|
EntityProcessError(String),
|
||||||
|
// TODO: better choice for failures than string
|
||||||
|
InvalidCharRef(String),
|
||||||
|
DuplicateNameSpace(Namespace),
|
||||||
|
DuplicateAttribute(String),
|
||||||
|
UnqualifiedNamespace(String),
|
||||||
|
MismatchedEndTag(String, String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<std::io::Error> for Error {
|
impl From<std::io::Error> for Error {
|
||||||
|
@ -17,3 +26,9 @@ impl From<Utf8Error> for Error {
|
||||||
Self::Utf8Error(e)
|
Self::Utf8Error(e)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<ParseIntError> for Error {
|
||||||
|
fn from(e: ParseIntError) -> Self {
|
||||||
|
Self::InvalidCharRef(e.to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -3,3 +3,5 @@ mod error;
|
||||||
mod reader;
|
mod reader;
|
||||||
mod writer;
|
mod writer;
|
||||||
pub mod xml;
|
pub mod xml;
|
||||||
|
|
||||||
|
/// Crate-wide result alias whose error type is fixed to `error::Error`.
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||||
|
|
87
src/main.rs
87
src/main.rs
|
@ -5,12 +5,87 @@ use peanuts::xml::Document;
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() {
|
async fn main() {
|
||||||
let (rest, document) = Document::parse(
|
let (rest, document) = Document::parse(
|
||||||
"<?xml version=\"1.0\"?>
|
"<?xml version='1.0' encoding='UTF-8'?>
|
||||||
<TEST>
|
|
||||||
<block1>Background Mark 1</block1>
|
<xs:schema
|
||||||
<block2>Background Mark 2</block2>
|
xmlns:xs='http://www.w3.org/2001/XMLSchema'
|
||||||
<block3>Background Mark 3</block3>
|
targetNamespace='http://etherx.jabber.org/streams'
|
||||||
</TEST>ahsdkjlfhasdlkjfhkljh
|
xmlns='http://etherx.jabber.org/streams'
|
||||||
|
elementFormDefault='unqualified'>
|
||||||
|
|
||||||
|
<xs:import namespace='jabber:client'/>
|
||||||
|
<xs:import namespace='jabber:server'/>
|
||||||
|
<xs:import namespace='urn:ietf:params:xml:ns:xmpp-sasl'/>
|
||||||
|
<xs:import namespace='urn:ietf:params:xml:ns:xmpp-streams'/>
|
||||||
|
<xs:import namespace='urn:ietf:params:xml:ns:xmpp-tls'/>
|
||||||
|
|
||||||
|
<xs:element name='stream'>
|
||||||
|
<xs:complexType>
|
||||||
|
<xs:sequence xmlns:client='jabber:client'
|
||||||
|
xmlns:server='jabber:server'>
|
||||||
|
<xs:element ref='features'
|
||||||
|
minOccurs='0'
|
||||||
|
maxOccurs='1'/>
|
||||||
|
<xs:any namespace='urn:ietf:params:xml:ns:xmpp-tls'
|
||||||
|
minOccurs='0'
|
||||||
|
maxOccurs='1'/>
|
||||||
|
<xs:any namespace='urn:ietf:params:xml:ns:xmpp-sasl'
|
||||||
|
minOccurs='0'
|
||||||
|
maxOccurs='1'/>
|
||||||
|
<xs:any namespace='##other'
|
||||||
|
minOccurs='0'
|
||||||
|
maxOccurs='unbounded'
|
||||||
|
processContents='lax'/>
|
||||||
|
<xs:choice minOccurs='0' maxOccurs='1'>
|
||||||
|
<xs:choice minOccurs='0' maxOccurs='unbounded'>
|
||||||
|
<xs:element ref='client:message'/>
|
||||||
|
<xs:element ref='client:presence'/>
|
||||||
|
<xs:element ref='client:iq'/>
|
||||||
|
</xs:choice>
|
||||||
|
<xs:choice minOccurs='0' maxOccurs='unbounded'>
|
||||||
|
<xs:element ref='server:message'/>
|
||||||
|
<xs:element ref='server:presence'/>
|
||||||
|
<xs:element ref='server:iq'/>
|
||||||
|
</xs:choice>
|
||||||
|
</xs:choice>
|
||||||
|
<xs:element ref='error' minOccurs='0' maxOccurs='1'/>
|
||||||
|
</xs:sequence>
|
||||||
|
<xs:attribute name='from' type='xs:string' use='optional'/>
|
||||||
|
<xs:attribute name='id' type='xs:string' use='optional'/>
|
||||||
|
<xs:attribute name='to' type='xs:string' use='optional'/>
|
||||||
|
<xs:attribute name='version' type='xs:decimal' use='optional'/>
|
||||||
|
<xs:attribute ref='xml:lang' use='optional'/>
|
||||||
|
<xs:anyAttribute namespace='##other' processContents='lax'/>
|
||||||
|
</xs:complexType>
|
||||||
|
</xs:element>
|
||||||
|
|
||||||
|
<xs:element name='features'>
|
||||||
|
<xs:complexType>
|
||||||
|
<xs:sequence>
|
||||||
|
<xs:any namespace='##other'
|
||||||
|
minOccurs='0'
|
||||||
|
maxOccurs='unbounded'
|
||||||
|
processContents='lax'/>
|
||||||
|
</xs:sequence>
|
||||||
|
</xs:complexType>
|
||||||
|
</xs:element>
|
||||||
|
|
||||||
|
<xs:element name='error'>
|
||||||
|
<xs:complexType>
|
||||||
|
<xs:sequence xmlns:err='urn:ietf:params:xml:ns:xmpp-streams'>
|
||||||
|
<xs:group ref='err:streamErrorGroup'/>
|
||||||
|
<xs:element ref='err:text'
|
||||||
|
minOccurs='0'
|
||||||
|
maxOccurs='1'/>
|
||||||
|
<xs:any namespace='##other'
|
||||||
|
minOccurs='0'
|
||||||
|
maxOccurs='1'
|
||||||
|
processContents='lax'/>
|
||||||
|
</xs:sequence>
|
||||||
|
</xs:complexType>
|
||||||
|
</xs:element>
|
||||||
|
|
||||||
|
</xs:schema>asdf
|
||||||
",
|
",
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
325
src/reader.rs
325
src/reader.rs
|
@ -1,31 +1,348 @@
|
||||||
|
use circular::Buffer;
|
||||||
use futures::Stream;
|
use futures::Stream;
|
||||||
use nom::Err;
|
use nom::Err;
|
||||||
use std::{collections::BTreeMap, str};
|
use std::{
|
||||||
use tokio::io::AsyncBufReadExt;
|
collections::{BTreeMap, HashMap, HashSet},
|
||||||
|
path::Prefix,
|
||||||
|
str::{self, FromStr},
|
||||||
|
};
|
||||||
|
use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
|
||||||
|
|
||||||
|
static MAX_STANZA_SIZE: usize = 65536;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
element::{Element, Name, Namespace},
|
element::{Content, Element, Name, Namespace},
|
||||||
error::Error,
|
error::Error,
|
||||||
|
xml::{self, parsers::Parser},
|
||||||
|
Result,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// streaming reader that tracks depth and available namespaces at current depth
|
/// streaming reader that tracks depth and available namespaces at current depth
|
||||||
pub struct Reader<R> {
|
pub struct Reader<R> {
|
||||||
inner: R,
|
inner: R,
|
||||||
|
buffer: Buffer,
|
||||||
// holds which tags we are in atm over depth
|
// holds which tags we are in atm over depth
|
||||||
|
// to have names reference namespaces could
|
||||||
depth: Vec<Name>,
|
depth: Vec<Name>,
|
||||||
namespaces: Vec<(usize, Namespace)>,
|
namespaces: Vec<HashSet<Namespace>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<R> Reader<R> {
|
impl<R> Reader<R> {
|
||||||
pub fn new(reader: R) -> Self {
|
pub fn new(reader: R) -> Self {
|
||||||
Self {
|
Self {
|
||||||
inner: reader,
|
inner: reader,
|
||||||
|
buffer: Buffer::with_capacity(MAX_STANZA_SIZE),
|
||||||
depth: Vec::new(),
|
depth: Vec::new(),
|
||||||
namespaces: Vec::new(),
|
namespaces: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<R> Reader<R>
|
||||||
|
where
|
||||||
|
R: AsyncRead + Unpin,
|
||||||
|
{
|
||||||
|
async fn read_buf(&mut self) -> Result<usize> {
|
||||||
|
Ok(self.inner.read_buf(&mut self.buffer).await?)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn read_element<'s>(&'s mut self) -> Result<Element> {
|
||||||
|
self.read_buf().await?;
|
||||||
|
let mut input = str::from_utf8(self.buffer.data())?;
|
||||||
|
loop {
|
||||||
|
match xml::Element::parse(input) {
|
||||||
|
Ok((rest, e)) => {
|
||||||
|
let len = self.buffer.available_data() - rest.as_bytes().len();
|
||||||
|
let element = Reader::<R>::element_from_xml(&mut self.namespaces, e)?;
|
||||||
|
self.buffer.consume(len);
|
||||||
|
return Ok(element);
|
||||||
|
}
|
||||||
|
std::result::Result::Err(e) => match e {
|
||||||
|
Err::Incomplete(_) => {
|
||||||
|
self.read_buf().await?;
|
||||||
|
input = str::from_utf8(self.buffer.data())?;
|
||||||
|
}
|
||||||
|
// TODO: better error
|
||||||
|
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
|
||||||
|
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<R> Reader<R> {
|
||||||
|
fn element_from_xml(
|
||||||
|
namespaces: &mut Vec<HashSet<Namespace>>,
|
||||||
|
element: xml::Element,
|
||||||
|
) -> Result<Element> {
|
||||||
|
match element {
|
||||||
|
xml::Element::Empty(empty_elem_tag) => {
|
||||||
|
let mut namespace_declarations = HashSet::new();
|
||||||
|
for (prefix, namespace) in
|
||||||
|
empty_elem_tag.attributes.iter().filter_map(|attribute| {
|
||||||
|
if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute {
|
||||||
|
Some((ns_name, value))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
{
|
||||||
|
let prefix = match prefix {
|
||||||
|
xml::NSAttName::PrefixedAttName(prefixed_att_name) => {
|
||||||
|
Some(prefixed_att_name.to_string())
|
||||||
|
}
|
||||||
|
xml::NSAttName::DefaultAttName => None,
|
||||||
|
};
|
||||||
|
let namespace = Namespace {
|
||||||
|
prefix,
|
||||||
|
namespace: namespace.process()?,
|
||||||
|
};
|
||||||
|
if !namespace_declarations.insert(namespace.clone()) {
|
||||||
|
return Err(Error::DuplicateNameSpace(namespace));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// all namespaces available to the element (from both parent elements and element itself)
|
||||||
|
let namespace_stack: Vec<&Namespace> = namespaces
|
||||||
|
.iter()
|
||||||
|
.flatten()
|
||||||
|
.chain(namespace_declarations.iter())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut attributes = HashMap::new();
|
||||||
|
|
||||||
|
for (q_name, value) in empty_elem_tag.attributes.iter().filter_map(|attribute| {
|
||||||
|
if let xml::Attribute::Attribute { name, value } = attribute {
|
||||||
|
Some((name, value))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}) {
|
||||||
|
let namespace;
|
||||||
|
let attribute_name;
|
||||||
|
match q_name {
|
||||||
|
xml::QName::PrefixedName(prefixed_name) => {
|
||||||
|
namespace = namespace_stack.iter().rfind(|namespace| {
|
||||||
|
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
|
||||||
|
});
|
||||||
|
attribute_name = prefixed_name.local_part.to_string();
|
||||||
|
}
|
||||||
|
xml::QName::UnprefixedName(unprefixed_name) => {
|
||||||
|
namespace = namespace_stack
|
||||||
|
.iter()
|
||||||
|
.rfind(|namespace| namespace.prefix == None);
|
||||||
|
attribute_name = unprefixed_name.to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(namespace) = namespace {
|
||||||
|
let namespace = (*namespace).clone();
|
||||||
|
let name = Name {
|
||||||
|
namespace,
|
||||||
|
name: attribute_name,
|
||||||
|
};
|
||||||
|
let value = value.process()?;
|
||||||
|
if let Some(_value) = attributes.insert(name, value) {
|
||||||
|
return Err(Error::DuplicateAttribute(q_name.to_string()));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return Err(Error::UnqualifiedNamespace(q_name.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let name;
|
||||||
|
let namespace;
|
||||||
|
match &empty_elem_tag.name {
|
||||||
|
xml::QName::PrefixedName(prefixed_name) => {
|
||||||
|
namespace = namespace_stack.iter().rfind(|namespace| {
|
||||||
|
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
|
||||||
|
});
|
||||||
|
name = prefixed_name.local_part.to_string();
|
||||||
|
}
|
||||||
|
xml::QName::UnprefixedName(unprefixed_name) => {
|
||||||
|
namespace = namespace_stack
|
||||||
|
.iter()
|
||||||
|
.rfind(|namespace| namespace.prefix == None);
|
||||||
|
name = unprefixed_name.to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let namespace = (*namespace
|
||||||
|
.ok_or_else(|| Error::UnqualifiedNamespace(empty_elem_tag.name.to_string()))?)
|
||||||
|
.clone();
|
||||||
|
|
||||||
|
let name = Name { namespace, name };
|
||||||
|
|
||||||
|
return Ok(Element {
|
||||||
|
name,
|
||||||
|
namespace_decl: namespace_declarations,
|
||||||
|
attributes,
|
||||||
|
content: Vec::new(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
xml::Element::NotEmpty(s_tag, content, e_tag) => {
|
||||||
|
if s_tag.name != e_tag.name {
|
||||||
|
return Err(Error::MismatchedEndTag(
|
||||||
|
s_tag.name.to_string(),
|
||||||
|
e_tag.name.to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let mut namespace_declarations = HashSet::new();
|
||||||
|
for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| {
|
||||||
|
if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute {
|
||||||
|
Some((ns_name, value))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}) {
|
||||||
|
let prefix = match prefix {
|
||||||
|
xml::NSAttName::PrefixedAttName(prefixed_att_name) => {
|
||||||
|
Some(prefixed_att_name.to_string())
|
||||||
|
}
|
||||||
|
xml::NSAttName::DefaultAttName => None,
|
||||||
|
};
|
||||||
|
let namespace = Namespace {
|
||||||
|
prefix,
|
||||||
|
namespace: namespace.process()?,
|
||||||
|
};
|
||||||
|
if !namespace_declarations.insert(namespace.clone()) {
|
||||||
|
return Err(Error::DuplicateNameSpace(namespace));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// all namespaces available to the element (from both parent elements and element itself)
|
||||||
|
let namespace_stack: Vec<&Namespace> = namespaces
|
||||||
|
.iter()
|
||||||
|
.flatten()
|
||||||
|
.chain(namespace_declarations.iter())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut attributes = HashMap::new();
|
||||||
|
|
||||||
|
for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| {
|
||||||
|
if let xml::Attribute::Attribute { name, value } = attribute {
|
||||||
|
Some((name, value))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}) {
|
||||||
|
let namespace;
|
||||||
|
let attribute_name;
|
||||||
|
match q_name {
|
||||||
|
xml::QName::PrefixedName(prefixed_name) => {
|
||||||
|
namespace = namespace_stack.iter().rfind(|namespace| {
|
||||||
|
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
|
||||||
|
});
|
||||||
|
attribute_name = prefixed_name.local_part.to_string();
|
||||||
|
}
|
||||||
|
xml::QName::UnprefixedName(unprefixed_name) => {
|
||||||
|
namespace = namespace_stack
|
||||||
|
.iter()
|
||||||
|
.rfind(|namespace| namespace.prefix == None);
|
||||||
|
attribute_name = unprefixed_name.to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(namespace) = namespace {
|
||||||
|
let namespace = (*namespace).clone();
|
||||||
|
let name = Name {
|
||||||
|
namespace,
|
||||||
|
name: attribute_name,
|
||||||
|
};
|
||||||
|
let value = value.process()?;
|
||||||
|
if let Some(_value) = attributes.insert(name, value) {
|
||||||
|
return Err(Error::DuplicateAttribute(q_name.to_string()));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return Err(Error::UnqualifiedNamespace(q_name.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let name;
|
||||||
|
let namespace;
|
||||||
|
match &s_tag.name {
|
||||||
|
xml::QName::PrefixedName(prefixed_name) => {
|
||||||
|
namespace = namespace_stack.iter().rfind(|namespace| {
|
||||||
|
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
|
||||||
|
});
|
||||||
|
name = prefixed_name.local_part.to_string();
|
||||||
|
}
|
||||||
|
xml::QName::UnprefixedName(unprefixed_name) => {
|
||||||
|
namespace = namespace_stack
|
||||||
|
.iter()
|
||||||
|
.rfind(|namespace| namespace.prefix == None);
|
||||||
|
name = unprefixed_name.to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let namespace = (*namespace
|
||||||
|
.ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?)
|
||||||
|
.clone();
|
||||||
|
|
||||||
|
let name = Name { namespace, name };
|
||||||
|
|
||||||
|
namespaces.push(namespace_declarations.clone());
|
||||||
|
|
||||||
|
let content = Self::content_from_xml(namespaces, content)?;
|
||||||
|
|
||||||
|
namespaces.pop();
|
||||||
|
|
||||||
|
return Ok(Element {
|
||||||
|
name,
|
||||||
|
namespace_decl: namespace_declarations,
|
||||||
|
attributes,
|
||||||
|
content,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn content_from_xml(
|
||||||
|
namespaces: &mut Vec<HashSet<Namespace>>,
|
||||||
|
element: xml::Content,
|
||||||
|
) -> Result<Vec<Content>> {
|
||||||
|
let mut content = Vec::new();
|
||||||
|
let mut text = element.char_data.map(|str| String::from(*str));
|
||||||
|
for (content_item, char_data) in element.content {
|
||||||
|
match content_item {
|
||||||
|
xml::ContentItem::Element(element) => {
|
||||||
|
text.map(|text| content.push(Content::Text(text)));
|
||||||
|
content.push(Content::Element(Self::element_from_xml(
|
||||||
|
namespaces, element,
|
||||||
|
)?));
|
||||||
|
text = char_data.map(|str| String::from(*str));
|
||||||
|
}
|
||||||
|
xml::ContentItem::Reference(reference) => {
|
||||||
|
let data = reference.process()?;
|
||||||
|
if let Some(text) = &mut text {
|
||||||
|
text.push(data)
|
||||||
|
} else {
|
||||||
|
text = Some(String::from(data))
|
||||||
|
}
|
||||||
|
char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data)));
|
||||||
|
}
|
||||||
|
xml::ContentItem::CDSect(cd_sect) => {
|
||||||
|
if let Some(text) = &mut text {
|
||||||
|
text.push_str(**cd_sect)
|
||||||
|
} else {
|
||||||
|
text = Some(String::from(**cd_sect))
|
||||||
|
}
|
||||||
|
char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data)));
|
||||||
|
}
|
||||||
|
// TODO: is this important?
|
||||||
|
xml::ContentItem::PI(pi) => {
|
||||||
|
char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data)));
|
||||||
|
}
|
||||||
|
// TODO: comments?
|
||||||
|
xml::ContentItem::Comment(comment) => {
|
||||||
|
char_data.map(|char_data| text.as_mut().map(|s| s.push_str(*char_data)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
text.map(|text| content.push(Content::Text(text)));
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// impl<R> Reader<R>
|
// impl<R> Reader<R>
|
||||||
// where
|
// where
|
||||||
// R: AsyncBufReadExt + Unpin,
|
// R: AsyncBufReadExt + Unpin,
|
||||||
|
|
241
src/xml/mod.rs
241
src/xml/mod.rs
|
@ -1,4 +1,6 @@
|
||||||
use std::char;
|
use std::{char, ops::Deref};
|
||||||
|
|
||||||
|
use crate::error::Error;
|
||||||
|
|
||||||
pub mod composers;
|
pub mod composers;
|
||||||
pub mod parsers;
|
pub mod parsers;
|
||||||
|
@ -14,40 +16,91 @@ pub enum NSAttName<'s> {
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct PrefixedAttName<'s>(NCName<'s>);
|
pub struct PrefixedAttName<'s>(NCName<'s>);
|
||||||
|
|
||||||
|
impl<'s> Deref for PrefixedAttName<'s> {
|
||||||
|
type Target = NCName<'s>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [3] DefaultAttName ::= 'xmlns';
|
/// [3] DefaultAttName ::= 'xmlns';
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct DefaultAttName;
|
pub struct DefaultAttName;
|
||||||
|
|
||||||
/// [4] NCName ::= Name - (Char* ':' Char*)
|
/// [4] NCName ::= Name - (Char* ':' Char*)
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub struct NCName<'s>(&'s str);
|
pub struct NCName<'s>(&'s str);
|
||||||
|
|
||||||
|
impl<'s> Deref for NCName<'s> {
|
||||||
|
type Target = &'s str;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [7] QName ::= PrefixedName | UnprefixedName
|
/// [7] QName ::= PrefixedName | UnprefixedName
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub enum QName<'s> {
|
pub enum QName<'s> {
|
||||||
PrefixedName(PrefixedName<'s>),
|
PrefixedName(PrefixedName<'s>),
|
||||||
UnprefixedName(UnprefixedName<'s>),
|
UnprefixedName(UnprefixedName<'s>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'s> ToString for QName<'s> {
|
||||||
|
fn to_string(&self) -> String {
|
||||||
|
match self {
|
||||||
|
QName::PrefixedName(prefixed_name) => {
|
||||||
|
format!("{}:{}", **prefixed_name.prefix, **prefixed_name.local_part)
|
||||||
|
}
|
||||||
|
QName::UnprefixedName(unprefixed_name) => unprefixed_name.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [8] PrefixedName ::= Prefix ':' LocalPart
|
/// [8] PrefixedName ::= Prefix ':' LocalPart
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub struct PrefixedName<'s> {
|
pub struct PrefixedName<'s> {
|
||||||
prefix: Prefix<'s>,
|
pub(crate) prefix: Prefix<'s>,
|
||||||
local_part: LocalPart<'s>,
|
pub(crate) local_part: LocalPart<'s>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// [9] UnprefixedName ::= LocalPart
|
/// [9] UnprefixedName ::= LocalPart
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub struct UnprefixedName<'s>(LocalPart<'s>);
|
pub struct UnprefixedName<'s>(LocalPart<'s>);
|
||||||
|
|
||||||
|
impl<'s> Deref for UnprefixedName<'s> {
|
||||||
|
type Target = LocalPart<'s>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [10] Prefix ::= NCName
|
/// [10] Prefix ::= NCName
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub struct Prefix<'s>(NCName<'s>);
|
pub struct Prefix<'s>(NCName<'s>);
|
||||||
|
|
||||||
|
impl<'s> Deref for Prefix<'s> {
|
||||||
|
type Target = NCName<'s>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [11] LocalPart ::= NCName
|
/// [11] LocalPart ::= NCName
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub struct LocalPart<'s>(NCName<'s>);
|
pub struct LocalPart<'s>(NCName<'s>);
|
||||||
|
|
||||||
|
impl<'s> Deref for LocalPart<'s> {
|
||||||
|
type Target = NCName<'s>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// xml spec
|
// xml spec
|
||||||
|
|
||||||
/// [1] document ::= prolog element Misc*
|
/// [1] document ::= prolog element Misc*
|
||||||
|
@ -57,6 +110,14 @@ pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct Char(char);
|
pub struct Char(char);
|
||||||
|
|
||||||
|
impl Deref for Char {
|
||||||
|
type Target = char;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
|
/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
|
@ -66,28 +127,76 @@ pub struct S;
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct NameStartChar(char);
|
pub struct NameStartChar(char);
|
||||||
|
|
||||||
|
impl Deref for NameStartChar {
|
||||||
|
type Target = char;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
|
/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct NameChar(char);
|
pub struct NameChar(char);
|
||||||
|
|
||||||
|
impl Deref for NameChar {
|
||||||
|
type Target = char;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [5] Name ::= NameStartChar (NameChar)*
|
/// [5] Name ::= NameStartChar (NameChar)*
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct Name<'s>(&'s str);
|
pub struct Name<'s>(&'s str);
|
||||||
|
|
||||||
|
impl<'s> Deref for Name<'s> {
|
||||||
|
type Target = &'s str;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [6] Names ::= Name (#x20 Name)*
|
/// [6] Names ::= Name (#x20 Name)*
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct Names<'s>(Vec<Name<'s>>);
|
pub struct Names<'s>(Vec<Name<'s>>);
|
||||||
|
|
||||||
|
impl<'s> Deref for Names<'s> {
|
||||||
|
type Target = Vec<Name<'s>>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [7] Nmtoken ::= (NameChar)+
|
/// [7] Nmtoken ::= (NameChar)+
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct Nmtoken<'s>(&'s str);
|
pub struct Nmtoken<'s>(&'s str);
|
||||||
|
|
||||||
|
impl<'s> Deref for Nmtoken<'s> {
|
||||||
|
type Target = &'s str;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
|
/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct Nmtokens<'s>(Vec<Nmtoken<'s>>);
|
pub struct Nmtokens<'s>(Vec<Nmtoken<'s>>);
|
||||||
|
|
||||||
|
impl<'s> Deref for Nmtokens<'s> {
|
||||||
|
type Target = Vec<Nmtoken<'s>>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub enum EntityValueData<'s> {
|
pub enum EntityValueData<'s> {
|
||||||
String(&'s str),
|
String(&'s str),
|
||||||
|
@ -115,6 +224,24 @@ pub enum AttValue<'s> {
|
||||||
SingleQuoted(Vec<AttValueData<'s>>),
|
SingleQuoted(Vec<AttValueData<'s>>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'s> AttValue<'s> {
|
||||||
|
pub fn process(&self) -> crate::Result<String> {
|
||||||
|
let mut output = String::new();
|
||||||
|
let data;
|
||||||
|
match self {
|
||||||
|
AttValue::DoubleQuoted(vec) => data = vec,
|
||||||
|
AttValue::SingleQuoted(vec) => data = vec,
|
||||||
|
}
|
||||||
|
for data in data {
|
||||||
|
match data {
|
||||||
|
AttValueData::String(s) => output.push_str(s),
|
||||||
|
AttValueData::Reference(reference) => output.push(reference.process()?),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
|
/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum SystemLiteral<'s> {
|
pub enum SystemLiteral<'s> {
|
||||||
|
@ -138,11 +265,27 @@ pub struct PubidChar(char);
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct CharData<'s>(&'s str);
|
pub struct CharData<'s>(&'s str);
|
||||||
|
|
||||||
|
impl<'s> Deref for CharData<'s> {
|
||||||
|
type Target = &'s str;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
///
/// Thin wrapper around a string slice borrowed for `'s`.
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Comment<'s>(&'s str);

impl<'s> Deref for Comment<'s> {
    // The target is the `&'s str` itself (not `str`), so `*comment` yields a
    // slice that keeps the full `'s` lifetime.
    type Target = &'s str;

    /// Borrows the wrapped string slice.
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
|
||||||
|
|
||||||
/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
|
/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct PI<'s> {
|
pub struct PI<'s> {
|
||||||
|
@ -160,6 +303,14 @@ pub struct PITarget<'s>(Name<'s>);
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct CDSect<'s>(CData<'s>);
|
pub struct CDSect<'s>(CData<'s>);
|
||||||
|
|
||||||
|
impl<'s> Deref for CDSect<'s> {
|
||||||
|
type Target = CData<'s>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [19] CDStart ::= '<![CDATA['
///
/// Unit marker type for the production; it carries no data.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDStart;
|
||||||
|
@ -169,6 +320,14 @@ pub struct CDStart;
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct CData<'s>(&'s str);
|
pub struct CData<'s>(&'s str);
|
||||||
|
|
||||||
|
impl<'s> Deref for CData<'s> {
|
||||||
|
type Target = &'s str;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [21] CDEnd ::= ']]>'
///
/// Unit marker type for the production; it carries no data.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDEnd;
|
||||||
|
@ -286,8 +445,8 @@ pub enum Element<'s> {
|
||||||
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
|
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct STag<'s> {
|
pub struct STag<'s> {
|
||||||
name: QName<'s>,
|
pub(crate) name: QName<'s>,
|
||||||
attributes: Vec<Attribute<'s>>,
|
pub(crate) attributes: Vec<Attribute<'s>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
|
/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
|
||||||
|
@ -309,7 +468,7 @@ pub enum Attribute<'s> {
|
||||||
/// [42] ETag ::= '</' Name S? '>'
|
/// [42] ETag ::= '</' Name S? '>'
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct ETag<'s> {
|
pub struct ETag<'s> {
|
||||||
name: QName<'s>,
|
pub(crate) name: QName<'s>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
|
@ -324,16 +483,16 @@ pub enum ContentItem<'s> {
|
||||||
/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
|
/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Content<'s> {
|
pub struct Content<'s> {
|
||||||
char_data: Option<CharData<'s>>,
|
pub(crate) char_data: Option<CharData<'s>>,
|
||||||
content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
|
pub(crate) content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
|
/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
|
||||||
/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
|
/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct EmptyElemTag<'s> {
|
pub struct EmptyElemTag<'s> {
|
||||||
name: QName<'s>,
|
pub(crate) name: QName<'s>,
|
||||||
attributes: Vec<Attribute<'s>>,
|
pub(crate) attributes: Vec<Attribute<'s>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
|
/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
|
||||||
|
@ -503,6 +662,32 @@ pub enum CharRef<'s> {
|
||||||
Hexadecimal(&'s str),
|
Hexadecimal(&'s str),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'s> CharRef<'s> {
|
||||||
|
pub fn process(&self) -> crate::Result<char> {
|
||||||
|
let int: u32;
|
||||||
|
match self {
|
||||||
|
CharRef::Decimal(dec) => {
|
||||||
|
int = dec.parse()?;
|
||||||
|
}
|
||||||
|
CharRef::Hexadecimal(hex) => {
|
||||||
|
int = <u32>::from_str_radix(hex, 16)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let c = std::char::from_u32(int);
|
||||||
|
|
||||||
|
let c = c.ok_or_else(|| Error::InvalidCharRef(int.to_string()))?;
|
||||||
|
if matches!(c, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}')
|
||||||
|
{
|
||||||
|
return Ok(c);
|
||||||
|
} else {
|
||||||
|
return Err(Error::InvalidCharRef(format!(
|
||||||
|
"{} is not a valid xml char",
|
||||||
|
c
|
||||||
|
)));
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [67] Reference ::= EntityRef | CharRef
|
/// [67] Reference ::= EntityRef | CharRef
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub enum Reference<'s> {
|
pub enum Reference<'s> {
|
||||||
|
@ -510,10 +695,34 @@ pub enum Reference<'s> {
|
||||||
CharRef(CharRef<'s>),
|
CharRef(CharRef<'s>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'s> Reference<'s> {
|
||||||
|
pub fn process(&self) -> crate::Result<char> {
|
||||||
|
match self {
|
||||||
|
Reference::EntityRef(entity_ref) => match *entity_ref.deref().deref() {
|
||||||
|
"amp" => Ok('&'),
|
||||||
|
"lt" => Ok('<'),
|
||||||
|
"gt" => Ok('>'),
|
||||||
|
"apos" => Ok('\''),
|
||||||
|
"quot" => Ok('"'),
|
||||||
|
e => return Err(Error::EntityProcessError(e.to_string())),
|
||||||
|
},
|
||||||
|
Reference::CharRef(char_ref) => Ok(char_ref.process()?),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [68] EntityRef ::= '&' Name ';'
|
/// [68] EntityRef ::= '&' Name ';'
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct EntityRef<'s>(Name<'s>);
|
pub struct EntityRef<'s>(Name<'s>);
|
||||||
|
|
||||||
|
impl<'s> Deref for EntityRef<'s> {
|
||||||
|
type Target = Name<'s>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// [69] PEReference ::= '%' Name ';'
|
/// [69] PEReference ::= '%' Name ';'
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
|
|
Loading…
Reference in New Issue