misc
This commit is contained in:
parent
c1e6f7e918
commit
87e6ff405b
|
@ -0,0 +1,21 @@
|
|||
/// The XML declaration (`<?xml ... ?>`) of a document, in owned form.
///
/// Holds the three parts the reader extracts from a parsed `xml::XMLDecl`:
/// the version plus the optional encoding and standalone declarations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Declaration {
    /// XML version named by the declaration.
    pub version_info: VersionInfo,
    /// Encoding name, when the declaration carries an encoding declaration.
    pub encoding_decl: Option<String>,
    /// Value of the `SDDecl` part, when present
    /// (presumably the `standalone` flag of the XML spec — confirm against `xml::SDDecl`).
    pub sd_decl: Option<bool>,
}

/// XML versions a [`Declaration`] can name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VersionInfo {
    /// Counterpart of `xml::VersionNum::One` (presumably XML `1.0`).
    One,
    /// Counterpart of `xml::VersionNum::OneDotOne` (presumably XML `1.1`).
    OneDotOne,
}
|
||||
|
||||
impl Declaration {
|
||||
pub fn version(version: VersionInfo) -> Self {
|
||||
Self {
|
||||
version_info: version,
|
||||
encoding_decl: None,
|
||||
sd_decl: None,
|
||||
}
|
||||
}
|
||||
}
|
|
@ -9,8 +9,22 @@ use std::{
|
|||
use crate::{
|
||||
error::Error,
|
||||
xml::{self, parsers_complete::Parser, Attribute},
|
||||
Result,
|
||||
};
|
||||
|
||||
/// Conversion from an [`Element`] into a typed value.
///
/// The reader's `read` and `read_start` methods pass the element they read
/// to this trait to produce their return value.
pub trait FromElement: Sized {
|
||||
/// Consumes `element` and builds `Self` from it, or returns an error.
fn from_element(element: Element) -> Result<Self>;
|
||||
}
|
||||
|
||||
pub trait IntoElement {
|
||||
fn into_element(&self) -> Element;
|
||||
|
||||
fn get_content(&self) -> Vec<Content> {
|
||||
let element = self.into_element();
|
||||
element.content
|
||||
}
|
||||
}
|
||||
|
||||
// When are namespace names chosen, then, if they are calculated automatically?
|
||||
// namespaces are held by readers and writers.
|
||||
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
|
||||
|
@ -26,7 +40,7 @@ pub struct Name {
|
|||
pub local_name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Content {
|
||||
Element(Element),
|
||||
Text(String),
|
||||
|
@ -35,7 +49,7 @@ pub enum Content {
|
|||
}
|
||||
|
||||
// should this be a trait?
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Element {
|
||||
pub name: Name,
|
||||
// namespace: Name,
|
||||
|
@ -51,6 +65,7 @@ pub struct Element {
|
|||
// you can validate the prefix and calculate the namespace from the current reader context
|
||||
// this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.
|
||||
pub attributes: HashMap<Name, String>,
|
||||
// TODO: make a hashmap maybe? to be able to address parts of the content individually
|
||||
pub content: Vec<Content>,
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::{num::ParseIntError, str::Utf8Error};
|
||||
|
||||
use crate::element::{Name, NamespaceDeclaration};
|
||||
use crate::element::{Content, Name, NamespaceDeclaration};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
|
@ -17,6 +17,12 @@ pub enum Error {
|
|||
NotInElement(String),
|
||||
ExtraData(String),
|
||||
UndeclaredNamespace(String),
|
||||
IncorrectName(Name),
|
||||
UnexpectedAttribute(Name),
|
||||
DeserializeError(String),
|
||||
UnexpectedNumberOfContents(usize),
|
||||
UnexpectedContent(Content),
|
||||
UnexpectedElement(Name),
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for Error {
|
||||
|
|
11
src/lib.rs
11
src/lib.rs
|
@ -1,7 +1,16 @@
|
|||
mod element;
|
||||
pub mod declaration;
|
||||
pub mod element;
|
||||
mod error;
|
||||
pub mod reader;
|
||||
mod writer;
|
||||
pub mod xml;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||
|
||||
pub const XML_NS: &str = "http://www.w3.org/XML/1998/namespace";
|
||||
pub const XMLNS_NS: &str = "http://www.w3.org/2000/xmlns/";
|
||||
|
||||
pub use element::Element;
|
||||
pub use error::Error;
|
||||
pub use reader::Reader;
|
||||
pub use writer::Writer;
|
||||
|
|
108
src/reader.rs
108
src/reader.rs
|
@ -2,7 +2,7 @@ use circular::Buffer;
|
|||
use futures::{FutureExt, Stream};
|
||||
use nom::Err;
|
||||
use std::{
|
||||
collections::{BTreeMap, HashMap, HashSet},
|
||||
collections::{hash_set, BTreeMap, HashMap, HashSet},
|
||||
future::Future,
|
||||
path::Prefix,
|
||||
pin::{pin, Pin},
|
||||
|
@ -10,19 +10,20 @@ use std::{
|
|||
};
|
||||
use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
|
||||
|
||||
static MAX_STANZA_SIZE: usize = 65536;
|
||||
|
||||
use crate::{
|
||||
element::{Content, Element, Name, NamespaceDeclaration},
|
||||
declaration::{Declaration, VersionInfo},
|
||||
element::{Content, Element, FromElement, Name, NamespaceDeclaration},
|
||||
error::Error,
|
||||
xml::{self, parsers::Parser},
|
||||
Result,
|
||||
Result, XMLNS_NS, XML_NS,
|
||||
};
|
||||
|
||||
static MAX_STANZA_SIZE: usize = 65536;
|
||||
|
||||
/// streaming reader that tracks depth and available namespaces at current depth
|
||||
pub struct Reader<R> {
|
||||
inner: R,
|
||||
buffer: Buffer,
|
||||
pub buffer: Buffer,
|
||||
// holds which tags we are in atm over depth
|
||||
// to have names reference namespaces could
|
||||
depth: Vec<Name>,
|
||||
|
@ -31,13 +32,27 @@ pub struct Reader<R> {
|
|||
|
||||
impl<R> Reader<R> {
|
||||
pub fn new(reader: R) -> Self {
|
||||
let mut default_declarations = HashSet::new();
|
||||
default_declarations.insert(NamespaceDeclaration {
|
||||
prefix: Some("xml".to_string()),
|
||||
namespace: XML_NS.to_string(),
|
||||
});
|
||||
default_declarations.insert(NamespaceDeclaration {
|
||||
prefix: Some("xmlns".to_string()),
|
||||
namespace: XMLNS_NS.to_string(),
|
||||
});
|
||||
Self {
|
||||
inner: reader,
|
||||
buffer: Buffer::with_capacity(MAX_STANZA_SIZE),
|
||||
depth: Vec::new(),
|
||||
namespace_declarations: Vec::new(),
|
||||
// TODO: make sure reserved namespaces are never overwritten
|
||||
namespace_declarations: vec![default_declarations],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> R {
|
||||
self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> Reader<R>
|
||||
|
@ -48,18 +63,35 @@ where
|
|||
Ok(self.inner.read_buf(&mut self.buffer).await?)
|
||||
}
|
||||
|
||||
pub async fn read_prolog<'s>(&'s mut self) -> Result<()> {
|
||||
pub async fn read_prolog<'s>(&'s mut self) -> Result<Option<Declaration>> {
|
||||
loop {
|
||||
self.read_buf().await?;
|
||||
let input = str::from_utf8(self.buffer.data())?;
|
||||
match xml::Prolog::parse(input) {
|
||||
Ok((rest, _prolog)) => {
|
||||
Ok((rest, (decl, _misc, _doctype_decl))) => {
|
||||
let len = self.buffer.available_data() - rest.as_bytes().len();
|
||||
self.buffer.consume(len);
|
||||
return Ok(());
|
||||
// TODO: return error if there is a doctype decl
|
||||
if let Some(decl) = decl {
|
||||
let declaration = Declaration {
|
||||
version_info: match *decl.version_info {
|
||||
xml::VersionNum::One => VersionInfo::One,
|
||||
xml::VersionNum::OneDotOne => VersionInfo::OneDotOne,
|
||||
},
|
||||
encoding_decl: decl
|
||||
.encoding_decl
|
||||
.map(|encoding_decl| (**encoding_decl).to_string()),
|
||||
sd_decl: decl.sd_decl.map(|sd_decl| *sd_decl),
|
||||
};
|
||||
self.buffer.consume(len);
|
||||
return Ok(Some(declaration));
|
||||
} else {
|
||||
self.buffer.consume(len);
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
std::result::Result::Err(e) => match e {
|
||||
Err::Incomplete(_) => {}
|
||||
Err::Incomplete(_) => {
|
||||
self.read_buf().await?;
|
||||
}
|
||||
// TODO: better error
|
||||
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
|
||||
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
|
||||
|
@ -68,9 +100,18 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Reads a start tag with `read_start_tag` and converts the resulting
/// element into `T` through [`FromElement`].
///
/// Fails with the reader's error if the tag cannot be read, or with the
/// conversion's error if `T::from_element` rejects the element.
pub async fn read_start<'s, T: FromElement>(&'s mut self) -> Result<T> {
    let start_tag = self.read_start_tag().await?;
    T::from_element(start_tag)
}
|
||||
|
||||
/// Reads an element with `read_element` and converts it into `T` through
/// [`FromElement`].
///
/// Fails with the reader's error if the element cannot be read, or with the
/// conversion's error if `T::from_element` rejects it.
pub async fn read<'s, T: FromElement>(&'s mut self) -> Result<T> {
    let parsed = self.read_element().await?;
    T::from_element(parsed)
}
|
||||
|
||||
pub async fn read_start_tag<'s>(&'s mut self) -> Result<Element> {
|
||||
loop {
|
||||
self.read_buf().await?;
|
||||
let input = str::from_utf8(self.buffer.data())?;
|
||||
match xml::STag::parse(input) {
|
||||
Ok((rest, e)) => {
|
||||
|
@ -84,7 +125,9 @@ where
|
|||
return Ok(element);
|
||||
}
|
||||
std::result::Result::Err(e) => match e {
|
||||
Err::Incomplete(_) => {}
|
||||
Err::Incomplete(_) => {
|
||||
self.read_buf().await?;
|
||||
}
|
||||
// TODO: better error
|
||||
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
|
||||
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
|
||||
|
@ -95,7 +138,6 @@ where
|
|||
|
||||
pub async fn read_end_tag<'s>(&'s mut self) -> Result<()> {
|
||||
loop {
|
||||
self.read_buf().await?;
|
||||
let input = str::from_utf8(self.buffer.data())?;
|
||||
match xml::ETag::parse(input) {
|
||||
Ok((rest, e)) => {
|
||||
|
@ -109,7 +151,9 @@ where
|
|||
return Ok(());
|
||||
}
|
||||
std::result::Result::Err(e) => match e {
|
||||
Err::Incomplete(_) => {}
|
||||
Err::Incomplete(_) => {
|
||||
self.read_buf().await?;
|
||||
}
|
||||
// TODO: better error
|
||||
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
|
||||
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
|
||||
|
@ -120,7 +164,6 @@ where
|
|||
|
||||
pub async fn read_element<'s>(&'s mut self) -> Result<Element> {
|
||||
loop {
|
||||
self.read_buf().await?;
|
||||
let input = str::from_utf8(self.buffer.data())?;
|
||||
match xml::Element::parse(input) {
|
||||
Ok((rest, e)) => {
|
||||
|
@ -131,7 +174,9 @@ where
|
|||
return Ok(element);
|
||||
}
|
||||
std::result::Result::Err(e) => match e {
|
||||
Err::Incomplete(_) => {}
|
||||
Err::Incomplete(_) => {
|
||||
self.read_buf().await?;
|
||||
}
|
||||
// TODO: better error
|
||||
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
|
||||
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
|
||||
|
@ -144,7 +189,6 @@ where
|
|||
let mut last_char = false;
|
||||
let mut text = String::new();
|
||||
loop {
|
||||
self.read_buf().await?;
|
||||
let input = str::from_utf8(self.buffer.data())?;
|
||||
if last_char == false {
|
||||
match xml::CharData::parse(input) {
|
||||
|
@ -155,7 +199,9 @@ where
|
|||
last_char = true;
|
||||
}
|
||||
std::result::Result::Err(e) => match e {
|
||||
Err::Incomplete(_needed) => continue,
|
||||
Err::Incomplete(_) => {
|
||||
self.read_buf().await?;
|
||||
}
|
||||
_ => match xml::ContentItem::parse(input) {
|
||||
Ok((rest, content_item)) => match content_item {
|
||||
xml::ContentItem::Element(element) => {
|
||||
|
@ -207,7 +253,9 @@ where
|
|||
}
|
||||
},
|
||||
std::result::Result::Err(e) => match e {
|
||||
Err::Incomplete(_) => continue,
|
||||
Err::Incomplete(_) => {
|
||||
self.read_buf().await?;
|
||||
}
|
||||
// TODO: better error
|
||||
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
|
||||
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
|
||||
|
@ -263,7 +311,9 @@ where
|
|||
}
|
||||
},
|
||||
std::result::Result::Err(e) => match e {
|
||||
Err::Incomplete(_) => continue,
|
||||
Err::Incomplete(_) => {
|
||||
self.read_buf().await?;
|
||||
}
|
||||
// TODO: better error
|
||||
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
|
||||
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
|
||||
|
@ -622,7 +672,11 @@ impl<R> Reader<R> {
|
|||
for (content_item, char_data) in xml_content.content {
|
||||
match content_item {
|
||||
xml::ContentItem::Element(element) => {
|
||||
text.map(|text| content.push(Content::Text(text)));
|
||||
text.map(|text| {
|
||||
if !text.is_empty() {
|
||||
content.push(Content::Text(text))
|
||||
}
|
||||
});
|
||||
content.push(Content::Element(Self::element_from_xml(
|
||||
namespaces, element,
|
||||
)?));
|
||||
|
@ -655,7 +709,11 @@ impl<R> Reader<R> {
|
|||
}
|
||||
}
|
||||
}
|
||||
text.map(|text| content.push(Content::Text(text)));
|
||||
text.map(|text| {
|
||||
if !text.is_empty() {
|
||||
content.push(Content::Text(text))
|
||||
}
|
||||
});
|
||||
Ok(content)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,10 +5,11 @@ use futures::Sink;
|
|||
use tokio::io::{AsyncWrite, AsyncWriteExt};
|
||||
|
||||
use crate::{
|
||||
element::{escape_str, Content, Element, Name, NamespaceDeclaration},
|
||||
declaration::{Declaration, VersionInfo},
|
||||
element::{escape_str, Content, Element, IntoElement, Name, NamespaceDeclaration},
|
||||
error::Error,
|
||||
xml::{self, composers::Composer, parsers_complete::Parser, ETag},
|
||||
Result,
|
||||
xml::{self, composers::Composer, parsers_complete::Parser, ETag, XMLDecl},
|
||||
Result, XMLNS_NS, XML_NS,
|
||||
};
|
||||
|
||||
// pub struct Writer<W, C = Composer> {
|
||||
|
@ -20,21 +21,69 @@ pub struct Writer<W> {
|
|||
|
||||
impl<W> Writer<W> {
|
||||
pub fn new(writer: W) -> Self {
|
||||
let mut default_declarations = HashSet::new();
|
||||
default_declarations.insert(NamespaceDeclaration {
|
||||
prefix: Some("xml".to_string()),
|
||||
namespace: XML_NS.to_string(),
|
||||
});
|
||||
default_declarations.insert(NamespaceDeclaration {
|
||||
prefix: Some("xmlns".to_string()),
|
||||
namespace: XMLNS_NS.to_string(),
|
||||
});
|
||||
Self {
|
||||
inner: writer,
|
||||
depth: Vec::new(),
|
||||
namespace_declarations: Vec::new(),
|
||||
namespace_declarations: vec![default_declarations],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> W {
|
||||
self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: AsyncWrite + Unpin + Send> Writer<W> {
|
||||
pub async fn write_declaration(&mut self, version: VersionInfo) -> Result<()> {
|
||||
let declaration = Declaration::version(version);
|
||||
let version_info;
|
||||
match declaration.version_info {
|
||||
VersionInfo::One => version_info = xml::VersionInfo::SingleQuoted(xml::VersionNum::One),
|
||||
VersionInfo::OneDotOne => {
|
||||
version_info = xml::VersionInfo::SingleQuoted(xml::VersionNum::OneDotOne)
|
||||
}
|
||||
}
|
||||
let declaration = xml::XMLDecl {
|
||||
version_info,
|
||||
encoding_decl: None,
|
||||
sd_decl: None,
|
||||
};
|
||||
declaration.write(&mut self.inner).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Converts `into_element` into an `Element` and writes it with
/// `write_element`.
pub async fn write_full(&mut self, into_element: &impl IntoElement) -> Result<()> {
    let built = into_element.into_element();
    self.write_element(&built).await
}
|
||||
|
||||
/// Converts `into_element` into an `Element` and passes it to
/// `write_element_start`.
pub async fn write_start(&mut self, into_element: &impl IntoElement) -> Result<()> {
    let built = into_element.into_element();
    self.write_element_start(&built).await
}
|
||||
|
||||
/// Writes every content item returned by `into_element.get_content()`, in
/// order, stopping at the first error.
pub async fn write_all_content(&mut self, into_element: &impl IntoElement) -> Result<()> {
    let items = into_element.get_content();
    for item in items.iter() {
        self.write_content(item).await?;
    }
    Ok(())
}
|
||||
|
||||
#[async_recursion]
|
||||
pub async fn write_element(&mut self, element: &Element) -> Result<()> {
|
||||
if element.content.is_empty() {
|
||||
self.write_empty(element).await?;
|
||||
} else {
|
||||
self.write_start(element).await?;
|
||||
self.write_element_start(element).await?;
|
||||
for content in &element.content {
|
||||
self.write_content(content).await?;
|
||||
}
|
||||
|
@ -107,12 +156,11 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
|
|||
if let Some(prefix) = &prefix {
|
||||
att_name = xml::QName::PrefixedName(xml::PrefixedName {
|
||||
prefix: xml::Prefix::parse_full(prefix)?,
|
||||
local_part: xml::LocalPart::parse_full(&element.name.local_name)?,
|
||||
local_part: xml::LocalPart::parse_full(&name.local_name)?,
|
||||
})
|
||||
} else {
|
||||
att_name = xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(
|
||||
&element.name.local_name,
|
||||
)?)
|
||||
att_name =
|
||||
xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.local_name)?)
|
||||
}
|
||||
|
||||
let value = xml::AttValue::from(value.as_str());
|
||||
|
@ -131,7 +179,7 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn write_start(&mut self, element: &Element) -> Result<()> {
|
||||
pub async fn write_element_start(&mut self, element: &Element) -> Result<()> {
|
||||
let namespace_declarations_stack: Vec<_> = self
|
||||
.namespace_declarations
|
||||
.iter()
|
||||
|
@ -195,12 +243,11 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
|
|||
if let Some(prefix) = &prefix {
|
||||
att_name = xml::QName::PrefixedName(xml::PrefixedName {
|
||||
prefix: xml::Prefix::parse_full(prefix)?,
|
||||
local_part: xml::LocalPart::parse_full(&element.name.local_name)?,
|
||||
local_part: xml::LocalPart::parse_full(&name.local_name)?,
|
||||
})
|
||||
} else {
|
||||
att_name = xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(
|
||||
&element.name.local_name,
|
||||
)?)
|
||||
att_name =
|
||||
xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.local_name)?)
|
||||
}
|
||||
|
||||
let value = xml::AttValue::from(value.as_str());
|
||||
|
|
|
@ -389,9 +389,9 @@ pub type Prolog<'s> = (
|
|||
/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
|
||||
#[derive(Debug)]
|
||||
pub struct XMLDecl<'s> {
|
||||
version_info: VersionInfo,
|
||||
encoding_decl: Option<EncodingDecl<'s>>,
|
||||
sd_decl: Option<SDDecl>,
|
||||
pub(crate) version_info: VersionInfo,
|
||||
pub(crate) encoding_decl: Option<EncodingDecl<'s>>,
|
||||
pub(crate) sd_decl: Option<SDDecl>,
|
||||
}
|
||||
|
||||
/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
|
||||
|
@ -401,6 +401,17 @@ pub enum VersionInfo {
|
|||
DoubleQuoted(VersionNum),
|
||||
}
|
||||
|
||||
impl Deref for VersionInfo {
|
||||
type Target = VersionNum;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
match self {
|
||||
VersionInfo::SingleQuoted(version_num) => version_num,
|
||||
VersionInfo::DoubleQuoted(version_num) => version_num,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// [25] Eq ::= S? '=' S?
|
||||
#[derive(Clone)]
|
||||
pub struct Eq;
|
||||
|
@ -479,6 +490,17 @@ pub enum SDDecl {
|
|||
DoubleQuoted(bool),
|
||||
}
|
||||
|
||||
impl Deref for SDDecl {
|
||||
type Target = bool;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
match self {
|
||||
SDDecl::SingleQuoted(b) => b,
|
||||
SDDecl::DoubleQuoted(b) => b,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// (Productions 33 through 38 have been removed.)
|
||||
|
||||
/// [39] element ::= EmptyElemTag | STag content ETag
|
||||
|
@ -846,10 +868,26 @@ pub struct ExtParsedEnt<'s> {
|
|||
// TODO?: select quote version
|
||||
pub struct EncodingDecl<'s>(EncName<'s>);
|
||||
|
||||
impl<'s> Deref for EncodingDecl<'s> {
|
||||
type Target = EncName<'s>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
|
||||
#[derive(Debug)]
|
||||
pub struct EncName<'s>(&'s str);
|
||||
|
||||
impl<'s> Deref for EncName<'s> {
|
||||
type Target = &'s str;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum NotationDeclID<'s> {
|
||||
External(ExternalID<'s>),
|
||||
|
|
Loading…
Reference in New Issue