This commit is contained in:
cel 🌸 2024-11-24 02:05:41 +00:00
parent c1e6f7e918
commit 87e6ff405b
7 changed files with 240 additions and 46 deletions

21
src/declaration.rs Normal file
View File

@ -0,0 +1,21 @@
/// Owned representation of an XML declaration (`<?xml ... ?>`).
///
/// Each field mirrors one part of the declaration: the mandatory version
/// information plus the optional encoding and `SDDecl` parts.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Declaration {
    /// Version stated by the declaration.
    pub version_info: VersionInfo,
    /// Encoding name, when the declaration carries an encoding part.
    pub encoding_decl: Option<String>,
    /// Boolean value of the `SDDecl` part, when present.
    pub sd_decl: Option<bool>,
}

/// Version numbers a [`Declaration`] can carry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VersionInfo {
    /// Counterpart of `xml::VersionNum::One`.
    One,
    /// Counterpart of `xml::VersionNum::OneDotOne`.
    OneDotOne,
}

impl Declaration {
    /// Creates a declaration that only states `version`.
    ///
    /// Both `encoding_decl` and `sd_decl` are left as `None`.
    pub fn version(version: VersionInfo) -> Self {
        Self {
            version_info: version,
            encoding_decl: None,
            sd_decl: None,
        }
    }
}

View File

@ -9,8 +9,22 @@ use std::{
use crate::{ use crate::{
error::Error, error::Error,
xml::{self, parsers_complete::Parser, Attribute}, xml::{self, parsers_complete::Parser, Attribute},
Result,
}; };
/// Fallible conversion from a parsed [`Element`] into `Self`.
pub trait FromElement: Sized {
/// Consumes `element` and builds `Self` from it, or returns an error.
fn from_element(element: Element) -> Result<Self>;
}
/// Conversion of a value into its [`Element`] representation.
pub trait IntoElement {
    /// Builds the [`Element`] that represents `self`.
    fn into_element(&self) -> Element;

    /// Returns the `content` of the element produced by
    /// [`into_element`](IntoElement::into_element).
    ///
    /// The default implementation builds the whole element and keeps only
    /// its content list.
    fn get_content(&self) -> Vec<Content> {
        self.into_element().content
    }
}
// when are namespaces names chosen then if they are automatically calculated // when are namespaces names chosen then if they are automatically calculated
// namespaces are held by readers and writers. // namespaces are held by readers and writers.
#[derive(PartialEq, Eq, Hash, Clone, Debug)] #[derive(PartialEq, Eq, Hash, Clone, Debug)]
@ -26,7 +40,7 @@ pub struct Name {
pub local_name: String, pub local_name: String,
} }
#[derive(Debug)] #[derive(Debug, Clone)]
pub enum Content { pub enum Content {
Element(Element), Element(Element),
Text(String), Text(String),
@ -35,7 +49,7 @@ pub enum Content {
} }
// should this be a trait? // should this be a trait?
#[derive(Debug)] #[derive(Debug, Clone)]
pub struct Element { pub struct Element {
pub name: Name, pub name: Name,
// namespace: Name, // namespace: Name,
@ -51,6 +65,7 @@ pub struct Element {
// you can validate the prefix and calculate the namespace from the current reader context // you can validate the prefix and calculate the namespace from the current reader context
// this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified. // this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.
pub attributes: HashMap<Name, String>, pub attributes: HashMap<Name, String>,
// TODO: make a hashmap maybe? to be able to address parts of the content individually
pub content: Vec<Content>, pub content: Vec<Content>,
} }

View File

@ -1,6 +1,6 @@
use std::{num::ParseIntError, str::Utf8Error}; use std::{num::ParseIntError, str::Utf8Error};
use crate::element::{Name, NamespaceDeclaration}; use crate::element::{Content, Name, NamespaceDeclaration};
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
@ -17,6 +17,12 @@ pub enum Error {
NotInElement(String), NotInElement(String),
ExtraData(String), ExtraData(String),
UndeclaredNamespace(String), UndeclaredNamespace(String),
IncorrectName(Name),
UnexpectedAttribute(Name),
DeserializeError(String),
UnexpectedNumberOfContents(usize),
UnexpectedContent(Content),
UnexpectedElement(Name),
} }
impl From<std::io::Error> for Error { impl From<std::io::Error> for Error {

View File

@ -1,7 +1,16 @@
mod element; pub mod declaration;
pub mod element;
mod error; mod error;
pub mod reader; pub mod reader;
mod writer; mod writer;
pub mod xml; pub mod xml;
pub type Result<T> = std::result::Result<T, error::Error>; pub type Result<T> = std::result::Result<T, error::Error>;
pub const XML_NS: &str = "http://www.w3.org/XML/1998/namespace";
pub const XMLNS_NS: &str = "http://www.w3.org/2000/xmlns/";
pub use element::Element;
pub use error::Error;
pub use reader::Reader;
pub use writer::Writer;

View File

@ -2,7 +2,7 @@ use circular::Buffer;
use futures::{FutureExt, Stream}; use futures::{FutureExt, Stream};
use nom::Err; use nom::Err;
use std::{ use std::{
collections::{BTreeMap, HashMap, HashSet}, collections::{hash_set, BTreeMap, HashMap, HashSet},
future::Future, future::Future,
path::Prefix, path::Prefix,
pin::{pin, Pin}, pin::{pin, Pin},
@ -10,19 +10,20 @@ use std::{
}; };
use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt}; use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
static MAX_STANZA_SIZE: usize = 65536;
use crate::{ use crate::{
element::{Content, Element, Name, NamespaceDeclaration}, declaration::{Declaration, VersionInfo},
element::{Content, Element, FromElement, Name, NamespaceDeclaration},
error::Error, error::Error,
xml::{self, parsers::Parser}, xml::{self, parsers::Parser},
Result, Result, XMLNS_NS, XML_NS,
}; };
static MAX_STANZA_SIZE: usize = 65536;
/// streaming reader that tracks depth and available namespaces at current depth /// streaming reader that tracks depth and available namespaces at current depth
pub struct Reader<R> { pub struct Reader<R> {
inner: R, inner: R,
buffer: Buffer, pub buffer: Buffer,
// holds which tags we are in atm over depth // holds which tags we are in atm over depth
// to have names reference namespaces could // to have names reference namespaces could
depth: Vec<Name>, depth: Vec<Name>,
@ -31,13 +32,27 @@ pub struct Reader<R> {
impl<R> Reader<R> { impl<R> Reader<R> {
pub fn new(reader: R) -> Self { pub fn new(reader: R) -> Self {
let mut default_declarations = HashSet::new();
default_declarations.insert(NamespaceDeclaration {
prefix: Some("xml".to_string()),
namespace: XML_NS.to_string(),
});
default_declarations.insert(NamespaceDeclaration {
prefix: Some("xmlns".to_string()),
namespace: XMLNS_NS.to_string(),
});
Self { Self {
inner: reader, inner: reader,
buffer: Buffer::with_capacity(MAX_STANZA_SIZE), buffer: Buffer::with_capacity(MAX_STANZA_SIZE),
depth: Vec::new(), depth: Vec::new(),
namespace_declarations: Vec::new(), // TODO: make sure reserved namespaces are never overwritten
namespace_declarations: vec![default_declarations],
} }
} }
pub fn into_inner(self) -> R {
self.inner
}
} }
impl<R> Reader<R> impl<R> Reader<R>
@ -48,18 +63,35 @@ where
Ok(self.inner.read_buf(&mut self.buffer).await?) Ok(self.inner.read_buf(&mut self.buffer).await?)
} }
pub async fn read_prolog<'s>(&'s mut self) -> Result<()> { pub async fn read_prolog<'s>(&'s mut self) -> Result<Option<Declaration>> {
loop { loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?; let input = str::from_utf8(self.buffer.data())?;
match xml::Prolog::parse(input) { match xml::Prolog::parse(input) {
Ok((rest, _prolog)) => { Ok((rest, (decl, _misc, _doctype_decl))) => {
let len = self.buffer.available_data() - rest.as_bytes().len(); let len = self.buffer.available_data() - rest.as_bytes().len();
self.buffer.consume(len); // TODO: return error if there is a doctype decl
return Ok(()); if let Some(decl) = decl {
let declaration = Declaration {
version_info: match *decl.version_info {
xml::VersionNum::One => VersionInfo::One,
xml::VersionNum::OneDotOne => VersionInfo::OneDotOne,
},
encoding_decl: decl
.encoding_decl
.map(|encoding_decl| (**encoding_decl).to_string()),
sd_decl: decl.sd_decl.map(|sd_decl| *sd_decl),
};
self.buffer.consume(len);
return Ok(Some(declaration));
} else {
self.buffer.consume(len);
return Ok(None);
}
} }
std::result::Result::Err(e) => match e { std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {} Err::Incomplete(_) => {
self.read_buf().await?;
}
// TODO: better error // TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())), Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())), Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -68,9 +100,18 @@ where
} }
} }
pub async fn read_start<'s, T: FromElement>(&'s mut self) -> Result<T> {
let element = self.read_start_tag().await?;
FromElement::from_element(element)
}
pub async fn read<'s, T: FromElement>(&'s mut self) -> Result<T> {
let element = self.read_element().await?;
FromElement::from_element(element)
}
pub async fn read_start_tag<'s>(&'s mut self) -> Result<Element> { pub async fn read_start_tag<'s>(&'s mut self) -> Result<Element> {
loop { loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?; let input = str::from_utf8(self.buffer.data())?;
match xml::STag::parse(input) { match xml::STag::parse(input) {
Ok((rest, e)) => { Ok((rest, e)) => {
@ -84,7 +125,9 @@ where
return Ok(element); return Ok(element);
} }
std::result::Result::Err(e) => match e { std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {} Err::Incomplete(_) => {
self.read_buf().await?;
}
// TODO: better error // TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())), Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())), Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -95,7 +138,6 @@ where
pub async fn read_end_tag<'s>(&'s mut self) -> Result<()> { pub async fn read_end_tag<'s>(&'s mut self) -> Result<()> {
loop { loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?; let input = str::from_utf8(self.buffer.data())?;
match xml::ETag::parse(input) { match xml::ETag::parse(input) {
Ok((rest, e)) => { Ok((rest, e)) => {
@ -109,7 +151,9 @@ where
return Ok(()); return Ok(());
} }
std::result::Result::Err(e) => match e { std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {} Err::Incomplete(_) => {
self.read_buf().await?;
}
// TODO: better error // TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())), Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())), Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -120,7 +164,6 @@ where
pub async fn read_element<'s>(&'s mut self) -> Result<Element> { pub async fn read_element<'s>(&'s mut self) -> Result<Element> {
loop { loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?; let input = str::from_utf8(self.buffer.data())?;
match xml::Element::parse(input) { match xml::Element::parse(input) {
Ok((rest, e)) => { Ok((rest, e)) => {
@ -131,7 +174,9 @@ where
return Ok(element); return Ok(element);
} }
std::result::Result::Err(e) => match e { std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {} Err::Incomplete(_) => {
self.read_buf().await?;
}
// TODO: better error // TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())), Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())), Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -144,7 +189,6 @@ where
let mut last_char = false; let mut last_char = false;
let mut text = String::new(); let mut text = String::new();
loop { loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?; let input = str::from_utf8(self.buffer.data())?;
if last_char == false { if last_char == false {
match xml::CharData::parse(input) { match xml::CharData::parse(input) {
@ -155,7 +199,9 @@ where
last_char = true; last_char = true;
} }
std::result::Result::Err(e) => match e { std::result::Result::Err(e) => match e {
Err::Incomplete(_needed) => continue, Err::Incomplete(_) => {
self.read_buf().await?;
}
_ => match xml::ContentItem::parse(input) { _ => match xml::ContentItem::parse(input) {
Ok((rest, content_item)) => match content_item { Ok((rest, content_item)) => match content_item {
xml::ContentItem::Element(element) => { xml::ContentItem::Element(element) => {
@ -207,7 +253,9 @@ where
} }
}, },
std::result::Result::Err(e) => match e { std::result::Result::Err(e) => match e {
Err::Incomplete(_) => continue, Err::Incomplete(_) => {
self.read_buf().await?;
}
// TODO: better error // TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())), Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())), Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -263,7 +311,9 @@ where
} }
}, },
std::result::Result::Err(e) => match e { std::result::Result::Err(e) => match e {
Err::Incomplete(_) => continue, Err::Incomplete(_) => {
self.read_buf().await?;
}
// TODO: better error // TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())), Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())), Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -622,7 +672,11 @@ impl<R> Reader<R> {
for (content_item, char_data) in xml_content.content { for (content_item, char_data) in xml_content.content {
match content_item { match content_item {
xml::ContentItem::Element(element) => { xml::ContentItem::Element(element) => {
text.map(|text| content.push(Content::Text(text))); text.map(|text| {
if !text.is_empty() {
content.push(Content::Text(text))
}
});
content.push(Content::Element(Self::element_from_xml( content.push(Content::Element(Self::element_from_xml(
namespaces, element, namespaces, element,
)?)); )?));
@ -655,7 +709,11 @@ impl<R> Reader<R> {
} }
} }
} }
text.map(|text| content.push(Content::Text(text))); text.map(|text| {
if !text.is_empty() {
content.push(Content::Text(text))
}
});
Ok(content) Ok(content)
} }
} }

View File

@ -5,10 +5,11 @@ use futures::Sink;
use tokio::io::{AsyncWrite, AsyncWriteExt}; use tokio::io::{AsyncWrite, AsyncWriteExt};
use crate::{ use crate::{
element::{escape_str, Content, Element, Name, NamespaceDeclaration}, declaration::{Declaration, VersionInfo},
element::{escape_str, Content, Element, IntoElement, Name, NamespaceDeclaration},
error::Error, error::Error,
xml::{self, composers::Composer, parsers_complete::Parser, ETag}, xml::{self, composers::Composer, parsers_complete::Parser, ETag, XMLDecl},
Result, Result, XMLNS_NS, XML_NS,
}; };
// pub struct Writer<W, C = Composer> { // pub struct Writer<W, C = Composer> {
@ -20,21 +21,69 @@ pub struct Writer<W> {
impl<W> Writer<W> { impl<W> Writer<W> {
pub fn new(writer: W) -> Self { pub fn new(writer: W) -> Self {
let mut default_declarations = HashSet::new();
default_declarations.insert(NamespaceDeclaration {
prefix: Some("xml".to_string()),
namespace: XML_NS.to_string(),
});
default_declarations.insert(NamespaceDeclaration {
prefix: Some("xmlns".to_string()),
namespace: XMLNS_NS.to_string(),
});
Self { Self {
inner: writer, inner: writer,
depth: Vec::new(), depth: Vec::new(),
namespace_declarations: Vec::new(), namespace_declarations: vec![default_declarations],
} }
} }
pub fn into_inner(self) -> W {
self.inner
}
} }
impl<W: AsyncWrite + Unpin + Send> Writer<W> { impl<W: AsyncWrite + Unpin + Send> Writer<W> {
/// Writes an XML declaration that carries only version information.
///
/// The declaration is built with the `SingleQuoted` version variant and
/// without encoding or `SDDecl` parts, then written to the inner writer.
/// Any error from that write is returned.
pub async fn write_declaration(&mut self, version: VersionInfo) -> Result<()> {
    // Map the public version enum onto the composer's version number.
    let version_num = match version {
        VersionInfo::One => xml::VersionNum::One,
        VersionInfo::OneDotOne => xml::VersionNum::OneDotOne,
    };
    let xml_decl = xml::XMLDecl {
        version_info: xml::VersionInfo::SingleQuoted(version_num),
        encoding_decl: None,
        sd_decl: None,
    };
    xml_decl.write(&mut self.inner).await?;
    Ok(())
}
/// Converts `into_element` to an `Element` and writes it in full.
///
/// The writing is done by `write_element`, whose result is returned as-is.
pub async fn write_full(&mut self, into_element: &impl IntoElement) -> Result<()> {
    let built = into_element.into_element();
    self.write_element(&built).await
}
/// Converts `into_element` to an `Element` and writes only its start.
///
/// The writing is done by `write_element_start`, whose result is returned
/// as-is.
pub async fn write_start(&mut self, into_element: &impl IntoElement) -> Result<()> {
    let built = into_element.into_element();
    self.write_element_start(&built).await
}
/// Writes every content item of `into_element`, in order.
///
/// The items come from `IntoElement::get_content` and each one is written
/// with `write_content`. The first error stops the loop and is returned.
pub async fn write_all_content(&mut self, into_element: &impl IntoElement) -> Result<()> {
    let items = into_element.get_content();
    for item in items.iter() {
        self.write_content(item).await?;
    }
    Ok(())
}
#[async_recursion] #[async_recursion]
pub async fn write_element(&mut self, element: &Element) -> Result<()> { pub async fn write_element(&mut self, element: &Element) -> Result<()> {
if element.content.is_empty() { if element.content.is_empty() {
self.write_empty(element).await?; self.write_empty(element).await?;
} else { } else {
self.write_start(element).await?; self.write_element_start(element).await?;
for content in &element.content { for content in &element.content {
self.write_content(content).await?; self.write_content(content).await?;
} }
@ -107,12 +156,11 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
if let Some(prefix) = &prefix { if let Some(prefix) = &prefix {
att_name = xml::QName::PrefixedName(xml::PrefixedName { att_name = xml::QName::PrefixedName(xml::PrefixedName {
prefix: xml::Prefix::parse_full(prefix)?, prefix: xml::Prefix::parse_full(prefix)?,
local_part: xml::LocalPart::parse_full(&element.name.local_name)?, local_part: xml::LocalPart::parse_full(&name.local_name)?,
}) })
} else { } else {
att_name = xml::QName::UnprefixedName(xml::UnprefixedName::parse_full( att_name =
&element.name.local_name, xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.local_name)?)
)?)
} }
let value = xml::AttValue::from(value.as_str()); let value = xml::AttValue::from(value.as_str());
@ -131,7 +179,7 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
Ok(()) Ok(())
} }
pub async fn write_start(&mut self, element: &Element) -> Result<()> { pub async fn write_element_start(&mut self, element: &Element) -> Result<()> {
let namespace_declarations_stack: Vec<_> = self let namespace_declarations_stack: Vec<_> = self
.namespace_declarations .namespace_declarations
.iter() .iter()
@ -195,12 +243,11 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
if let Some(prefix) = &prefix { if let Some(prefix) = &prefix {
att_name = xml::QName::PrefixedName(xml::PrefixedName { att_name = xml::QName::PrefixedName(xml::PrefixedName {
prefix: xml::Prefix::parse_full(prefix)?, prefix: xml::Prefix::parse_full(prefix)?,
local_part: xml::LocalPart::parse_full(&element.name.local_name)?, local_part: xml::LocalPart::parse_full(&name.local_name)?,
}) })
} else { } else {
att_name = xml::QName::UnprefixedName(xml::UnprefixedName::parse_full( att_name =
&element.name.local_name, xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.local_name)?)
)?)
} }
let value = xml::AttValue::from(value.as_str()); let value = xml::AttValue::from(value.as_str());

View File

@ -389,9 +389,9 @@ pub type Prolog<'s> = (
/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' /// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
#[derive(Debug)] #[derive(Debug)]
pub struct XMLDecl<'s> { pub struct XMLDecl<'s> {
version_info: VersionInfo, pub(crate) version_info: VersionInfo,
encoding_decl: Option<EncodingDecl<'s>>, pub(crate) encoding_decl: Option<EncodingDecl<'s>>,
sd_decl: Option<SDDecl>, pub(crate) sd_decl: Option<SDDecl>,
} }
/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') /// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
@ -401,6 +401,17 @@ pub enum VersionInfo {
DoubleQuoted(VersionNum), DoubleQuoted(VersionNum),
} }
impl Deref for VersionInfo {
type Target = VersionNum;
fn deref(&self) -> &Self::Target {
match self {
VersionInfo::SingleQuoted(version_num) => version_num,
VersionInfo::DoubleQuoted(version_num) => version_num,
}
}
}
/// [25] Eq ::= S? '=' S? /// [25] Eq ::= S? '=' S?
#[derive(Clone)] #[derive(Clone)]
pub struct Eq; pub struct Eq;
@ -479,6 +490,17 @@ pub enum SDDecl {
DoubleQuoted(bool), DoubleQuoted(bool),
} }
impl Deref for SDDecl {
type Target = bool;
fn deref(&self) -> &Self::Target {
match self {
SDDecl::SingleQuoted(b) => b,
SDDecl::DoubleQuoted(b) => b,
}
}
}
// (Productions 33 through 38 have been removed.) // (Productions 33 through 38 have been removed.)
/// [39] element ::= EmptyElemTag | STag content ETag /// [39] element ::= EmptyElemTag | STag content ETag
@ -846,10 +868,26 @@ pub struct ExtParsedEnt<'s> {
// TODO?: select quote version // TODO?: select quote version
pub struct EncodingDecl<'s>(EncName<'s>); pub struct EncodingDecl<'s>(EncName<'s>);
impl<'s> Deref for EncodingDecl<'s> {
type Target = EncName<'s>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* /// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
#[derive(Debug)] #[derive(Debug)]
pub struct EncName<'s>(&'s str); pub struct EncName<'s>(&'s str);
impl<'s> Deref for EncName<'s> {
type Target = &'s str;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[derive(Debug)] #[derive(Debug)]
pub enum NotationDeclID<'s> { pub enum NotationDeclID<'s> {
External(ExternalID<'s>), External(ExternalID<'s>),