This commit is contained in:
cel 🌸 2024-11-24 02:05:41 +00:00
parent c1e6f7e918
commit 87e6ff405b
7 changed files with 240 additions and 46 deletions

21
src/declaration.rs Normal file
View File

@ -0,0 +1,21 @@
/// The information carried by an XML declaration (`<?xml ... ?>`).
///
/// Each field mirrors one part of the parsed `xml::XMLDecl`: the version,
/// the optional encoding declaration and the optional standalone declaration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Declaration {
    /// Version named by the declaration.
    pub version_info: VersionInfo,
    /// Encoding name, if the declaration carries an encoding declaration.
    pub encoding_decl: Option<String>,
    /// Standalone flag, if the declaration carries a standalone declaration.
    pub sd_decl: Option<bool>,
}

/// XML version a [`Declaration`] can name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VersionInfo {
    /// Counterpart of `xml::VersionNum::One` (presumably version `1.0`).
    One,
    /// Counterpart of `xml::VersionNum::OneDotOne` (presumably version `1.1`).
    OneDotOne,
}

impl Declaration {
    /// Creates a declaration that only names a version.
    ///
    /// The encoding and standalone declarations are left unset (`None`).
    pub fn version(version: VersionInfo) -> Self {
        Self {
            version_info: version,
            encoding_decl: None,
            sd_decl: None,
        }
    }
}

View File

@ -9,8 +9,22 @@ use std::{
use crate::{
error::Error,
xml::{self, parsers_complete::Parser, Attribute},
Result,
};
/// Fallible conversion from a parsed [`Element`] into a typed value.
///
/// `Reader::read` and `Reader::read_start` use this trait to turn the element
/// they read into the caller's type.
pub trait FromElement: Sized {
/// Consumes `element` and builds `Self` from it, or returns the crate's error type.
fn from_element(element: Element) -> Result<Self>;
}
/// Conversion of a borrowed value into an XML [`Element`].
pub trait IntoElement {
    /// Builds the element representation of `self`.
    fn into_element(&self) -> Element;

    /// Returns the content of the element produced by [`IntoElement::into_element`].
    ///
    /// The default implementation builds the whole element and keeps only its
    /// `content` field.
    fn get_content(&self) -> Vec<Content> {
        self.into_element().content
    }
}
// When are namespace names chosen, then, if they are calculated automatically?
// Namespaces are held by readers and writers.
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
@ -26,7 +40,7 @@ pub struct Name {
pub local_name: String,
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum Content {
Element(Element),
Text(String),
@ -35,7 +49,7 @@ pub enum Content {
}
// should this be a trait?
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct Element {
pub name: Name,
// namespace: Name,
@ -51,6 +65,7 @@ pub struct Element {
// you can validate the prefix and calculate the namespace from the current reader context
// this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.
pub attributes: HashMap<Name, String>,
// TODO: make a hashmap maybe? to be able to address parts of the content individually
pub content: Vec<Content>,
}

View File

@ -1,6 +1,6 @@
use std::{num::ParseIntError, str::Utf8Error};
use crate::element::{Name, NamespaceDeclaration};
use crate::element::{Content, Name, NamespaceDeclaration};
#[derive(Debug)]
pub enum Error {
@ -17,6 +17,12 @@ pub enum Error {
NotInElement(String),
ExtraData(String),
UndeclaredNamespace(String),
IncorrectName(Name),
UnexpectedAttribute(Name),
DeserializeError(String),
UnexpectedNumberOfContents(usize),
UnexpectedContent(Content),
UnexpectedElement(Name),
}
impl From<std::io::Error> for Error {

View File

@ -1,7 +1,16 @@
mod element;
pub mod declaration;
pub mod element;
mod error;
pub mod reader;
mod writer;
pub mod xml;
pub type Result<T> = std::result::Result<T, error::Error>;
pub const XML_NS: &str = "http://www.w3.org/XML/1998/namespace";
pub const XMLNS_NS: &str = "http://www.w3.org/2000/xmlns/";
pub use element::Element;
pub use error::Error;
pub use reader::Reader;
pub use writer::Writer;

View File

@ -2,7 +2,7 @@ use circular::Buffer;
use futures::{FutureExt, Stream};
use nom::Err;
use std::{
collections::{BTreeMap, HashMap, HashSet},
collections::{hash_set, BTreeMap, HashMap, HashSet},
future::Future,
path::Prefix,
pin::{pin, Pin},
@ -10,19 +10,20 @@ use std::{
};
use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncReadExt};
static MAX_STANZA_SIZE: usize = 65536;
use crate::{
element::{Content, Element, Name, NamespaceDeclaration},
declaration::{Declaration, VersionInfo},
element::{Content, Element, FromElement, Name, NamespaceDeclaration},
error::Error,
xml::{self, parsers::Parser},
Result,
Result, XMLNS_NS, XML_NS,
};
static MAX_STANZA_SIZE: usize = 65536;
/// streaming reader that tracks depth and available namespaces at current depth
pub struct Reader<R> {
inner: R,
buffer: Buffer,
pub buffer: Buffer,
// holds which tags we are in atm over depth
// to have names reference namespaces could
depth: Vec<Name>,
@ -31,13 +32,27 @@ pub struct Reader<R> {
impl<R> Reader<R> {
pub fn new(reader: R) -> Self {
let mut default_declarations = HashSet::new();
default_declarations.insert(NamespaceDeclaration {
prefix: Some("xml".to_string()),
namespace: XML_NS.to_string(),
});
default_declarations.insert(NamespaceDeclaration {
prefix: Some("xmlns".to_string()),
namespace: XMLNS_NS.to_string(),
});
Self {
inner: reader,
buffer: Buffer::with_capacity(MAX_STANZA_SIZE),
depth: Vec::new(),
namespace_declarations: Vec::new(),
// TODO: make sure reserved namespaces are never overwritten
namespace_declarations: vec![default_declarations],
}
}
pub fn into_inner(self) -> R {
self.inner
}
}
impl<R> Reader<R>
@ -48,18 +63,35 @@ where
Ok(self.inner.read_buf(&mut self.buffer).await?)
}
pub async fn read_prolog<'s>(&'s mut self) -> Result<()> {
pub async fn read_prolog<'s>(&'s mut self) -> Result<Option<Declaration>> {
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::Prolog::parse(input) {
Ok((rest, _prolog)) => {
Ok((rest, (decl, _misc, _doctype_decl))) => {
let len = self.buffer.available_data() - rest.as_bytes().len();
// TODO: return error if there is a doctype decl
if let Some(decl) = decl {
let declaration = Declaration {
version_info: match *decl.version_info {
xml::VersionNum::One => VersionInfo::One,
xml::VersionNum::OneDotOne => VersionInfo::OneDotOne,
},
encoding_decl: decl
.encoding_decl
.map(|encoding_decl| (**encoding_decl).to_string()),
sd_decl: decl.sd_decl.map(|sd_decl| *sd_decl),
};
self.buffer.consume(len);
return Ok(());
return Ok(Some(declaration));
} else {
self.buffer.consume(len);
return Ok(None);
}
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {}
Err::Incomplete(_) => {
self.read_buf().await?;
}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -68,9 +100,18 @@ where
}
}
pub async fn read_start<'s, T: FromElement>(&'s mut self) -> Result<T> {
let element = self.read_start_tag().await?;
FromElement::from_element(element)
}
pub async fn read<'s, T: FromElement>(&'s mut self) -> Result<T> {
let element = self.read_element().await?;
FromElement::from_element(element)
}
pub async fn read_start_tag<'s>(&'s mut self) -> Result<Element> {
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::STag::parse(input) {
Ok((rest, e)) => {
@ -84,7 +125,9 @@ where
return Ok(element);
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {}
Err::Incomplete(_) => {
self.read_buf().await?;
}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -95,7 +138,6 @@ where
pub async fn read_end_tag<'s>(&'s mut self) -> Result<()> {
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::ETag::parse(input) {
Ok((rest, e)) => {
@ -109,7 +151,9 @@ where
return Ok(());
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {}
Err::Incomplete(_) => {
self.read_buf().await?;
}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -120,7 +164,6 @@ where
pub async fn read_element<'s>(&'s mut self) -> Result<Element> {
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
match xml::Element::parse(input) {
Ok((rest, e)) => {
@ -131,7 +174,9 @@ where
return Ok(element);
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => {}
Err::Incomplete(_) => {
self.read_buf().await?;
}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -144,7 +189,6 @@ where
let mut last_char = false;
let mut text = String::new();
loop {
self.read_buf().await?;
let input = str::from_utf8(self.buffer.data())?;
if last_char == false {
match xml::CharData::parse(input) {
@ -155,7 +199,9 @@ where
last_char = true;
}
std::result::Result::Err(e) => match e {
Err::Incomplete(_needed) => continue,
Err::Incomplete(_) => {
self.read_buf().await?;
}
_ => match xml::ContentItem::parse(input) {
Ok((rest, content_item)) => match content_item {
xml::ContentItem::Element(element) => {
@ -207,7 +253,9 @@ where
}
},
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => continue,
Err::Incomplete(_) => {
self.read_buf().await?;
}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -263,7 +311,9 @@ where
}
},
std::result::Result::Err(e) => match e {
Err::Incomplete(_) => continue,
Err::Incomplete(_) => {
self.read_buf().await?;
}
// TODO: better error
Err::Error(e) => return Err(Error::ParseError(e.to_string())),
Err::Failure(e) => return Err(Error::ParseError(e.to_string())),
@ -622,7 +672,11 @@ impl<R> Reader<R> {
for (content_item, char_data) in xml_content.content {
match content_item {
xml::ContentItem::Element(element) => {
text.map(|text| content.push(Content::Text(text)));
text.map(|text| {
if !text.is_empty() {
content.push(Content::Text(text))
}
});
content.push(Content::Element(Self::element_from_xml(
namespaces, element,
)?));
@ -655,7 +709,11 @@ impl<R> Reader<R> {
}
}
}
text.map(|text| content.push(Content::Text(text)));
text.map(|text| {
if !text.is_empty() {
content.push(Content::Text(text))
}
});
Ok(content)
}
}

View File

@ -5,10 +5,11 @@ use futures::Sink;
use tokio::io::{AsyncWrite, AsyncWriteExt};
use crate::{
element::{escape_str, Content, Element, Name, NamespaceDeclaration},
declaration::{Declaration, VersionInfo},
element::{escape_str, Content, Element, IntoElement, Name, NamespaceDeclaration},
error::Error,
xml::{self, composers::Composer, parsers_complete::Parser, ETag},
Result,
xml::{self, composers::Composer, parsers_complete::Parser, ETag, XMLDecl},
Result, XMLNS_NS, XML_NS,
};
// pub struct Writer<W, C = Composer> {
@ -20,21 +21,69 @@ pub struct Writer<W> {
impl<W> Writer<W> {
pub fn new(writer: W) -> Self {
let mut default_declarations = HashSet::new();
default_declarations.insert(NamespaceDeclaration {
prefix: Some("xml".to_string()),
namespace: XML_NS.to_string(),
});
default_declarations.insert(NamespaceDeclaration {
prefix: Some("xmlns".to_string()),
namespace: XMLNS_NS.to_string(),
});
Self {
inner: writer,
depth: Vec::new(),
namespace_declarations: Vec::new(),
namespace_declarations: vec![default_declarations],
}
}
pub fn into_inner(self) -> W {
self.inner
}
}
impl<W: AsyncWrite + Unpin + Send> Writer<W> {
/// Writes an XML declaration naming `version` to the underlying writer.
///
/// The version is always emitted in the single-quoted form, and neither an
/// encoding declaration nor a standalone declaration is written.
pub async fn write_declaration(&mut self, version: VersionInfo) -> Result<()> {
    // Map the public version enum onto the grammar-level version number.
    let version_num = match Declaration::version(version).version_info {
        VersionInfo::One => xml::VersionNum::One,
        VersionInfo::OneDotOne => xml::VersionNum::OneDotOne,
    };
    let xml_decl = xml::XMLDecl {
        version_info: xml::VersionInfo::SingleQuoted(version_num),
        encoding_decl: None,
        sd_decl: None,
    };
    xml_decl.write(&mut self.inner).await?;
    Ok(())
}
/// Converts `into_element` to an [`Element`] and writes it in full with
/// `write_element`.
pub async fn write_full(&mut self, into_element: &impl IntoElement) -> Result<()> {
    let built = into_element.into_element();
    self.write_element(&built).await
}
/// Converts `into_element` to an [`Element`] and passes it to
/// `write_element_start`.
pub async fn write_start(&mut self, into_element: &impl IntoElement) -> Result<()> {
    let built = into_element.into_element();
    self.write_element_start(&built).await
}
/// Writes every content item returned by `into_element.get_content()`, in
/// order, using `write_content`.
///
/// Stops at the first item that fails to write and returns its error.
pub async fn write_all_content(&mut self, into_element: &impl IntoElement) -> Result<()> {
    let contents = into_element.get_content();
    for item in contents.iter() {
        self.write_content(item).await?;
    }
    Ok(())
}
#[async_recursion]
pub async fn write_element(&mut self, element: &Element) -> Result<()> {
if element.content.is_empty() {
self.write_empty(element).await?;
} else {
self.write_start(element).await?;
self.write_element_start(element).await?;
for content in &element.content {
self.write_content(content).await?;
}
@ -107,12 +156,11 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
if let Some(prefix) = &prefix {
att_name = xml::QName::PrefixedName(xml::PrefixedName {
prefix: xml::Prefix::parse_full(prefix)?,
local_part: xml::LocalPart::parse_full(&element.name.local_name)?,
local_part: xml::LocalPart::parse_full(&name.local_name)?,
})
} else {
att_name = xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(
&element.name.local_name,
)?)
att_name =
xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.local_name)?)
}
let value = xml::AttValue::from(value.as_str());
@ -131,7 +179,7 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
Ok(())
}
pub async fn write_start(&mut self, element: &Element) -> Result<()> {
pub async fn write_element_start(&mut self, element: &Element) -> Result<()> {
let namespace_declarations_stack: Vec<_> = self
.namespace_declarations
.iter()
@ -195,12 +243,11 @@ impl<W: AsyncWrite + Unpin + Send> Writer<W> {
if let Some(prefix) = &prefix {
att_name = xml::QName::PrefixedName(xml::PrefixedName {
prefix: xml::Prefix::parse_full(prefix)?,
local_part: xml::LocalPart::parse_full(&element.name.local_name)?,
local_part: xml::LocalPart::parse_full(&name.local_name)?,
})
} else {
att_name = xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(
&element.name.local_name,
)?)
att_name =
xml::QName::UnprefixedName(xml::UnprefixedName::parse_full(&name.local_name)?)
}
let value = xml::AttValue::from(value.as_str());

View File

@ -389,9 +389,9 @@ pub type Prolog<'s> = (
/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
#[derive(Debug)]
pub struct XMLDecl<'s> {
version_info: VersionInfo,
encoding_decl: Option<EncodingDecl<'s>>,
sd_decl: Option<SDDecl>,
pub(crate) version_info: VersionInfo,
pub(crate) encoding_decl: Option<EncodingDecl<'s>>,
pub(crate) sd_decl: Option<SDDecl>,
}
/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
@ -401,6 +401,17 @@ pub enum VersionInfo {
DoubleQuoted(VersionNum),
}
impl Deref for VersionInfo {
type Target = VersionNum;
fn deref(&self) -> &Self::Target {
match self {
VersionInfo::SingleQuoted(version_num) => version_num,
VersionInfo::DoubleQuoted(version_num) => version_num,
}
}
}
/// [25] Eq ::= S? '=' S?
#[derive(Clone)]
pub struct Eq;
@ -479,6 +490,17 @@ pub enum SDDecl {
DoubleQuoted(bool),
}
impl Deref for SDDecl {
type Target = bool;
fn deref(&self) -> &Self::Target {
match self {
SDDecl::SingleQuoted(b) => b,
SDDecl::DoubleQuoted(b) => b,
}
}
}
// (Productions 33 through 38 have been removed.)
/// [39] element ::= EmptyElemTag | STag content ETag
@ -846,10 +868,26 @@ pub struct ExtParsedEnt<'s> {
// TODO?: select quote version
pub struct EncodingDecl<'s>(EncName<'s>);
impl<'s> Deref for EncodingDecl<'s> {
type Target = EncName<'s>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
#[derive(Debug)]
pub struct EncName<'s>(&'s str);
impl<'s> Deref for EncName<'s> {
type Target = &'s str;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[derive(Debug)]
pub enum NotationDeclID<'s> {
External(ExternalID<'s>),