Compare commits

..

No commits in common. "009b53c4a958fd751686ea185b006fa1a383b703" and "33e0d1ca8b930d67329d475d1eb1009a52c57bce" have entirely different histories.

12 changed files with 1411 additions and 3928 deletions

177
Cargo.lock generated
View File

@ -38,12 +38,6 @@ dependencies = [
"rustc-demangle", "rustc-demangle",
] ]
[[package]]
name = "bytes"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.0.89" version = "1.0.89"
@ -179,17 +173,6 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "mio"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
dependencies = [
"libc",
"wasi",
"windows-sys 0.48.0",
]
[[package]] [[package]]
name = "nom" name = "nom"
version = "7.1.3" version = "7.1.3"
@ -260,16 +243,6 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "socket2"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.52" version = "2.0.52"
@ -288,12 +261,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931"
dependencies = [ dependencies = [
"backtrace", "backtrace",
"bytes",
"libc",
"mio",
"pin-project-lite", "pin-project-lite",
"socket2",
"windows-sys 0.48.0",
] ]
[[package]] [[package]]
@ -301,148 +269,3 @@ name = "unicode-ident"
version = "1.0.12" version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets 0.52.5",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
]
[[package]]
name = "windows-targets"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
dependencies = [
"windows_aarch64_gnullvm 0.52.5",
"windows_aarch64_msvc 0.52.5",
"windows_i686_gnu 0.52.5",
"windows_i686_gnullvm",
"windows_i686_msvc 0.52.5",
"windows_x86_64_gnu 0.52.5",
"windows_x86_64_gnullvm 0.52.5",
"windows_x86_64_msvc 0.52.5",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_gnu"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_i686_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"

View File

@ -8,4 +8,4 @@ edition = "2021"
[dependencies] [dependencies]
futures = "0.3.30" futures = "0.3.30"
nom = "7.1.3" nom = "7.1.3"
tokio = { version = "1.36.0", features = ["io-util", "net"] } tokio = "1.36.0"

View File

@ -9,7 +9,7 @@ pub struct Namespace {
namespace: String, namespace: String,
} }
// names are qualified, they contain a reference to the namespace (held within the reader/writer) // names are qualified, they contain the namespace
pub struct Name { pub struct Name {
namespace: String, namespace: String,
name: String, name: String,
@ -22,27 +22,27 @@ pub enum Node {
// should this be a trait? // should this be a trait?
pub struct Element { pub struct Element {
pub name: Name, name: Name,
// namespace: Name, // namespace: (Name, String), // can't have this, must be external method that is called within the context of a reader/writer
// each element once created contains the qualified namespace information for that element // each element once created contains the qualified namespace information for that element
// the name contains the qualified namespace so this is unnecessary // the name contains the qualified namespace so this is unnecessary
// namespace: String, // namespace: String,
// hashmap of explicit namespace declarations on the element itself only // hashmap of explicit namespace declarations on the element itself only
// possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader. // possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader.
pub namespace_decl: HashMap<Option<String>, String>, // namespaces: HashMap<Option<String>, String>,
// attributes can be in a different namespace than the element. how to make sure they are valid? // attributes can be in a different namespace than the element. how to make sure they are valid?
// maybe include the namespace instead of or with the prefix // maybe include the namespace instead of or with the prefix
// you can calculate the prefix from the namespaced name and the current writer context // you can calculate the prefix from the namespaced name and the current writer context
// you can validate the prefix and calculate the namespace from the current reader context // you can validate the prefix and calculate the namespace from the current reader context
// this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified. // this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.
pub attributes: HashMap<Name, String>, attributes: HashMap<Name, String>,
pub children: Option<Vec<Node>>, children: Option<Vec<Node>>,
} }
// example of deriving an element: // example of deriving an element:
// #[derive(XMLWrite, XMLRead)] // #[derive(XMLWrite, XMLRead)]
// #[peanuts(xmlns = "jabber:client", xmlns:stream = "http://etherx.jabber.org/streams", prefix = "stream")] // #[peanuts(namespace = "jabber:client", namespace:stream = "http://etherx.jabber.org/streams", name = "stream:stream")]
// pub struct Stream { // pub struct Stream {
// from: JID, // from: JID,
// id: String, // id: String,

View File

@ -1,19 +1 @@
use std::str::Utf8Error; pub enum Error {}
pub enum Error {
ReadError(std::io::Error),
Utf8Error(Utf8Error),
ParseError(String),
}
impl From<std::io::Error> for Error {
fn from(e: std::io::Error) -> Self {
Self::ReadError(e)
}
}
impl From<Utf8Error> for Error {
fn from(e: Utf8Error) -> Self {
Self::Utf8Error(e)
}
}

View File

@ -1,5 +1,5 @@
mod element; mod element;
mod error; mod error;
pub mod parser;
mod reader; mod reader;
mod writer; mod writer;
pub mod xml;

View File

@ -1,15 +1,14 @@
use peanuts::xml::parsers::Parser; use peanuts::parser::document;
use peanuts::xml::Document;
fn main() { fn main() {
let document = Document::parse( let document = document(
"<?xml version=\"1.0\"?> "<?xml version=\"1.0\"?>
<TEST> <TEST>
<block1>Background Mark 1</block1> <block1>Background Mark 1</block1>
<block2>Background Mark 2</block2> <block2>Background Mark 2</block2>
<block3>Background Mark 3</block3> <block3>Background Mark 3</block3>
</TEST>ahsdkjlfhasdlkjfhkljh </TEST>
", ",
); );
println!("{:#?}", document); println!("{:?}", document);
} }

1380
src/parser.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,5 @@
use futures::Stream; use futures::Stream;
use nom::Err; use tokio::io::AsyncBufRead;
use std::{collections::BTreeMap, str};
use tokio::io::AsyncBufReadExt;
use crate::{ use crate::{
element::{Element, Name, Namespace}, element::{Element, Name, Namespace},
@ -10,91 +8,28 @@ use crate::{
/// streaming reader that tracks depth and available namespaces at current depth /// streaming reader that tracks depth and available namespaces at current depth
pub struct Reader<R> { pub struct Reader<R> {
inner: R, stream: R,
// holds which tags we are in atm over depth // holds which tags we are in atm over depth
depth: Vec<Name>, depth: Vec<Name>,
namespaces: Vec<(usize, Namespace)>, namespaces: Vec<(usize, Namespace)>,
} }
impl<R> Reader<R> { impl<R> Reader<R>
pub fn new(reader: R) -> Self { where
Self { R: AsyncBufRead,
inner: reader, {
depth: Vec::new(), // pub async fn read(&self) -> Result<impl From<Element>, Error> {
namespaces: Vec::new(), // let buf = self.stream.poll_fill_buf().await?;
} // todo!()
} // }
// pub async fn read_start(&self) -> Result<impl From<Element>, Error> {
// todo!()
// }
// pub async fn read_end(&self) -> Result<(), Error> {
// todo!()
// }
} }
// impl<R> Reader<R>
// where
// R: AsyncBufReadExt + Unpin,
// {
// /// could resursively read and include namespace tree with values to be shadowed within new local context
// async fn read_recursive(&mut self, namespaces: BTreeMap<Option<String>, String>) -> Result<Element, Error> {
// let element;
// let len;
// loop {
// let buf = self.inner.fill_buf().await?;
// let input = str::from_utf8(buf)?;
// match crate::xml::element(input) {
// Ok((rest, e)) => {
// element = e;
// len = buf.len() - rest.len();
// break;
// }
// Err(e) => match e {
// Err::Incomplete(_) => (),
// e => return Err::<E, Error>(Error::ParseError(input.to_owned())),
// },
// }
// }
// let final;
// match element {
// crate::xml::Element::Empty(e) => {
// let final = Element {
// }
// },
// crate::xml::Element::NotEmpty(_, _, _) => todo!(),
// }
// self.inner.consume(len);
// todo!()
// }
// /// reads entire next prolog, element, or misc
// pub async fn read<E: From<Element>>(&mut self) -> Result<E, Error> {
// let element;
// let len;
// loop {
// let buf = self.inner.fill_buf().await?;
// let input = str::from_utf8(buf)?;
// match crate::xml::element(input) {
// Ok((rest, e)) => {
// element = e;
// len = buf.len() - rest.len();
// break;
// }
// Err(e) => match e {
// Err::Incomplete(_) => (),
// e => return Err::<E, Error>(Error::ParseError(input.to_owned())),
// },
// }
// }
// self.inner.consume(len);
// // Ok(element)
// todo!()
// }
// pub async fn read_start(&self) -> Result<impl From<Element>, Error> {
// todo!()
// }
// pub async fn read_end(&self) -> Result<(), Error> {
// todo!()
// }
// }
// impl<R: AsyncBufRead> Stream for Reader<R> { // impl<R: AsyncBufRead> Stream for Reader<R> {
// type Item = impl From<Element>; // type Item = impl From<Element>;

View File

@ -5,9 +5,8 @@ use crate::{
error::Error, error::Error,
}; };
// pub struct Writer<W, C = Composer> {
pub struct Writer<W> { pub struct Writer<W> {
writer: W, stream: W,
depth: Vec<Name>, depth: Vec<Name>,
namespaces: Vec<(usize, Namespace)>, namespaces: Vec<(usize, Namespace)>,
} }

File diff suppressed because it is too large Load Diff

View File

@ -1,611 +0,0 @@
use std::char;
pub mod composers;
pub mod parsers;
/// [1] NSAttName ::= PrefixedAttName | DefaultAttName
#[derive(Clone, Debug)]
pub enum NSAttName<'s> {
PrefixedAttName(PrefixedAttName<'s>),
DefaultAttName,
}
/// [2] PrefixedAttName ::= 'xmlns:' NCName
#[derive(Clone, Debug)]
pub struct PrefixedAttName<'s>(NCName<'s>);
/// [3] DefaultAttName ::= 'xmlns';
///
/// Unit marker for the bare `xmlns` attribute name (a default namespace
/// declaration). Note that `NSAttName::DefaultAttName` is itself a unit
/// variant and does not wrap this struct.
#[derive(Clone, Debug)]
pub struct DefaultAttName;
/// [4] NCName ::= Name - (Char* ':' Char*)
#[derive(Clone, Debug)]
pub struct NCName<'s>(&'s str);
/// [7] QName ::= PrefixedName | UnprefixedName
#[derive(Clone, Debug)]
pub enum QName<'s> {
PrefixedName(PrefixedName<'s>),
UnprefixedName(UnprefixedName<'s>),
}
/// [8] PrefixedName ::= Prefix ':' LocalPart
#[derive(Clone, Debug)]
pub struct PrefixedName<'s> {
prefix: Prefix<'s>,
local_part: LocalPart<'s>,
}
/// [9] UnprefixedName ::= LocalPart
#[derive(Clone, Debug)]
pub struct UnprefixedName<'s>(LocalPart<'s>);
/// [10] Prefix ::= NCName
#[derive(Clone, Debug)]
pub struct Prefix<'s>(NCName<'s>);
/// [11] LocalPart ::= NCName
#[derive(Clone, Debug)]
pub struct LocalPart<'s>(NCName<'s>);
// xml spec
/// [1] document ::= prolog element Misc*
pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
/// [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
///
/// Newtype over a single `char`. The wrapper itself does not check that the
/// character lies in the ranges above.
// Derives match the sibling newtypes (`Name`, `CharData`, ...) so the type can
// be printed, compared and copied like the `char` it wraps.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(transparent)]
pub struct Char(char);
/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
///
/// Unit marker for a run of whitespace. It carries no data, so the matched
/// whitespace characters themselves are not retained.
// `#[repr(transparent)]` was dropped: a unit struct has no field to be
// transparent over, so the attribute had no effect.
#[derive(Clone, Debug)]
pub struct S;
/// [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
///
/// Newtype over a single `char` that may begin a `Name`. The wrapper itself
/// does not check that the character lies in the ranges above.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(transparent)]
pub struct NameStartChar(char);
/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
///
/// Newtype over a single `char` that may appear after the first character of
/// a `Name`. The wrapper itself does not check the character.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(transparent)]
pub struct NameChar(char);
/// [5] Name ::= NameStartChar (NameChar)*
#[derive(Debug, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct Name<'s>(&'s str);
/// [6] Names ::= Name (#x20 Name)*
///
/// The individual names of a space-separated list, held in a `Vec`.
// `Name` already derives `Debug` and `Clone`, so the list can as well.
#[derive(Debug, Clone)]
#[repr(transparent)]
pub struct Names<'s>(Vec<Name<'s>>);
/// [7] Nmtoken ::= (NameChar)+
#[derive(Debug, Clone)]
#[repr(transparent)]
pub struct Nmtoken<'s>(&'s str);
/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
///
/// The individual tokens of a space-separated list, held in a `Vec`.
// `Nmtoken` already derives `Debug` and `Clone`, so the list can as well.
#[derive(Debug, Clone)]
#[repr(transparent)]
pub struct Nmtokens<'s>(Vec<Nmtoken<'s>>);
#[derive(Clone, Debug)]
pub enum EntityValueData<'s> {
String(&'s str),
PEReference(PEReference<'s>),
Reference(Reference<'s>),
}
/// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
/// | "'" ([^%&'] | PEReference | Reference)* "'"
#[derive(Debug)]
pub enum EntityValue<'s> {
DoubleQuoted(Vec<EntityValueData<'s>>),
SingleQuoted(Vec<EntityValueData<'s>>),
}
#[derive(Clone, Debug)]
pub enum AttValueData<'s> {
String(&'s str),
Reference(Reference<'s>),
}
/// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
/// | "'" ([^<&'] | Reference)* "'"
#[derive(Clone, Debug)]
pub enum AttValue<'s> {
DoubleQuoted(Vec<AttValueData<'s>>),
SingleQuoted(Vec<AttValueData<'s>>),
}
/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
#[derive(Debug)]
pub enum SystemLiteral<'s> {
DoubleQuoted(&'s str),
SingleQuoted(&'s str),
}
/// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
#[derive(Debug)]
pub enum PubidLiteral<'s> {
DoubleQuoted(&'s str),
SingleQuoted(&'s str),
}
/// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
///
/// Newtype over a single `char` allowed inside a public identifier literal.
/// The wrapper itself does not check the character.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(transparent)]
pub struct PubidChar(char);
/// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct CharData<'s>(&'s str);
/// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Comment<'s>(&'s str);
/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
#[derive(Clone, Debug)]
pub struct PI<'s> {
target: PITarget<'s>,
instruction: Option<&'s str>,
}
/// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct PITarget<'s>(Name<'s>);
/// [18] CDSect ::= CDStart CData CDEnd
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct CDSect<'s>(CData<'s>);
/// [19] CDStart ::= '<![CDATA['
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDStart;
/// [20] CData ::= (Char* - (Char* ']]>' Char*))
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct CData<'s>(&'s str);
/// [21] CDEnd ::= ']]>'
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDEnd;
/// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
pub type Prolog<'s> = (
Option<XMLDecl<'s>>,
Vec<Misc<'s>>,
Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>,
);
/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
#[derive(Debug)]
pub struct XMLDecl<'s> {
version_info: VersionInfo,
encoding_decl: Option<EncodingDecl<'s>>,
sd_decl: Option<SDDecl>,
}
/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
#[derive(Debug)]
pub enum VersionInfo {
SingleQuoted(VersionNum),
DoubleQuoted(VersionNum),
}
/// [25] Eq ::= S? '=' S?
///
/// Unit marker for an equals sign with optional surrounding whitespace.
/// Note: within this module the type name `Eq` shadows the prelude trait name
/// in type position; `#[derive(..., Eq)]` on other items is unaffected.
#[derive(Clone, Debug)]
pub struct Eq;
/// [26] VersionNum ::= '1.' [0-9]+
///
/// The production allows any digits after `1.`, but only two versions are
/// representable here.
/// NOTE(review): `One` / `OneDotOne` presumably stand for "1.0" and "1.1" —
/// confirm against the parser.
#[derive(Clone, Debug)]
pub enum VersionNum {
One,
OneDotOne,
}
/// [27] Misc ::= Comment | PI | S
///
/// One miscellaneous item that may appear around the root element.
#[derive(Clone, Debug)]
pub enum Misc<'s> {
Comment(Comment<'s>),
PI(PI<'s>),
// TODO: decide how to deal with whitespace. This variant carries no data,
// so the whitespace text itself is not kept.
S,
}
/// [16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
/// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
///
/// The two production numbers refer to two different grammars: [16] is the
/// namespace-aware form (it uses `QName`), [28] is the plain XML form (it uses
/// `Name`). The `name` field is a `QName`, i.e. this struct follows [16].
#[derive(Debug)]
pub struct DoctypeDecl<'s> {
name: QName<'s>,
external_id: Option<ExternalID<'s>>,
int_subset: Option<IntSubset<'s>>,
}
/// [28a] DeclSep ::= PEReference | S
///
/// A separator between markup declarations: either a parameter-entity
/// reference or whitespace.
#[derive(Clone, Debug)]
pub enum DeclSep<'s> {
PEReference(PEReference<'s>),
// TODO: decide how to deal with whitespace. This variant carries no data,
// so the whitespace text itself is not kept.
S,
}
#[derive(Debug)]
pub enum IntSubsetDeclaration<'s> {
MarkupDecl(MarkupDecl<'s>),
DeclSep(DeclSep<'s>),
}
/// from [16] intSubset ::= (markupdecl | PEReference | S)*
/// [28b] intSubset ::= (markupdecl | DeclSep)*
pub type IntSubset<'s> = Vec<IntSubsetDeclaration<'s>>;
/// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
#[derive(Debug)]
pub enum MarkupDecl<'s> {
Elementdecl(Elementdecl<'s>),
AttlistDecl(AttlistDecl<'s>),
EntityDecl(EntityDecl<'s>),
NotationDecl(NotationDecl<'s>),
PI(PI<'s>),
Comment(Comment<'s>),
}
/// [30] extSubset ::= TextDecl? extSubsetDecl
pub struct ExtSubset<'s> {
text_decl: Option<TextDecl<'s>>,
ext_subset_decl: ExtSubsetDecl<'s>,
}
pub enum ExtSubsetDeclaration<'s> {
MarkupDecl(MarkupDecl<'s>),
ConditionalSect(ConditionalSect<'s>),
DeclSep(DeclSep<'s>),
}
/// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
type ExtSubsetDecl<'s> = Vec<ExtSubsetDeclaration<'s>>;
/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
///
/// The variant records which quote character surrounded the value.
/// NOTE(review): the `bool` presumably is `true` for 'yes' and `false` for
/// 'no' — confirm against the parser.
#[derive(Debug, Clone)]
pub enum SDDecl {
SingleQuoted(bool),
DoubleQuoted(bool),
}
// (Productions 33 through 38 have been removed.)
/// [39] element ::= EmptyElemTag | STag content ETag
#[derive(Debug, Clone)]
pub enum Element<'s> {
Empty(EmptyElemTag<'s>),
NotEmpty(STag<'s>, Content<'s>, ETag<'s>),
}
/// [12] STag ::= '<' QName (S Attribute)* S? '>'
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
#[derive(Debug, Clone)]
pub struct STag<'s> {
name: QName<'s>,
attributes: Vec<Attribute<'s>>,
}
/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
// pub type Attribute<'s> = (Name<'s>, AttValue<'s>);
/// [41] Attribute ::= Name Eq AttValue
#[derive(Debug, Clone)]
pub enum Attribute<'s> {
NamespaceDeclaration {
ns_name: NSAttName<'s>,
value: AttValue<'s>,
},
Attribute {
name: QName<'s>,
value: AttValue<'s>,
},
}
/// [13] ETag ::= '</' QName S? '>'
/// [42] ETag ::= '</' Name S? '>'
#[derive(Debug, Clone)]
pub struct ETag<'s> {
name: QName<'s>,
}
#[derive(Debug, Clone)]
pub enum ContentItem<'s> {
// CharData(&'s str),
Element(Element<'s>),
Reference(Reference<'s>),
CDSect(CDSect<'s>),
PI(PI<'s>),
Comment(Comment<'s>),
}
/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
#[derive(Debug, Clone)]
pub struct Content<'s> {
char_data: Option<CharData<'s>>,
content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
}
/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
#[derive(Debug, Clone)]
pub struct EmptyElemTag<'s> {
name: QName<'s>,
attributes: Vec<Attribute<'s>>,
}
/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
/// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
#[derive(Debug)]
pub struct Elementdecl<'s> {
name: QName<'s>,
contentspec: Contentspec<'s>,
}
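// TODO: an earlier note here read only "casings???" and its intent is unclear.
// Possibly it asks whether the 'EMPTY' / 'ANY' keywords must match
// case-sensitively; check the XML spec and then resolve or remove this note.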
/// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
#[derive(Clone, Debug)]
pub enum Contentspec<'s> {
Empty,
Any,
Mixed(Mixed<'s>),
Children(Children<'s>),
}
/// Occurence ::= ('?' | '*' | '+')?
///
/// Helper production (not numbered in the spec) for the optional repetition
/// suffix used by `children` [47] and `cp` [48].
/// NOTE(review): the variants presumably map as `Once` = no suffix,
/// `Optional` = '?', `Many0` = '*', `Many1` = '+' — confirm against the parser.
/// The spelling "Occurence" (sic) is the established type name in this module.
#[derive(Clone, Debug)]
pub enum Occurence {
Once,
Optional,
Many0,
Many1,
}
#[derive(Clone, Debug)]
pub enum ChildrenKind<'s> {
Choice(Choice<'s>),
Seq(Seq<'s>),
}
/// [47] children ::= (choice | seq) ('?' | '*' | '+')?
#[derive(Clone, Debug)]
pub struct Children<'s> {
kind: ChildrenKind<'s>,
occurence: Occurence,
}
#[derive(Clone, Debug)]
pub enum CpKind<'s> {
Name(QName<'s>),
Choice(Choice<'s>),
Seq(Seq<'s>),
}
/// [18] cp ::= (QName | choice | seq) ('?' | '*' | '+')?
/// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
#[derive(Clone, Debug)]
pub struct Cp<'s> {
kind: CpKind<'s>,
occurence: Occurence,
}
/// [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
#[derive(Clone, Debug)]
pub struct Choice<'s>(Vec<Cp<'s>>);
/// [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
#[derive(Clone, Debug)]
pub struct Seq<'s>(Vec<Cp<'s>>);
// always contains #PCDATA
/// [19] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'
/// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
#[derive(Clone, Debug)]
pub struct Mixed<'s>(Vec<QName<'s>>);
/// [20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'
/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
#[derive(Debug)]
pub struct AttlistDecl<'s> {
element_type: QName<'s>,
att_defs: Vec<AttDef<'s>>,
}
#[derive(Debug)]
pub enum AttDefName<'s> {
QName(QName<'s>),
NSAttName(NSAttName<'s>),
}
/// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
/// [53] AttDef ::= S Name S AttType S DefaultDecl
#[derive(Debug)]
pub struct AttDef<'s> {
name: AttDefName<'s>,
att_type: AttType<'s>,
default_decl: DefaultDecl<'s>,
}
/// [54] AttType ::= StringType | TokenizedType | EnumeratedType
#[derive(Clone, Debug)]
pub enum AttType<'s> {
StringType,
TokenizedType(TokenizedType),
EnumeratedType(EnumeratedType<'s>),
}
/// [55] StringType ::= 'CDATA'
///
/// Unit marker for the `CDATA` attribute type keyword.
// `Debug` added so the marker can be printed like the other attribute-type
// items (`TokenizedType`, `EnumeratedType`).
#[derive(Clone, Debug)]
pub struct StringType;
/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
#[derive(Clone, Debug)]
pub enum TokenizedType {
ID,
IDRef,
IDRefs,
Entity,
Entities,
NMToken,
NMTokens,
}
/// [57] EnumeratedType ::= NotationType | Enumeration
#[derive(Debug, Clone)]
pub enum EnumeratedType<'s> {
NotationType(NotationType<'s>),
Enumeration(Enumeration<'s>),
}
/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
#[derive(Debug, Clone)]
pub struct NotationType<'s>(Vec<Name<'s>>);
/// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
#[derive(Debug, Clone)]
pub struct Enumeration<'s>(Vec<Nmtoken<'s>>);
/// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
///
/// How an attribute-list declaration specifies an attribute's default.
#[derive(Debug, Clone)]
pub enum DefaultDecl<'s> {
Required,
Implied,
/// A declared default value.
/// NOTE(review): the `bool` presumably records whether the `#FIXED`
/// keyword was present; when `true`, the attribute MUST always have this
/// default value — confirm against the parser.
Fixed(bool, AttValue<'s>),
}
/// [61] conditionalSect ::= includeSect | ignoreSect
pub enum ConditionalSect<'s> {
IncludeSect(IncludeSect<'s>),
IgnoreSect(IgnoreSect<'s>),
}
/// [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
pub struct IncludeSect<'s>(ExtSubsetDecl<'s>);
/// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
pub struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>);
/// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
///
/// Contents of an ignored conditional section: a run of ignored text,
/// followed by zero or more repetitions of a nested `<![ ... ]]>` section and
/// another run of ignored text.
pub struct IgnoreSectContents<'s> {
// TODO: confirm this field mapping against the production above.
// Presumably `ignore` is the leading `Ignore`.
ignore: Ignore<'s>,
// Presumably one entry per `'<![' ignoreSectContents ']]>' Ignore`
// repetition: the nested contents paired with the text that follows them.
ignore_list: Vec<(IgnoreSectContents<'s>, Ignore<'s>)>,
}
/// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
///
/// Borrowed slice of ignored text inside an ignored conditional section. The
/// wrapper itself does not check that the text is free of `<![` and `]]>`.
// Derives match the other borrowed-text newtypes (`CharData`, `Comment`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Ignore<'s>(&'s str);
/// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
#[derive(Clone, Debug)]
pub enum CharRef<'s> {
Decimal(&'s str),
Hexadecimal(&'s str),
}
/// [67] Reference ::= EntityRef | CharRef
#[derive(Clone, Debug)]
pub enum Reference<'s> {
EntityRef(EntityRef<'s>),
CharRef(CharRef<'s>),
}
/// [68] EntityRef ::= '&' Name ';'
#[derive(Clone, Debug)]
pub struct EntityRef<'s>(Name<'s>);
/// [69] PEReference ::= '%' Name ';'
#[derive(Clone, Debug)]
#[repr(transparent)]
pub struct PEReference<'s>(Name<'s>);
/// [70] EntityDecl ::= GEDecl | PEDecl
#[derive(Debug)]
pub enum EntityDecl<'s> {
GEDecl(GEDecl<'s>),
PEDecl(PEDecl<'s>),
}
/// [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
#[derive(Debug)]
pub struct GEDecl<'s> {
name: Name<'s>,
entity_def: EntityDef<'s>,
}
/// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
#[derive(Debug)]
pub struct PEDecl<'s> {
name: Name<'s>,
pe_def: PEDef<'s>,
}
/// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
#[derive(Debug)]
pub enum EntityDef<'s> {
EntityValue(EntityValue<'s>),
ExternalID {
external_id: ExternalID<'s>,
n_data_decl: Option<NDataDecl<'s>>,
},
}
/// [74] PEDef ::= EntityValue | ExternalID
#[derive(Debug)]
pub enum PEDef<'s> {
EntityValue(EntityValue<'s>),
ExternalID(ExternalID<'s>),
}
/// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
#[derive(Debug)]
pub enum ExternalID<'s> {
SYSTEM {
system_identifier: SystemLiteral<'s>,
},
PUBLIC {
public_identifier: PubidLiteral<'s>,
system_identifier: SystemLiteral<'s>,
},
}
/// [76] NDataDecl ::= S 'NDATA' S Name
#[derive(Debug)]
pub struct NDataDecl<'s>(Name<'s>);
/// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
///
/// Text declaration of an external parsed entity. Unlike `XMLDecl`, the
/// version is optional and the encoding is mandatory.
// `Debug` added: both field types already derive it, matching `XMLDecl`.
#[derive(Debug)]
pub struct TextDecl<'s> {
    version_info: Option<VersionInfo>,
    encoding_decl: EncodingDecl<'s>,
}
/// [78] extParsedEnt ::= TextDecl? content
pub struct ExtParsedEnt<'s> {
text_decl: Option<TextDecl<'s>>,
content: Content<'s>,
}
/// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
///
/// Holds only the encoding name; which quote character surrounded it is not
/// recorded.
#[derive(Debug)]
// TODO?: record which quote style was used (compare `VersionInfo` and
// `SDDecl`, which have `SingleQuoted` / `DoubleQuoted` variants).
pub struct EncodingDecl<'s>(EncName<'s>);
/// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
#[derive(Debug)]
pub struct EncName<'s>(&'s str);
#[derive(Debug)]
pub enum NotationDeclID<'s> {
External(ExternalID<'s>),
Public(PublicID<'s>),
}
/// [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
#[derive(Debug)]
pub struct NotationDecl<'s> {
name: Name<'s>,
id: NotationDeclID<'s>,
}
/// [83] PublicID ::= 'PUBLIC' S PubidLiteral
#[derive(Debug)]
pub struct PublicID<'s>(PubidLiteral<'s>);

File diff suppressed because it is too large Load Diff