Compare commits

...

10 Commits

Author SHA1 Message Date
cel 🌸 009b53c4a9 implement XML composers 2024-11-02 01:48:54 +00:00
cel 🌸 d0a8d25d25 WIP: XML composers 2024-11-01 18:36:11 +00:00
cel 🌸 c6c3c1b403 change contents of Names and Nmtokens to Vecs 2024-11-01 13:37:33 +00:00
cel 🌸 ceb1bca7a4 fix build 2024-10-31 20:54:19 +00:00
cel 🌸 793bc4bfea fix CDSect parse test regression 2024-10-31 20:53:49 +00:00
cel 🌸 c41da2c89e refactor parsers into Parser trait implementations 2024-10-31 20:45:46 +00:00
cel 🌸 6b47106115 WIP: stream parsing 2024-06-29 17:06:08 +01:00
cel 🌸 1f0103cbec WIP: stream parsing 2024-06-27 20:22:16 +01:00
cel 🌸 c08b4504ab namespace parsing 2024-06-27 20:22:05 +01:00
cel 🌸 fa54b2dd3b IT WORKS 2024-06-25 23:25:48 +01:00
12 changed files with 3928 additions and 1411 deletions

177
Cargo.lock generated
View File

@ -38,6 +38,12 @@ dependencies = [
"rustc-demangle",
]
[[package]]
name = "bytes"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
[[package]]
name = "cc"
version = "1.0.89"
@ -173,6 +179,17 @@ dependencies = [
"autocfg",
]
[[package]]
name = "mio"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
dependencies = [
"libc",
"wasi",
"windows-sys 0.48.0",
]
[[package]]
name = "nom"
version = "7.1.3"
@ -243,6 +260,16 @@ dependencies = [
"autocfg",
]
[[package]]
name = "socket2"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "syn"
version = "2.0.52"
@ -261,7 +288,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931"
dependencies = [
"backtrace",
"bytes",
"libc",
"mio",
"pin-project-lite",
"socket2",
"windows-sys 0.48.0",
]
[[package]]
@ -269,3 +301,148 @@ name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets 0.52.5",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
]
[[package]]
name = "windows-targets"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
dependencies = [
"windows_aarch64_gnullvm 0.52.5",
"windows_aarch64_msvc 0.52.5",
"windows_i686_gnu 0.52.5",
"windows_i686_gnullvm",
"windows_i686_msvc 0.52.5",
"windows_x86_64_gnu 0.52.5",
"windows_x86_64_gnullvm 0.52.5",
"windows_x86_64_msvc 0.52.5",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_gnu"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_i686_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"

View File

@ -8,4 +8,4 @@ edition = "2021"
[dependencies]
futures = "0.3.30"
nom = "7.1.3"
tokio = "1.36.0"
tokio = { version = "1.36.0", features = ["io-util", "net"] }

View File

@ -9,7 +9,7 @@ pub struct Namespace {
namespace: String,
}
// names are qualified, they contain the namespace
// names are qualified, they contain a reference to the namespace (held within the reader/writer)
pub struct Name {
namespace: String,
name: String,
@ -22,27 +22,27 @@ pub enum Node {
// should this be a trait?
pub struct Element {
name: Name,
// namespace: (Name, String), // can't have this, must be external method that is called within the context of a reader/writer
pub name: Name,
// namespace: Name,
// each element once created contains the qualified namespace information for that element
// the name contains the qualified namespace so this is unnecessary
// namespace: String,
// hashmap of explicit namespace declarations on the element itself only
// possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader.
// namespaces: HashMap<Option<String>, String>,
pub namespace_decl: HashMap<Option<String>, String>,
// attributes can be in a different namespace than the element. how to make sure they are valid?
// maybe include the namespace instead of or with the prefix
// you can calculate the prefix from the namespaced name and the current writer context
// you can validate the prefix and calculate the namespace from the current reader context
// this results in readers and writers being able to return qualification errors as they aren't able to create elements until every part is qualified.
attributes: HashMap<Name, String>,
children: Option<Vec<Node>>,
pub attributes: HashMap<Name, String>,
pub children: Option<Vec<Node>>,
}
// example of deriving an element:
// #[derive(XMLWrite, XMLRead)]
// #[peanuts(namespace = "jabber:client", namespace:stream = "http://etherx.jabber.org/streams", name = "stream:stream")]
// #[peanuts(xmlns = "jabber:client", xmlns:stream = "http://etherx.jabber.org/streams", prefix = "stream")]
// pub struct Stream {
// from: JID,
// id: String,

View File

@ -1 +1,19 @@
pub enum Error {}
use std::str::Utf8Error;
/// Errors surfaced by this crate while reading and parsing XML.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so values can be
/// printed, used with `unwrap`/`expect`, and boxed as `dyn Error` by callers.
#[derive(Debug)]
pub enum Error {
    /// An I/O operation failed (produced by `From<std::io::Error>`).
    ReadError(std::io::Error),
    /// Bytes could not be interpreted as UTF-8 (produced by `From<Utf8Error>`).
    Utf8Error(Utf8Error),
    /// Parsing failed; the `String` carries context supplied by the caller.
    ParseError(String),
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ReadError(e) => write!(f, "read error: {}", e),
            Self::Utf8Error(e) => write!(f, "utf-8 error: {}", e),
            Self::ParseError(context) => write!(f, "parse error: {}", context),
        }
    }
}

impl std::error::Error for Error {
    /// Exposes the wrapped lower-level error, when there is one, so error
    /// reporters can walk the cause chain.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::ReadError(e) => Some(e),
            Self::Utf8Error(e) => Some(e),
            Self::ParseError(_) => None,
        }
    }
}
impl From<std::io::Error> for Error {
fn from(e: std::io::Error) -> Self {
Self::ReadError(e)
}
}
impl From<Utf8Error> for Error {
fn from(e: Utf8Error) -> Self {
Self::Utf8Error(e)
}
}

View File

@ -1,5 +1,5 @@
mod element;
mod error;
pub mod parser;
mod reader;
mod writer;
pub mod xml;

View File

@ -1,14 +1,15 @@
use peanuts::parser::document;
use peanuts::xml::parsers::Parser;
use peanuts::xml::Document;
fn main() {
let document = document(
let document = Document::parse(
"<?xml version=\"1.0\"?>
<TEST>
<block1>Background Mark 1</block1>
<block2>Background Mark 2</block2>
<block3>Background Mark 3</block3>
</TEST>
</TEST>ahsdkjlfhasdlkjfhkljh
",
);
println!("{:?}", document);
println!("{:#?}", document);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,7 @@
use futures::Stream;
use tokio::io::AsyncBufRead;
use nom::Err;
use std::{collections::BTreeMap, str};
use tokio::io::AsyncBufReadExt;
use crate::{
element::{Element, Name, Namespace},
@ -8,28 +10,91 @@ use crate::{
/// streaming reader that tracks depth and available namespaces at current depth
pub struct Reader<R> {
stream: R,
inner: R,
// holds which tags we are in atm over depth
depth: Vec<Name>,
namespaces: Vec<(usize, Namespace)>,
}
impl<R> Reader<R>
where
R: AsyncBufRead,
{
// pub async fn read(&self) -> Result<impl From<Element>, Error> {
// let buf = self.stream.poll_fill_buf().await?;
// todo!()
// }
// pub async fn read_start(&self) -> Result<impl From<Element>, Error> {
// todo!()
// }
// pub async fn read_end(&self) -> Result<(), Error> {
// todo!()
// }
impl<R> Reader<R> {
    /// Wraps `reader` in a fresh `Reader`.
    ///
    /// Both the stack of currently open tags (`depth`) and the list of
    /// namespaces start out empty.
    pub fn new(reader: R) -> Self {
        let depth = Vec::new();
        let namespaces = Vec::new();
        Self {
            inner: reader,
            depth,
            namespaces,
        }
    }
}
// impl<R> Reader<R>
// where
// R: AsyncBufReadExt + Unpin,
// {
// /// could recursively read and include namespace tree with values to be shadowed within new local context
// async fn read_recursive(&mut self, namespaces: BTreeMap<Option<String>, String>) -> Result<Element, Error> {
// let element;
// let len;
// loop {
// let buf = self.inner.fill_buf().await?;
// let input = str::from_utf8(buf)?;
// match crate::xml::element(input) {
// Ok((rest, e)) => {
// element = e;
// len = buf.len() - rest.len();
// break;
// }
// Err(e) => match e {
// Err::Incomplete(_) => (),
// e => return Err::<E, Error>(Error::ParseError(input.to_owned())),
// },
// }
// }
// let final;
// match element {
// crate::xml::Element::Empty(e) => {
// let final = Element {
// }
// },
// crate::xml::Element::NotEmpty(_, _, _) => todo!(),
// }
// self.inner.consume(len);
// todo!()
// }
// /// reads entire next prolog, element, or misc
// pub async fn read<E: From<Element>>(&mut self) -> Result<E, Error> {
// let element;
// let len;
// loop {
// let buf = self.inner.fill_buf().await?;
// let input = str::from_utf8(buf)?;
// match crate::xml::element(input) {
// Ok((rest, e)) => {
// element = e;
// len = buf.len() - rest.len();
// break;
// }
// Err(e) => match e {
// Err::Incomplete(_) => (),
// e => return Err::<E, Error>(Error::ParseError(input.to_owned())),
// },
// }
// }
// self.inner.consume(len);
// // Ok(element)
// todo!()
// }
// pub async fn read_start(&self) -> Result<impl From<Element>, Error> {
// todo!()
// }
// pub async fn read_end(&self) -> Result<(), Error> {
// todo!()
// }
// }
// impl<R: AsyncBufRead> Stream for Reader<R> {
// type Item = impl From<Element>;

View File

@ -5,8 +5,9 @@ use crate::{
error::Error,
};
// pub struct Writer<W, C = Composer> {
pub struct Writer<W> {
stream: W,
writer: W,
depth: Vec<Name>,
namespaces: Vec<(usize, Namespace)>,
}

1495
src/xml/composers.rs Normal file

File diff suppressed because it is too large Load Diff

611
src/xml/mod.rs Normal file
View File

@ -0,0 +1,611 @@
use std::char;
pub mod composers;
pub mod parsers;
/// [1] NSAttName ::= PrefixedAttName | DefaultAttName
///
/// Name of a namespace-declaration attribute: either the prefixed form
/// (`xmlns:` followed by an NCName) or the bare default form (`xmlns`).
#[derive(Clone, Debug)]
pub enum NSAttName<'s> {
/// The `xmlns:<NCName>` form.
PrefixedAttName(PrefixedAttName<'s>),
/// The bare `xmlns` form; carries no data.
DefaultAttName,
}
/// [2] PrefixedAttName ::= 'xmlns:' NCName
///
/// Stores only the NCName; the `xmlns:` literal of the production has no field here.
#[derive(Clone, Debug)]
pub struct PrefixedAttName<'s>(NCName<'s>);
/// [3] DefaultAttName ::= 'xmlns';
///
/// Unit marker type for the literal `xmlns`; holds no data.
#[derive(Clone, Debug)]
pub struct DefaultAttName;
/// [4] NCName ::= Name - (Char* ':' Char*)
///
/// Newtype over a borrowed `&str`. The "no colon" restriction of the production
/// is not enforced by this declaration itself.
// NOTE(review): presumably the slice borrows from the parsed input — confirm in parsers.rs.
#[derive(Clone, Debug)]
pub struct NCName<'s>(&'s str);
/// [7] QName ::= PrefixedName | UnprefixedName
///
/// A qualified name: either `prefix:local` or a bare local part.
#[derive(Clone, Debug)]
pub enum QName<'s> {
/// Name written with a prefix and a local part.
PrefixedName(PrefixedName<'s>),
/// Name written as a local part only.
UnprefixedName(UnprefixedName<'s>),
}
/// [8] PrefixedName ::= Prefix ':' LocalPart
///
/// Only the two parts are stored; the `:` separator has no field.
#[derive(Clone, Debug)]
pub struct PrefixedName<'s> {
/// The part before the `:`.
prefix: Prefix<'s>,
/// The part after the `:`.
local_part: LocalPart<'s>,
}
/// [9] UnprefixedName ::= LocalPart
///
/// Newtype over [`LocalPart`].
#[derive(Clone, Debug)]
pub struct UnprefixedName<'s>(LocalPart<'s>);
/// [10] Prefix ::= NCName
///
/// Newtype over [`NCName`] used in the prefix position of a [`PrefixedName`].
#[derive(Clone, Debug)]
pub struct Prefix<'s>(NCName<'s>);
/// [11] LocalPart ::= NCName
///
/// Newtype over [`NCName`] used in the local-part position of a name.
#[derive(Clone, Debug)]
pub struct LocalPart<'s>(NCName<'s>);
// xml spec
/// [1] document ::= prolog element Misc*
///
/// Tuple in production order: the prolog, the one `element`, and the
/// (possibly empty) list of `Misc` items that follow it.
pub type Document<'s> = (Prolog<'s>, Element<'s>, Vec<Misc<'s>>);
/// [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
///
/// Newtype over `char`. The range restriction above is not enforced by this
/// declaration itself.
#[repr(transparent)]
pub struct Char(char);
/// [3] S ::= (#x20 | #x9 | #xD | #xA)+
///
/// Unit marker type: it has no fields, so the matched whitespace is not retained.
#[derive(Clone)]
#[repr(transparent)]
pub struct S;
/// [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
///
/// Newtype over `char` for a character allowed at the start of a `Name`.
#[repr(transparent)]
pub struct NameStartChar(char);
/// [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
///
/// Newtype over `char` for a character allowed after the first character of a `Name`.
#[repr(transparent)]
pub struct NameChar(char);
/// [5] Name ::= NameStartChar (NameChar)*
///
/// Newtype over a borrowed `&str`; comparable for equality via the derives below.
#[derive(Debug, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct Name<'s>(&'s str);
/// [6] Names ::= Name (#x20 Name)*
///
/// Held as a `Vec` of [`Name`]s; the `#x20` separators are not stored.
#[repr(transparent)]
pub struct Names<'s>(Vec<Name<'s>>);
/// [7] Nmtoken ::= (NameChar)+
///
/// Newtype over a borrowed `&str`.
#[derive(Debug, Clone)]
#[repr(transparent)]
pub struct Nmtoken<'s>(&'s str);
/// [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
///
/// Held as a `Vec` of [`Nmtoken`]s; the `#x20` separators are not stored.
#[repr(transparent)]
pub struct Nmtokens<'s>(Vec<Nmtoken<'s>>);
/// One piece of the contents of an [`EntityValue`]. It has no production number
/// of its own; it models the alternatives that may appear between the quotes in [9].
#[derive(Clone, Debug)]
pub enum EntityValueData<'s> {
/// A run of literal text.
// NOTE(review): presumably the `[^%&"]` / `[^%&']` characters — confirm in parsers.rs.
String(&'s str),
/// A parameter-entity reference.
PEReference(PEReference<'s>),
/// An entity or character reference.
Reference(Reference<'s>),
}
/// [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
/// | "'" ([^%&'] | PEReference | Reference)* "'"
///
/// The variant records which quote character delimited the value; the payload
/// is the sequence of pieces between the quotes.
#[derive(Debug)]
pub enum EntityValue<'s> {
DoubleQuoted(Vec<EntityValueData<'s>>),
SingleQuoted(Vec<EntityValueData<'s>>),
}
/// One piece of the contents of an [`AttValue`]. It has no production number of
/// its own; it models the alternatives that may appear between the quotes in [10].
#[derive(Clone, Debug)]
pub enum AttValueData<'s> {
/// A run of literal text.
// NOTE(review): presumably the `[^<&"]` / `[^<&']` characters — confirm in parsers.rs.
String(&'s str),
/// An entity or character reference.
Reference(Reference<'s>),
}
/// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
/// | "'" ([^<&'] | Reference)* "'"
///
/// The variant records which quote character delimited the value; the payload
/// is the sequence of pieces between the quotes.
#[derive(Clone, Debug)]
pub enum AttValue<'s> {
DoubleQuoted(Vec<AttValueData<'s>>),
SingleQuoted(Vec<AttValueData<'s>>),
}
/// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
///
/// The variant records the quote style; the payload is a borrowed `&str`.
// NOTE(review): presumably the payload excludes the quotes themselves — confirm in parsers.rs.
#[derive(Debug)]
pub enum SystemLiteral<'s> {
DoubleQuoted(&'s str),
SingleQuoted(&'s str),
}
/// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
///
/// The variant records the quote style; the payload is a borrowed `&str`.
// NOTE(review): presumably the payload excludes the quotes themselves — confirm in parsers.rs.
#[derive(Debug)]
pub enum PubidLiteral<'s> {
DoubleQuoted(&'s str),
SingleQuoted(&'s str),
}
/// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
///
/// Newtype over `char`; the allowed set above is not enforced by this declaration itself.
#[repr(transparent)]
pub struct PubidChar(char);
/// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
///
/// Newtype over a borrowed `&str` of character data.
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct CharData<'s>(&'s str);
/// [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
///
/// Newtype over a borrowed `&str`.
// NOTE(review): presumably only the text between `<!--` and `-->` — confirm in parsers.rs.
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Comment<'s>(&'s str);
/// [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
///
/// A processing instruction: a target plus an optional instruction text.
#[derive(Clone, Debug)]
pub struct PI<'s> {
/// The target name following `<?`.
target: PITarget<'s>,
/// The text after the target; `Option` mirrors the `(...)?` group in the production.
instruction: Option<&'s str>,
}
/// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
///
/// Newtype over [`Name`]; the "not `xml`" exclusion is not enforced by this declaration itself.
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct PITarget<'s>(Name<'s>);
/// [18] CDSect ::= CDStart CData CDEnd
///
/// Stores only the [`CData`]; the start and end delimiters have no fields here.
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct CDSect<'s>(CData<'s>);
/// [19] CDStart ::= '<![CDATA['
///
/// Unit marker type for the opening delimiter; holds no data.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDStart;
/// [20] CData ::= (Char* - (Char* ']]>' Char*))
///
/// Newtype over a borrowed `&str` of the section's text.
#[derive(Clone, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct CData<'s>(&'s str);
/// [21] CDEnd ::= ']]>'
///
/// Unit marker type for the closing delimiter; holds no data.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CDEnd;
/// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
///
/// Tuple in production order:
/// 0. the optional XML declaration,
/// 1. the `Misc` items that follow it,
/// 2. the optional doctype declaration together with the `Misc` items after it.
pub type Prolog<'s> = (
Option<XMLDecl<'s>>,
Vec<Misc<'s>>,
Option<(DoctypeDecl<'s>, Vec<Misc<'s>>)>,
);
/// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
///
/// The XML declaration: a mandatory version plus optional encoding and
/// standalone declarations.
#[derive(Debug)]
pub struct XMLDecl<'s> {
/// Required version information.
version_info: VersionInfo,
/// Optional `encoding` declaration.
encoding_decl: Option<EncodingDecl<'s>>,
/// Optional `standalone` declaration.
sd_decl: Option<SDDecl>,
}
/// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
///
/// The variant records which quote character was used around the version number.
#[derive(Debug)]
pub enum VersionInfo {
SingleQuoted(VersionNum),
DoubleQuoted(VersionNum),
}
/// [25] Eq ::= S? '=' S?
///
/// Unit marker type; the optional surrounding whitespace is not retained.
#[derive(Clone)]
pub struct Eq;
/// [26] VersionNum ::= '1.' [0-9]+
///
/// Only two variants are declared, although the production admits any digits
/// after `1.`.
// NOTE(review): presumably `One` is "1.0" and `OneDotOne` is "1.1" — confirm in parsers.rs.
#[derive(Clone, Debug)]
pub enum VersionNum {
One,
OneDotOne,
}
/// [27] Misc ::= Comment | PI | S
///
/// An item allowed where the grammar says `Misc`: a comment, a processing
/// instruction, or whitespace.
#[derive(Clone, Debug)]
pub enum Misc<'s> {
/// A comment.
Comment(Comment<'s>),
/// A processing instruction.
PI(PI<'s>),
// TODO: how to deal with whitespace
/// Whitespace; the variant has no payload, so the whitespace text is not kept.
S,
}
/// [16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
/// [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
///
/// Two numbered forms of the production are quoted above; the `name` field is
/// a [`QName`], i.e. it follows the [16] form rather than the plain `Name` of [28].
#[derive(Debug)]
pub struct DoctypeDecl<'s> {
/// The name following `<!DOCTYPE`.
name: QName<'s>,
/// The optional external identifier.
external_id: Option<ExternalID<'s>>,
/// The optional bracketed internal subset.
int_subset: Option<IntSubset<'s>>,
}
/// [28a] DeclSep ::= PEReference | S
///
/// Separator between declarations: a parameter-entity reference or whitespace.
#[derive(Clone, Debug)]
pub enum DeclSep<'s> {
/// A parameter-entity reference.
PEReference(PEReference<'s>),
// TODO: tackle whitespace
/// Whitespace; the variant has no payload, so the whitespace text is not kept.
S,
}
/// One entry of an [`IntSubset`]. It has no production number of its own; it
/// models the two alternatives inside the repetition of [28b].
#[derive(Debug)]
pub enum IntSubsetDeclaration<'s> {
MarkupDecl(MarkupDecl<'s>),
DeclSep(DeclSep<'s>),
}
/// from [16] intSubset ::= (markupdecl | PEReference | S)*
/// [28b] intSubset ::= (markupdecl | DeclSep)*
///
/// The repetition is held as a `Vec` of [`IntSubsetDeclaration`]s.
pub type IntSubset<'s> = Vec<IntSubsetDeclaration<'s>>;
/// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
///
/// One variant per alternative of the production, in the same order.
#[derive(Debug)]
pub enum MarkupDecl<'s> {
/// An `<!ELEMENT ...>` declaration.
Elementdecl(Elementdecl<'s>),
/// An `<!ATTLIST ...>` declaration.
AttlistDecl(AttlistDecl<'s>),
/// An `<!ENTITY ...>` declaration.
EntityDecl(EntityDecl<'s>),
/// A `<!NOTATION ...>` declaration.
NotationDecl(NotationDecl<'s>),
/// A processing instruction.
PI(PI<'s>),
/// A comment.
Comment(Comment<'s>),
}
/// [30] extSubset ::= TextDecl? extSubsetDecl
///
/// An external subset: an optional text declaration followed by its declarations.
pub struct ExtSubset<'s> {
/// The optional leading text declaration.
text_decl: Option<TextDecl<'s>>,
/// The declarations that make up the subset.
ext_subset_decl: ExtSubsetDecl<'s>,
}
/// One entry of an [`ExtSubsetDecl`]. It has no production number of its own;
/// it models the three alternatives inside the repetition of [31].
pub enum ExtSubsetDeclaration<'s> {
MarkupDecl(MarkupDecl<'s>),
ConditionalSect(ConditionalSect<'s>),
DeclSep(DeclSep<'s>),
}
/// [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
///
/// The repetition is held as a `Vec`. Note that, unlike the neighbouring
/// aliases, this one is declared without `pub`.
type ExtSubsetDecl<'s> = Vec<ExtSubsetDeclaration<'s>>;
/// [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
///
/// The variant records the quote style; the `bool` carries the yes/no choice.
// NOTE(review): presumably `true` means 'yes' — confirm in parsers.rs.
#[derive(Debug, Clone)]
pub enum SDDecl {
SingleQuoted(bool),
DoubleQuoted(bool),
}
// (Productions 33 through 38 have been removed.)
/// [39] element ::= EmptyElemTag | STag content ETag
///
/// An element in one of its two written forms.
#[derive(Debug, Clone)]
pub enum Element<'s> {
/// The self-closing form (`<name ... />`).
Empty(EmptyElemTag<'s>),
/// The start-tag / content / end-tag form, stored in that order.
NotEmpty(STag<'s>, Content<'s>, ETag<'s>),
}
/// [12] STag ::= '<' QName (S Attribute)* S? '>'
/// [40] STag ::= '<' Name (S Attribute)* S? '>'
///
/// A start tag. The `name` field is a [`QName`], i.e. it follows the [12] form.
#[derive(Debug, Clone)]
pub struct STag<'s> {
/// The tag name.
name: QName<'s>,
/// The attributes listed in the tag (the `(S Attribute)*` repetition).
attributes: Vec<Attribute<'s>>,
}
/// [15] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue
// pub type Attribute<'s> = (Name<'s>, AttValue<'s>);
/// [41] Attribute ::= Name Eq AttValue
///
/// The two variants mirror the two alternatives of the [15] form: a namespace
/// declaration, or an ordinary attribute with a qualified name.
#[derive(Debug, Clone)]
pub enum Attribute<'s> {
/// `NSAttName Eq AttValue`: an `xmlns` / `xmlns:prefix` declaration.
NamespaceDeclaration {
ns_name: NSAttName<'s>,
value: AttValue<'s>,
},
/// `QName Eq AttValue`: an ordinary attribute.
Attribute {
name: QName<'s>,
value: AttValue<'s>,
},
}
/// [13] ETag ::= '</' QName S? '>'
/// [42] ETag ::= '</' Name S? '>'
///
/// An end tag. The `name` field is a [`QName`], i.e. it follows the [13] form.
#[derive(Debug, Clone)]
pub struct ETag<'s> {
/// The tag name.
name: QName<'s>,
}
/// One non-text item of element content. It has no production number of its
/// own; it models the parenthesised alternatives in [43].
#[derive(Debug, Clone)]
pub enum ContentItem<'s> {
// CharData(&'s str),
/// A nested element.
Element(Element<'s>),
/// An entity or character reference.
Reference(Reference<'s>),
/// A CDATA section.
CDSect(CDSect<'s>),
/// A processing instruction.
PI(PI<'s>),
/// A comment.
Comment(Comment<'s>),
}
/// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
///
/// The fields follow the shape of the production.
#[derive(Debug, Clone)]
pub struct Content<'s> {
/// The optional leading `CharData?`.
char_data: Option<CharData<'s>>,
/// Each item of the repetition, paired with the optional `CharData?` that follows it.
content: Vec<(ContentItem<'s>, Option<CharData<'s>>)>,
}
/// [14] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>'
/// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' [WFC: Unique Att Spec]
///
/// A self-closing tag. The `name` field is a [`QName`], i.e. it follows the [14] form.
#[derive(Debug, Clone)]
pub struct EmptyElemTag<'s> {
/// The tag name.
name: QName<'s>,
/// The attributes listed in the tag (the `(S Attribute)*` repetition).
attributes: Vec<Attribute<'s>>,
}
/// [17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'
/// [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
///
/// An element type declaration. The `name` field is a [`QName`], i.e. it
/// follows the [17] form.
#[derive(Debug)]
pub struct Elementdecl<'s> {
/// The declared element name.
name: QName<'s>,
/// The content specification for that element.
contentspec: Contentspec<'s>,
}
// TODO: casings???
// TODO: the "casings" note above is unclear; work out what it referred to
// NOTE(review): possibly about the case of the 'EMPTY' / 'ANY' keywords — confirm with the author.
/// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
///
/// One variant per alternative of the production, in the same order.
#[derive(Clone, Debug)]
pub enum Contentspec<'s> {
/// The `EMPTY` keyword.
Empty,
/// The `ANY` keyword.
Any,
/// A mixed-content model.
Mixed(Mixed<'s>),
/// An element-content (children) model.
Children(Children<'s>),
}
/// Occurence ::= ('?' | '*' | '+')?
///
/// Helper for the optional occurrence suffix used by [47] and [48].
// NOTE(review): presumably no suffix -> `Once`, '?' -> `Optional`, '*' -> `Many0`,
// '+' -> `Many1` — confirm in parsers.rs.
#[derive(Clone, Debug)]
pub enum Occurence {
Once,
Optional,
Many0,
Many1,
}
/// The `(choice | seq)` alternative of [47]; has no production number of its own.
#[derive(Clone, Debug)]
pub enum ChildrenKind<'s> {
Choice(Choice<'s>),
Seq(Seq<'s>),
}
/// [47] children ::= (choice | seq) ('?' | '*' | '+')?
///
/// A choice or sequence together with its occurrence suffix.
#[derive(Clone, Debug)]
pub struct Children<'s> {
/// Which of `choice` / `seq` was written.
kind: ChildrenKind<'s>,
/// The occurrence suffix.
occurence: Occurence,
}
/// The `(QName | choice | seq)` alternative of [18]/[48]; has no production
/// number of its own.
#[derive(Clone, Debug)]
pub enum CpKind<'s> {
Name(QName<'s>),
Choice(Choice<'s>),
Seq(Seq<'s>),
}
/// [18] cp ::= (QName | choice | seq) ('?' | '*' | '+')?
/// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
///
/// A content particle: a name, choice or sequence together with its occurrence
/// suffix. Names are [`QName`]s, i.e. this follows the [18] form.
#[derive(Clone, Debug)]
pub struct Cp<'s> {
/// Which kind of particle was written.
kind: CpKind<'s>,
/// The occurrence suffix.
occurence: Occurence,
}
/// [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
///
/// The content particles of the choice; parentheses, `|` and whitespace are not stored.
#[derive(Clone, Debug)]
pub struct Choice<'s>(Vec<Cp<'s>>);
/// [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
///
/// The content particles of the sequence; parentheses, `,` and whitespace are not stored.
#[derive(Clone, Debug)]
pub struct Seq<'s>(Vec<Cp<'s>>);
// always contains #PCDATA
/// [19] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'
/// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
///
/// Only the names are stored, as [`QName`]s (the [19] form); `#PCDATA` itself
/// has no field because it is always present.
// NOTE(review): presumably an empty `Vec` covers both the `(#PCDATA)` and the
// name-less `(#PCDATA)*` spellings — confirm in parsers.rs.
#[derive(Clone, Debug)]
pub struct Mixed<'s>(Vec<QName<'s>>);
/// [20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'
/// [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
///
/// An attribute-list declaration. The element name is a [`QName`], i.e. this
/// follows the [20] form.
#[derive(Debug)]
pub struct AttlistDecl<'s> {
/// The element type the attribute definitions apply to.
element_type: QName<'s>,
/// The attribute definitions (the `AttDef*` repetition).
att_defs: Vec<AttDef<'s>>,
}
/// The `(QName | NSAttName)` alternative of [21]; has no production number of its own.
#[derive(Debug)]
pub enum AttDefName<'s> {
QName(QName<'s>),
NSAttName(NSAttName<'s>),
}
/// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
/// [53] AttDef ::= S Name S AttType S DefaultDecl
///
/// A single attribute definition; the name follows the [21] form.
#[derive(Debug)]
pub struct AttDef<'s> {
/// The attribute name (ordinary or namespace-declaration).
name: AttDefName<'s>,
/// The declared attribute type.
att_type: AttType<'s>,
/// The default declaration.
default_decl: DefaultDecl<'s>,
}
/// [54] AttType ::= StringType | TokenizedType | EnumeratedType
///
/// One variant per alternative. The `StringType` variant carries no payload; it
/// does not wrap the separate [`StringType`] unit struct.
#[derive(Clone, Debug)]
pub enum AttType<'s> {
StringType,
TokenizedType(TokenizedType),
EnumeratedType(EnumeratedType<'s>),
}
/// [55] StringType ::= 'CDATA'
///
/// Unit marker type for the `CDATA` keyword; holds no data.
#[derive(Clone)]
pub struct StringType;
/// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
///
/// One variant per keyword, declared in the same order as the production.
#[derive(Clone, Debug)]
pub enum TokenizedType {
ID,
IDRef,
IDRefs,
Entity,
Entities,
NMToken,
NMTokens,
}
/// [57] EnumeratedType ::= NotationType | Enumeration
///
/// One variant per alternative of the production.
#[derive(Debug, Clone)]
pub enum EnumeratedType<'s> {
NotationType(NotationType<'s>),
Enumeration(Enumeration<'s>),
}
/// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
///
/// Only the listed [`Name`]s are stored; the keyword and punctuation are not.
#[derive(Debug, Clone)]
pub struct NotationType<'s>(Vec<Name<'s>>);
/// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
///
/// Only the listed [`Nmtoken`]s are stored; the punctuation is not.
#[derive(Debug, Clone)]
pub struct Enumeration<'s>(Vec<Nmtoken<'s>>);
/// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
///
/// The third alternative (with or without `#FIXED`) is represented by the
/// single `Fixed` variant; its `bool` distinguishes the two spellings.
#[derive(Debug, Clone)]
pub enum DefaultDecl<'s> {
/// The `#REQUIRED` keyword.
Required,
/// The `#IMPLIED` keyword.
Implied,
/// if bool == true, attribute MUST always have default value
// NOTE(review): presumably `true` exactly when `#FIXED` was written — confirm in parsers.rs.
Fixed(bool, AttValue<'s>),
}
/// [61] conditionalSect ::= includeSect | ignoreSect
///
/// One variant per alternative of the production.
pub enum ConditionalSect<'s> {
IncludeSect(IncludeSect<'s>),
IgnoreSect(IgnoreSect<'s>),
}
/// [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
///
/// Stores only the enclosed `extSubsetDecl`; delimiters and keyword are not kept.
pub struct IncludeSect<'s>(ExtSubsetDecl<'s>);
/// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
///
/// Stores the `ignoreSectContents*` repetition as a `Vec`.
pub struct IgnoreSect<'s>(Vec<IgnoreSectContents<'s>>);
/// [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
///
/// The fields follow the shape of the production: a leading `Ignore`, then
/// zero or more (nested contents, trailing `Ignore`) pairs.
pub struct IgnoreSectContents<'s> {
// TODO: work out what this production means and whether this shape models it correctly
/// The leading `Ignore`.
ignore: Ignore<'s>,
/// Each nested `ignoreSectContents` with the `Ignore` that follows it.
ignore_list: Vec<(IgnoreSectContents<'s>, Ignore<'s>)>,
}
/// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
///
/// Newtype over a borrowed `&str`.
pub struct Ignore<'s>(&'s str);
/// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
///
/// The variant records decimal vs. hexadecimal notation. The payload is kept
/// as text (`&str`), not converted to a number or `char` by this type.
// NOTE(review): presumably just the digits, without `&#`/`&#x` and `;` — confirm in parsers.rs.
#[derive(Clone, Debug)]
pub enum CharRef<'s> {
Decimal(&'s str),
Hexadecimal(&'s str),
}
/// [67] Reference ::= EntityRef | CharRef
///
/// One variant per alternative of the production.
#[derive(Clone, Debug)]
pub enum Reference<'s> {
EntityRef(EntityRef<'s>),
CharRef(CharRef<'s>),
}
/// [68] EntityRef ::= '&' Name ';'
///
/// Stores only the [`Name`]; the `&` and `;` delimiters have no fields.
#[derive(Clone, Debug)]
pub struct EntityRef<'s>(Name<'s>);
/// [69] PEReference ::= '%' Name ';'
///
/// Stores only the [`Name`]; the `%` and `;` delimiters have no fields.
#[derive(Clone, Debug)]
#[repr(transparent)]
pub struct PEReference<'s>(Name<'s>);
/// [70] EntityDecl ::= GEDecl | PEDecl
///
/// One variant per alternative: a general or a parameter entity declaration.
#[derive(Debug)]
pub enum EntityDecl<'s> {
GEDecl(GEDecl<'s>),
PEDecl(PEDecl<'s>),
}
/// [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
///
/// A general entity declaration.
#[derive(Debug)]
pub struct GEDecl<'s> {
/// The entity name.
name: Name<'s>,
/// The entity definition.
entity_def: EntityDef<'s>,
}
/// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
///
/// A parameter entity declaration.
#[derive(Debug)]
pub struct PEDecl<'s> {
/// The entity name.
name: Name<'s>,
/// The parameter-entity definition.
pe_def: PEDef<'s>,
}
/// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
///
/// One variant per alternative of the production.
#[derive(Debug)]
pub enum EntityDef<'s> {
/// A literal entity value.
EntityValue(EntityValue<'s>),
/// An external identifier with an optional `NDATA` declaration.
ExternalID {
external_id: ExternalID<'s>,
n_data_decl: Option<NDataDecl<'s>>,
},
}
/// [74] PEDef ::= EntityValue | ExternalID
///
/// One variant per alternative of the production.
#[derive(Debug)]
pub enum PEDef<'s> {
EntityValue(EntityValue<'s>),
ExternalID(ExternalID<'s>),
}
/// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
///
/// One variant per keyword; the fields are the literals that follow it.
#[derive(Debug)]
pub enum ExternalID<'s> {
/// The `SYSTEM` form: a system literal only.
SYSTEM {
system_identifier: SystemLiteral<'s>,
},
/// The `PUBLIC` form: a public-id literal followed by a system literal.
PUBLIC {
public_identifier: PubidLiteral<'s>,
system_identifier: SystemLiteral<'s>,
},
}
/// [76] NDataDecl ::= S 'NDATA' S Name
///
/// Stores only the [`Name`]; the keyword and whitespace are not kept.
#[derive(Debug)]
pub struct NDataDecl<'s>(Name<'s>);
/// [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
///
/// A text declaration: unlike [`XMLDecl`], the version is optional and the
/// encoding is required.
pub struct TextDecl<'s> {
/// Optional version information.
version_info: Option<VersionInfo>,
/// Required `encoding` declaration.
encoding_decl: EncodingDecl<'s>,
}
/// [78] extParsedEnt ::= TextDecl? content
///
/// An external parsed entity: an optional text declaration followed by content.
pub struct ExtParsedEnt<'s> {
/// The optional leading text declaration.
text_decl: Option<TextDecl<'s>>,
/// The content that follows it.
content: Content<'s>,
}
/// [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
///
/// Stores only the [`EncName`]; which quote character was used is not recorded
/// (see the TODO below).
#[derive(Debug)]
// TODO?: select quote version
pub struct EncodingDecl<'s>(EncName<'s>);
/// [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
///
/// Newtype over a borrowed `&str`; the character restrictions above are not
/// enforced by this declaration itself.
#[derive(Debug)]
pub struct EncName<'s>(&'s str);
/// The `(ExternalID | PublicID)` alternative of [82]; has no production number
/// of its own.
#[derive(Debug)]
pub enum NotationDeclID<'s> {
External(ExternalID<'s>),
Public(PublicID<'s>),
}
/// [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
///
/// A notation declaration.
#[derive(Debug)]
pub struct NotationDecl<'s> {
/// The notation name.
name: Name<'s>,
/// The external or public identifier.
id: NotationDeclID<'s>,
}
/// [83] PublicID ::= 'PUBLIC' S PubidLiteral
///
/// Stores only the [`PubidLiteral`]; the `PUBLIC` keyword has no field.
#[derive(Debug)]
pub struct PublicID<'s>(PubidLiteral<'s>);

1529
src/xml/parsers.rs Normal file

File diff suppressed because it is too large Load Diff