WIP: stream parsing

This commit is contained in:
cel 🌸 2024-06-27 20:22:16 +01:00
parent c08b4504ab
commit 1f0103cbec
6 changed files with 239 additions and 12 deletions

177
Cargo.lock generated
View File

@ -38,6 +38,12 @@ dependencies = [
"rustc-demangle",
]
[[package]]
name = "bytes"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
[[package]]
name = "cc"
version = "1.0.89"
@ -173,6 +179,17 @@ dependencies = [
"autocfg",
]
[[package]]
name = "mio"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
dependencies = [
"libc",
"wasi",
"windows-sys 0.48.0",
]
[[package]]
name = "nom"
version = "7.1.3"
@ -243,6 +260,16 @@ dependencies = [
"autocfg",
]
[[package]]
name = "socket2"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "syn"
version = "2.0.52"
@ -261,7 +288,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931"
dependencies = [
"backtrace",
"bytes",
"libc",
"mio",
"pin-project-lite",
"socket2",
"windows-sys 0.48.0",
]
[[package]]
@ -269,3 +301,148 @@ name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets 0.52.5",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
]
[[package]]
name = "windows-targets"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
dependencies = [
"windows_aarch64_gnullvm 0.52.5",
"windows_aarch64_msvc 0.52.5",
"windows_i686_gnu 0.52.5",
"windows_i686_gnullvm",
"windows_i686_msvc 0.52.5",
"windows_x86_64_gnu 0.52.5",
"windows_x86_64_gnullvm 0.52.5",
"windows_x86_64_msvc 0.52.5",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_gnu"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_i686_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"

View File

@ -8,4 +8,4 @@ edition = "2021"
[dependencies]
futures = "0.3.30"
nom = "7.1.3"
tokio = "1.36.0"
tokio = { version = "1.36.0", features = ["io-util", "net"] }

View File

@ -23,13 +23,13 @@ pub enum Node {
// should this be a trait?
pub struct Element {
name: Name,
// namespace: (Name, String), // can't have this, must be external method that is called within the context of a reader/writer
// namespace: Name,
// each element once created contains the qualified namespace information for that element
// the name contains the qualified namespace so this is unnecessary
// namespace: String,
// hashmap of explicit namespace declarations on the element itself only
// possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader.
// namespaces: HashMap<Option<String>, String>,
namespaces: HashMap<Option<String>, String>,
// attributes can be in a different namespace than the element. how to make sure they are valid?
// maybe include the namespace instead of or with the prefix
// you can calculate the prefix from the namespaced name and the current writer context

View File

@ -1 +1,19 @@
pub enum Error {}
use std::str::Utf8Error;
pub enum Error {
ReadError(std::io::Error),
Utf8Error(Utf8Error),
ParseError(String),
}
impl From<std::io::Error> for Error {
fn from(e: std::io::Error) -> Self {
Self::ReadError(e)
}
}
impl From<Utf8Error> for Error {
fn from(e: Utf8Error) -> Self {
Self::Utf8Error(e)
}
}

View File

@ -1,4 +1,4 @@
use peanuts::parser::document;
use peanuts::xml::document;
fn main() {
let document = document(

View File

@ -1,5 +1,7 @@
use futures::Stream;
use tokio::io::AsyncBufRead;
use nom::Err;
use std::str;
use tokio::io::AsyncBufReadExt;
use crate::{
element::{Element, Name, Namespace},
@ -8,20 +10,50 @@ use crate::{
/// streaming reader that tracks depth and available namespaces at current depth
pub struct Reader<R> {
stream: R,
inner: R,
// holds which tags we are in atm over depth
depth: Vec<Name>,
namespaces: Vec<(usize, Namespace)>,
}
impl<R> Reader<R> {
pub fn new(reader: R) -> Self {
Self {
inner: reader,
depth: Vec::new(),
namespaces: Vec::new(),
}
}
}
impl<R> Reader<R>
where
R: AsyncBufRead,
R: AsyncBufReadExt + Unpin,
{
// pub async fn read(&self) -> Result<impl From<Element>, Error> {
// let buf = self.stream.poll_fill_buf().await?;
// todo!()
// }
/// reads entire next prolog, element, or misc
pub async fn read<'s>(&'s mut self) -> Result<crate::xml::Element<'s>, Error> {
let element;
let len;
loop {
let buf = self.inner.fill_buf().await?;
let input = str::from_utf8(buf)?;
match crate::xml::element(input) {
Ok((rest, e)) => {
element = e;
len = buf.len() - rest.len();
break;
}
Err(e) => match e {
Err::Incomplete(_) => (),
e => return Err(Error::ParseError(input.to_owned())),
},
}
}
self.inner.consume(len);
// Ok(element)
todo!()
}
// pub async fn read_start(&self) -> Result<impl From<Element>, Error> {
// todo!()
// }