WIP: stream parsing
This commit is contained in:
parent
c08b4504ab
commit
1f0103cbec
|
@ -38,6 +38,12 @@ dependencies = [
|
|||
"rustc-demangle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "1.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.89"
|
||||
|
@ -173,6 +179,17 @@ dependencies = [
|
|||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.8.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"wasi",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
|
@ -243,6 +260,16 @@ dependencies = [
|
|||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.5.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.52"
|
||||
|
@ -261,7 +288,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"bytes",
|
||||
"libc",
|
||||
"mio",
|
||||
"pin-project-lite",
|
||||
"socket2",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -269,3 +301,148 @@ name = "unicode-ident"
|
|||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
|
||||
dependencies = [
|
||||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
|
||||
dependencies = [
|
||||
"windows-targets 0.52.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm 0.48.5",
|
||||
"windows_aarch64_msvc 0.48.5",
|
||||
"windows_i686_gnu 0.48.5",
|
||||
"windows_i686_msvc 0.48.5",
|
||||
"windows_x86_64_gnu 0.48.5",
|
||||
"windows_x86_64_gnullvm 0.48.5",
|
||||
"windows_x86_64_msvc 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm 0.52.5",
|
||||
"windows_aarch64_msvc 0.52.5",
|
||||
"windows_i686_gnu 0.52.5",
|
||||
"windows_i686_gnullvm",
|
||||
"windows_i686_msvc 0.52.5",
|
||||
"windows_x86_64_gnu 0.52.5",
|
||||
"windows_x86_64_gnullvm 0.52.5",
|
||||
"windows_x86_64_msvc 0.52.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnullvm"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
|
||||
|
|
|
@ -8,4 +8,4 @@ edition = "2021"
|
|||
[dependencies]
|
||||
futures = "0.3.30"
|
||||
nom = "7.1.3"
|
||||
tokio = "1.36.0"
|
||||
tokio = { version = "1.36.0", features = ["io-util", "net"] }
|
||||
|
|
|
@ -23,13 +23,13 @@ pub enum Node {
|
|||
// should this be a trait?
|
||||
pub struct Element {
|
||||
name: Name,
|
||||
// namespace: (Name, String), // can't have this, must be external method that is called within the context of a reader/writer
|
||||
// namespace: Name,
|
||||
// each element once created contains the qualified namespace information for that element
|
||||
// the name contains the qualified namespace so this is unnecessary
|
||||
// namespace: String,
|
||||
// hashmap of explicit namespace declarations on the element itself only
|
||||
// possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader.
|
||||
// namespaces: HashMap<Option<String>, String>,
|
||||
namespaces: HashMap<Option<String>, String>,
|
||||
// attributes can be in a different namespace than the element. how to make sure they are valid?
|
||||
// maybe include the namespace instead of or with the prefix
|
||||
// you can calculate the prefix from the namespaced name and the current writer context
|
||||
|
|
20
src/error.rs
20
src/error.rs
|
@ -1 +1,19 @@
|
|||
pub enum Error {}
|
||||
use std::str::Utf8Error;
|
||||
|
||||
pub enum Error {
|
||||
ReadError(std::io::Error),
|
||||
Utf8Error(Utf8Error),
|
||||
ParseError(String),
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for Error {
|
||||
fn from(e: std::io::Error) -> Self {
|
||||
Self::ReadError(e)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Utf8Error> for Error {
|
||||
fn from(e: Utf8Error) -> Self {
|
||||
Self::Utf8Error(e)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use peanuts::parser::document;
|
||||
use peanuts::xml::document;
|
||||
|
||||
fn main() {
|
||||
let document = document(
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
use futures::Stream;
|
||||
use tokio::io::AsyncBufRead;
|
||||
use nom::Err;
|
||||
use std::str;
|
||||
use tokio::io::AsyncBufReadExt;
|
||||
|
||||
use crate::{
|
||||
element::{Element, Name, Namespace},
|
||||
|
@ -8,20 +10,50 @@ use crate::{
|
|||
|
||||
/// streaming reader that tracks depth and available namespaces at current depth
|
||||
pub struct Reader<R> {
|
||||
stream: R,
|
||||
inner: R,
|
||||
// holds which tags we are in atm over depth
|
||||
depth: Vec<Name>,
|
||||
namespaces: Vec<(usize, Namespace)>,
|
||||
}
|
||||
|
||||
impl<R> Reader<R> {
|
||||
pub fn new(reader: R) -> Self {
|
||||
Self {
|
||||
inner: reader,
|
||||
depth: Vec::new(),
|
||||
namespaces: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> Reader<R>
|
||||
where
|
||||
R: AsyncBufRead,
|
||||
R: AsyncBufReadExt + Unpin,
|
||||
{
|
||||
// pub async fn read(&self) -> Result<impl From<Element>, Error> {
|
||||
// let buf = self.stream.poll_fill_buf().await?;
|
||||
// todo!()
|
||||
// }
|
||||
/// reads entire next prolog, element, or misc
|
||||
pub async fn read<'s>(&'s mut self) -> Result<crate::xml::Element<'s>, Error> {
|
||||
let element;
|
||||
let len;
|
||||
loop {
|
||||
let buf = self.inner.fill_buf().await?;
|
||||
let input = str::from_utf8(buf)?;
|
||||
match crate::xml::element(input) {
|
||||
Ok((rest, e)) => {
|
||||
element = e;
|
||||
len = buf.len() - rest.len();
|
||||
break;
|
||||
}
|
||||
Err(e) => match e {
|
||||
Err::Incomplete(_) => (),
|
||||
e => return Err(Error::ParseError(input.to_owned())),
|
||||
},
|
||||
}
|
||||
}
|
||||
self.inner.consume(len);
|
||||
|
||||
// Ok(element)
|
||||
todo!()
|
||||
}
|
||||
// pub async fn read_start(&self) -> Result<impl From<Element>, Error> {
|
||||
// todo!()
|
||||
// }
|
||||
|
|
Loading…
Reference in New Issue