From a3dc4e1475a92c011cc55a070e268036abe88b01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?cel=20=F0=9F=8C=B8?= Date: Wed, 20 Nov 2024 15:10:36 +0000 Subject: [PATCH] WIP: write start tag of element --- src/element.rs | 8 +++- src/error.rs | 1 + src/writer.rs | 72 ++++++++++++++++++++++++++++++----- src/xml/mod.rs | 101 +++++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 163 insertions(+), 19 deletions(-) diff --git a/src/element.rs b/src/element.rs index 6d2a6b7..d883c04 100644 --- a/src/element.rs +++ b/src/element.rs @@ -1,10 +1,14 @@ // elements resemble a final tree, including inherited namespace information -use std::collections::{HashMap, HashSet}; +use std::{ + collections::{HashMap, HashSet}, + convert::Infallible, + str::FromStr, +}; use crate::{ error::Error, - xml::{self, Attribute}, + xml::{self, parsers_complete::Parser, Attribute}, }; // when are namespaces names chosen then if they are automatically calculated diff --git a/src/error.rs b/src/error.rs index fcf43eb..69993ed 100644 --- a/src/error.rs +++ b/src/error.rs @@ -16,6 +16,7 @@ pub enum Error { MismatchedEndTag(Name, Name), NotInElement(String), ExtraData(String), + UndeclaredNamespace(String), } impl From for Error { diff --git a/src/writer.rs b/src/writer.rs index 21d5fe0..9e770c7 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::{collections::HashSet, str::FromStr}; use futures::Sink; use tokio::io::AsyncWrite; @@ -7,6 +7,7 @@ use crate::{ element::{Element, Name, NamespaceDeclaration}, error::Error, xml::{self, composers::Composer, parsers_complete::Parser, ETag}, + Result, }; // pub struct Writer { @@ -17,17 +18,68 @@ pub struct Writer { } impl Writer { - pub async fn write(&mut self, element: Element) -> Result<(), Error> { + pub async fn write(&mut self, element: Element) -> Result<()> { todo!() } - pub async fn write_start(&mut self, element: Element) -> Result<(), Error> { - todo!() + pub async fn write_start(&mut self, element: Element) -> Result<()> { + let mut namespace_declarations_stack: Vec<_> = self + .namespace_declarations + .iter() + .flatten() + .chain(&element.namespace_declarations) + .collect(); + let name_namespace_declaration = namespace_declarations_stack + .iter() + .rfind(|namespace_declaration| { + namespace_declaration.namespace == element.name.namespace + }) + .ok_or(Error::UndeclaredNamespace(element.name.namespace.clone()))?; + let prefix = &name_namespace_declaration.prefix; + let name; + if let Some(prefix) = &prefix { + name = xml::QName::PrefixedName(xml::PrefixedName { + prefix: xml::Prefix::parse_full(prefix)?, + local_part: xml::LocalPart::parse_full(&element.name.local_name)?, + }) + } else { + name = xml::QName::UnprefixedName(xml::UnprefixedName::parse_full( + &element.name.local_name, + )?) + } + + namespace_declarations_stack.push(name_namespace_declaration); + + let mut attributes = Vec::new(); + + for namespace_declaration in &element.namespace_declarations { + let ns_name = namespace_declaration + .prefix + .as_ref() + .map(|prefix| -> Result<_> { + Ok(xml::NSAttName::PrefixedAttName( + xml::PrefixedAttName::parse_full(&prefix)?, + )) + }) + .unwrap_or(Ok(xml::NSAttName::DefaultAttName))?; + let value = xml::AttValue::from(namespace_declaration.namespace.as_str()); + let xml_attribute = xml::Attribute::NamespaceDeclaration { ns_name, value }; + attributes.push(xml_attribute); + } + + let s_tag = xml::STag { name, attributes }; + + s_tag.write(&mut self.inner).await?; + + self.depth.push(element.name); + self.namespace_declarations + .push(element.namespace_declarations); + Ok(()) } - pub async fn write_end(&mut self) -> Result<(), Error> { - let e_tag; + pub async fn write_end(&mut self) -> Result<()> { if let Some(name) = &self.depth.pop() { + let e_tag; let namespace_declarations_stack: Vec<_> = self.namespace_declarations.iter().flatten().collect(); let namespace_declaration = namespace_declarations_stack @@ -65,25 +117,25 @@ impl> Sink for Writer { fn poll_ready( self: std::pin::Pin<&mut Self>, cx: &mut std::task::Context<'_>, - ) -> std::task::Poll> { + ) -> std::task::Poll> { todo!() } - fn start_send(self: std::pin::Pin<&mut Self>, item: E) -> Result<(), Self::Error> { + fn start_send(self: std::pin::Pin<&mut Self>, item: E) -> Result<()> { todo!() } fn poll_flush( self: std::pin::Pin<&mut Self>, cx: &mut std::task::Context<'_>, - ) -> std::task::Poll> { + ) -> std::task::Poll> { todo!() } fn poll_close( self: std::pin::Pin<&mut Self>, cx: &mut std::task::Context<'_>, - ) -> std::task::Poll> { + ) -> std::task::Poll> { todo!() } } diff --git a/src/xml/mod.rs b/src/xml/mod.rs index 2d20ca0..8fb5419 100644 --- a/src/xml/mod.rs +++ b/src/xml/mod.rs @@ -1,4 +1,6 @@ -use std::{char, ops::Deref}; +use std::{char, convert::Infallible, ops::Deref, str::FromStr}; + +use parsers_complete::Parser; use crate::error::Error; @@ -228,14 +230,14 @@ pub enum EntityValue<'s> { SingleQuoted(Vec>), } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum AttValueData<'s> { String(&'s str), Reference(Reference<'s>), } /// [10] AttValue ::= '"' ([^<&"] | Reference)* '"' /// | "'" ([^<&'] | Reference)* "'" -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum AttValue<'s> { DoubleQuoted(Vec>), SingleQuoted(Vec>), @@ -259,6 +261,34 @@ impl<'s> AttValue<'s> { } } +impl<'s> From<&'s str> for AttValue<'s> { + fn from(s: &'s str) -> AttValue<'s> { + let mut data = Vec::new(); + for str in s.split_inclusive(|c| c == '<' || c == '"') { + if let Some(str) = str.strip_suffix('<') { + if !str.is_empty() { + data.push(AttValueData::String(str)) + } + data.push(AttValueData::Reference(Reference::EntityRef(EntityRef( + Name::parse_full("lt").unwrap(), + )))) + } else if let Some(str) = str.strip_suffix('"') { + if !str.is_empty() { + data.push(AttValueData::String(str)) + } + data.push(AttValueData::Reference(Reference::EntityRef(EntityRef( + Name::parse_full("quot").unwrap(), + )))) + } else { + if !str.is_empty() { + data.push(AttValueData::String(str)) + } + } + } + AttValue::DoubleQuoted(data) + } +} + /// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") #[derive(Debug)] pub enum SystemLiteral<'s> { @@ -673,7 +703,7 @@ pub struct IgnoreSectContents<'s> { pub struct Ignore<'s>(&'s str); /// [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum CharRef<'s> { Decimal(&'s str), Hexadecimal(&'s str), @@ -706,7 +736,7 @@ impl<'s> CharRef<'s> { } /// [67] Reference ::= EntityRef | CharRef -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum Reference<'s> { EntityRef(EntityRef<'s>), CharRef(CharRef<'s>), @@ -729,8 +759,8 @@ impl<'s> Reference<'s> { } /// [68] EntityRef ::= '&' Name ';' -#[derive(Clone, Debug)] -pub struct EntityRef<'s>(Name<'s>); +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct EntityRef<'s>(pub(crate) Name<'s>); impl<'s> Deref for EntityRef<'s> { type Target = Name<'s>; @@ -835,3 +865,60 @@ pub struct NotationDecl<'s> { /// [83] PublicID ::= 'PUBLIC' S PubidLiteral #[derive(Debug)] pub struct PublicID<'s>(PubidLiteral<'s>); + +#[cfg(test)] +mod test { + use super::{AttValue, AttValueData, EntityRef, Name, Reference}; + + #[test] + fn att_value_from_str() { + assert_eq!( + AttValue::from("hsdaflaskdf