From c2a84072ac8c393a28711e118942da7b0377d895 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?cel=20=F0=9F=8C=B8?= Date: Tue, 19 Nov 2024 16:07:34 +0000 Subject: [PATCH] fix attribute namespace resolution --- src/element.rs | 4 +- src/reader.rs | 468 ++++++++++++++++++++----------------------------- 2 files changed, 196 insertions(+), 276 deletions(-) diff --git a/src/element.rs b/src/element.rs index 4c39c6a..6d2a6b7 100644 --- a/src/element.rs +++ b/src/element.rs @@ -19,7 +19,7 @@ pub struct NamespaceDeclaration { #[derive(PartialEq, Eq, Hash, Clone, Debug)] pub struct Name { pub namespace: String, - pub name: String, + pub local_name: String, } #[derive(Debug)] @@ -40,7 +40,7 @@ pub struct Element { // namespace: String, // hashmap of explicit namespace declarations on the element itself only // possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader. - // pub namespace_decl: HashSet, + pub namespace_declarations: HashSet, // attributes can be in a different namespace than the element. how to make sure they are valid? // maybe include the namespace instead of or with the prefix // you can calculate the prefix from the namespaced name and the current writer context diff --git a/src/reader.rs b/src/reader.rs index 654ca2a..a05e73b 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -277,10 +277,12 @@ where impl Reader { fn start_tag_from_xml( depth: &mut Vec, - namespaces: &mut Vec>, + namespace_declarations: &mut Vec>, s_tag: xml::STag, ) -> Result { - let mut namespace_declarations = HashSet::new(); + // namespace declarations on element + + let mut element_namespace_declarations = HashSet::new(); for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| { if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { Some((ns_name, value)) @@ -298,20 +300,49 @@ impl Reader { prefix, namespace: namespace.process()?, }; - if !namespace_declarations.insert(namespace.clone()) { + if !element_namespace_declarations.insert(namespace.clone()) { return Err(Error::DuplicateNameSpaceDeclaration(namespace)); } } - // all namespaces available to the element (from both parent elements and element itself) - let namespace_stack: Vec<&NamespaceDeclaration> = namespaces + // all namespaces available in the element scope (from both parent elements and element itself) + let namespace_declarations_stack: Vec<&NamespaceDeclaration> = namespace_declarations .iter() .flatten() - .chain(namespace_declarations.iter()) + .chain(element_namespace_declarations.iter()) .collect(); - let mut attributes = HashMap::new(); + // element name and default attribute namespace + let element_namespace_declaration; + let element_local_name = s_tag.name.local_part().to_string(); + + match s_tag.name.prefix() { + Some(prefix) => { + element_namespace_declaration = namespace_declarations_stack + .iter() + .rfind(|namespace| namespace.prefix.as_deref() == Some(prefix)); + } + None => { + element_namespace_declaration = namespace_declarations_stack + .iter() + .rfind(|namespace| namespace.prefix == None); + } + } + + let element_default_namespace = element_namespace_declaration + .ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))? + .namespace + .clone(); + + let element_name = Name { + namespace: element_default_namespace, + local_name: element_local_name, + }; + + // attributes + + let mut attributes = HashMap::new(); for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| { if let xml::Attribute::Attribute { name, value } = attribute { Some((name, value)) @@ -319,28 +350,26 @@ impl Reader { None } }) { - let namespace; - let attribute_name; - match q_name { - xml::QName::PrefixedName(prefixed_name) => { - namespace = namespace_stack.iter().rfind(|namespace| { - namespace.prefix.as_deref() == Some(**prefixed_name.prefix) - }); - attribute_name = prefixed_name.local_part.to_string(); - } - xml::QName::UnprefixedName(unprefixed_name) => { - namespace = namespace_stack - .iter() - .rfind(|namespace| namespace.prefix == None); - attribute_name = unprefixed_name.to_string(); + let attribute_namespace_declaration; + let attribute_local_name = q_name.local_part().to_string(); + match q_name.prefix() { + Some(prefix) => { + attribute_namespace_declaration = + namespace_declarations_stack + .iter() + .rfind(|namespace_declaration| { + namespace_declaration.prefix.as_deref() == Some(prefix) + }); } + None => attribute_namespace_declaration = element_namespace_declaration, } - if let Some(namespace_declaration) = namespace { + if let Some(namespace_declaration) = attribute_namespace_declaration { let name = Name { namespace: namespace_declaration.namespace.clone(), - name: attribute_name, + local_name: attribute_local_name, }; let value = value.process()?; + // check for duplicate attribute if let Some(_value) = attributes.insert(name, value) { return Err(Error::DuplicateAttribute(q_name.to_string())); } @@ -349,38 +378,13 @@ impl Reader { } } - let name; - let namespace; - match &s_tag.name { - xml::QName::PrefixedName(prefixed_name) => { - namespace = namespace_stack - .iter() - .rfind(|namespace| namespace.prefix.as_deref() == Some(**prefixed_name.prefix)); - name = prefixed_name.local_part.to_string(); - } - xml::QName::UnprefixedName(unprefixed_name) => { - namespace = namespace_stack - .iter() - .rfind(|namespace| namespace.prefix == None); - name = unprefixed_name.to_string(); - } - } + depth.push(element_name.clone()); - let namespace_declaration = (*namespace - .ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?) - .clone(); - - let name = Name { - namespace: namespace_declaration.namespace, - name, - }; - - depth.push(name.clone()); - - namespaces.push(namespace_declarations.clone()); + namespace_declarations.push(element_namespace_declarations.clone()); return Ok(Element { - name, + name: element_name, + namespace_declarations: element_namespace_declarations, attributes, content: Vec::new(), }); @@ -418,13 +422,16 @@ impl Reader { name = unprefixed_name.to_string(); } } - let e_tag_name = Name { namespace, name }; + let e_tag_name = Name { + namespace, + local_name: name, + }; if s_tag_name == e_tag_name { namespaces.pop(); return Ok(()); } else { return Err(Error::MismatchedEndTag( - s_tag_name.name, + s_tag_name.local_name, e_tag.name.to_string(), )); } @@ -434,240 +441,153 @@ impl Reader { } fn element_from_xml( - namespaces: &mut Vec>, + namespace_declarations: &mut Vec>, element: xml::Element, ) -> Result { + let xml_name; + let xml_attributes; + let xml_content; + let xml_e_name; + match element { xml::Element::Empty(empty_elem_tag) => { - let mut namespace_declarations = HashSet::new(); - for (prefix, namespace) in - empty_elem_tag.attributes.iter().filter_map(|attribute| { - if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { - Some((ns_name, value)) - } else { - None - } - }) - { - let prefix = match prefix { - xml::NSAttName::PrefixedAttName(prefixed_att_name) => { - Some(prefixed_att_name.to_string()) - } - xml::NSAttName::DefaultAttName => None, - }; - let namespace = NamespaceDeclaration { - prefix, - namespace: namespace.process()?, - }; - if !namespace_declarations.insert(namespace.clone()) { - return Err(Error::DuplicateNameSpaceDeclaration(namespace)); - } - } - - // all namespaces available to the element (from both parent elements and element itself) - let namespace_stack: Vec<&NamespaceDeclaration> = namespaces - .iter() - .flatten() - .chain(namespace_declarations.iter()) - .collect(); - - let mut attributes = HashMap::new(); - - for (q_name, value) in empty_elem_tag.attributes.iter().filter_map(|attribute| { - if let xml::Attribute::Attribute { name, value } = attribute { - Some((name, value)) - } else { - None - } - }) { - let namespace; - let attribute_name; - match q_name { - xml::QName::PrefixedName(prefixed_name) => { - namespace = namespace_stack.iter().rfind(|namespace| { - namespace.prefix.as_deref() == Some(**prefixed_name.prefix) - }); - attribute_name = prefixed_name.local_part.to_string(); - } - xml::QName::UnprefixedName(unprefixed_name) => { - namespace = namespace_stack - .iter() - .rfind(|namespace| namespace.prefix == None); - attribute_name = unprefixed_name.to_string(); - } - } - if let Some(namespace) = namespace { - let namespace = (*namespace).clone(); - let name = Name { - namespace: namespace.namespace, - name: attribute_name, - }; - let value = value.process()?; - if let Some(_value) = attributes.insert(name, value) { - return Err(Error::DuplicateAttribute(q_name.to_string())); - } - } else { - return Err(Error::UnqualifiedNamespace(q_name.to_string())); - } - } - - let name; - let namespace; - match &empty_elem_tag.name { - xml::QName::PrefixedName(prefixed_name) => { - namespace = namespace_stack.iter().rfind(|namespace| { - namespace.prefix.as_deref() == Some(**prefixed_name.prefix) - }); - name = prefixed_name.local_part.to_string(); - } - xml::QName::UnprefixedName(unprefixed_name) => { - namespace = namespace_stack - .iter() - .rfind(|namespace| namespace.prefix == None); - name = unprefixed_name.to_string(); - } - } - - let namespace = (*namespace - .ok_or_else(|| Error::UnqualifiedNamespace(empty_elem_tag.name.to_string()))?) - .clone(); - - let name = Name { - namespace: namespace.namespace, - name, - }; - - return Ok(Element { - name, - attributes, - content: Vec::new(), - }); + xml_name = empty_elem_tag.name; + xml_attributes = empty_elem_tag.attributes; + xml_content = None; + xml_e_name = None; } xml::Element::NotEmpty(s_tag, content, e_tag) => { - if s_tag.name != e_tag.name { - return Err(Error::MismatchedEndTag( - s_tag.name.to_string(), - e_tag.name.to_string(), - )); - } - let mut namespace_declarations = HashSet::new(); - for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| { - if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { - Some((ns_name, value)) - } else { - None - } - }) { - let prefix = match prefix { - xml::NSAttName::PrefixedAttName(prefixed_att_name) => { - Some(prefixed_att_name.to_string()) - } - xml::NSAttName::DefaultAttName => None, - }; - let namespace = NamespaceDeclaration { - prefix, - namespace: namespace.process()?, - }; - if !namespace_declarations.insert(namespace.clone()) { - return Err(Error::DuplicateNameSpaceDeclaration(namespace)); - } - } - - // all namespaces available to the element (from both parent elements and element itself) - let namespace_stack: Vec<&NamespaceDeclaration> = namespaces - .iter() - .flatten() - .chain(namespace_declarations.iter()) - .collect(); - - let mut attributes = HashMap::new(); - - for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| { - if let xml::Attribute::Attribute { name, value } = attribute { - Some((name, value)) - } else { - None - } - }) { - let namespace; - let attribute_name; - match q_name { - xml::QName::PrefixedName(prefixed_name) => { - namespace = namespace_stack.iter().rfind(|namespace| { - namespace.prefix.as_deref() == Some(**prefixed_name.prefix) - }); - attribute_name = prefixed_name.local_part.to_string(); - } - xml::QName::UnprefixedName(unprefixed_name) => { - namespace = namespace_stack - .iter() - .rfind(|namespace| namespace.prefix == None); - attribute_name = unprefixed_name.to_string(); - } - } - if let Some(namespace) = namespace { - let namespace = (*namespace).clone(); - let name = Name { - namespace: namespace.namespace, - name: attribute_name, - }; - let value = value.process()?; - if let Some(_value) = attributes.insert(name, value) { - return Err(Error::DuplicateAttribute(q_name.to_string())); - } - } else { - return Err(Error::UnqualifiedNamespace(q_name.to_string())); - } - } - - let name; - let namespace; - match &s_tag.name { - xml::QName::PrefixedName(prefixed_name) => { - namespace = namespace_stack.iter().rfind(|namespace| { - namespace.prefix.as_deref() == Some(**prefixed_name.prefix) - }); - name = prefixed_name.local_part.to_string(); - } - xml::QName::UnprefixedName(unprefixed_name) => { - namespace = namespace_stack - .iter() - .rfind(|namespace| namespace.prefix == None); - name = unprefixed_name.to_string(); - } - } - - let namespace = (*namespace - .ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?) - .clone(); - - let name = Name { - namespace: namespace.namespace, - name, - }; - - namespaces.push(namespace_declarations.clone()); - - let content = Self::content_from_xml(namespaces, content)?; - - namespaces.pop(); - - return Ok(Element { - name, - attributes, - content, - }); + xml_name = s_tag.name; + xml_attributes = s_tag.attributes; + xml_content = Some(content); + xml_e_name = Some(e_tag.name); } } + + // namespace declarations on element + + let mut element_namespace_declarations = HashSet::new(); + for (prefix, namespace) in xml_attributes.iter().filter_map(|attribute| { + if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { + Some((ns_name, value)) + } else { + None + } + }) { + let prefix = match prefix { + xml::NSAttName::PrefixedAttName(prefixed_att_name) => { + Some(prefixed_att_name.to_string()) + } + xml::NSAttName::DefaultAttName => None, + }; + let namespace = NamespaceDeclaration { + prefix, + namespace: namespace.process()?, + }; + if !element_namespace_declarations.insert(namespace.clone()) { + return Err(Error::DuplicateNameSpaceDeclaration(namespace)); + } + } + + // all namespaces available in the element scope (from both parent elements and element itself) + let namespace_declarations_stack: Vec<&NamespaceDeclaration> = namespace_declarations + .iter() + .flatten() + .chain(element_namespace_declarations.iter()) + .collect(); + + // element name and default attribute namespace + + let element_namespace_declaration; + let element_local_name = xml_name.local_part().to_string(); + + match xml_name.prefix() { + Some(prefix) => { + element_namespace_declaration = namespace_declarations_stack + .iter() + .rfind(|namespace| namespace.prefix.as_deref() == Some(prefix)); + } + None => { + element_namespace_declaration = namespace_declarations_stack + .iter() + .rfind(|namespace| namespace.prefix == None); + } + } + + let element_default_namespace = element_namespace_declaration + .ok_or_else(|| Error::UnqualifiedNamespace(xml_name.to_string()))? + .namespace + .clone(); + + let element_name = Name { + namespace: element_default_namespace, + local_name: element_local_name, + }; + + // attributes + + let mut attributes = HashMap::new(); + for (q_name, value) in xml_attributes.iter().filter_map(|attribute| { + if let xml::Attribute::Attribute { name, value } = attribute { + Some((name, value)) + } else { + None + } + }) { + let attribute_namespace_declaration; + let attribute_local_name = q_name.local_part().to_string(); + match q_name.prefix() { + Some(prefix) => { + attribute_namespace_declaration = + namespace_declarations_stack + .iter() + .rfind(|namespace_declaration| { + namespace_declaration.prefix.as_deref() == Some(prefix) + }); + } + None => attribute_namespace_declaration = element_namespace_declaration, + } + if let Some(namespace_declaration) = attribute_namespace_declaration { + let name = Name { + namespace: namespace_declaration.namespace.clone(), + local_name: attribute_local_name, + }; + let value = value.process()?; + // check for duplicate attribute + if let Some(_value) = attributes.insert(name, value) { + return Err(Error::DuplicateAttribute(q_name.to_string())); + } + } else { + return Err(Error::UnqualifiedNamespace(q_name.to_string())); + } + } + + let content; + if let Some(xml_content) = xml_content { + namespace_declarations.push(element_namespace_declarations.clone()); + + content = Self::content_from_xml(namespace_declarations, xml_content)?; + + namespace_declarations.pop(); + } else { + content = Vec::new(); + } + + return Ok(Element { + name: element_name, + namespace_declarations: element_namespace_declarations, + attributes, + content, + }); } fn content_from_xml( namespaces: &mut Vec>, - element: xml::Content, + xml_content: xml::Content, ) -> Result> { let mut content = Vec::new(); - let mut text = element.char_data.map(|str| String::from(*str)); - for (content_item, char_data) in element.content { + let mut text = xml_content.char_data.map(|str| String::from(*str)); + for (content_item, char_data) in xml_content.content { match content_item { xml::ContentItem::Element(element) => { text.map(|text| content.push(Content::Text(text)));