fix attribute namespace resolution

This commit is contained in:
cel 🌸 2024-11-19 16:07:34 +00:00
parent c8ed16a2d1
commit c2a84072ac
2 changed files with 196 additions and 276 deletions

View File

@ -19,7 +19,7 @@ pub struct NamespaceDeclaration {
#[derive(PartialEq, Eq, Hash, Clone, Debug)] #[derive(PartialEq, Eq, Hash, Clone, Debug)]
pub struct Name { pub struct Name {
pub namespace: String, pub namespace: String,
pub name: String, pub local_name: String,
} }
#[derive(Debug)] #[derive(Debug)]
@ -40,7 +40,7 @@ pub struct Element {
// namespace: String, // namespace: String,
// hashmap of explicit namespace declarations on the element itself only // hashmap of explicit namespace declarations on the element itself only
// possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader. // possibly not needed as can be calculated at write time depending on context and qualified namespace, and for reading, element validity and namespaces are kept track of by the reader.
// pub namespace_decl: HashSet<Namespace>, pub namespace_declarations: HashSet<NamespaceDeclaration>,
// attributes can be in a different namespace than the element. how to make sure they are valid? // attributes can be in a different namespace than the element. how to make sure they are valid?
// maybe include the namespace instead of or with the prefix // maybe include the namespace instead of or with the prefix
// you can calculate the prefix from the namespaced name and the current writer context // you can calculate the prefix from the namespaced name and the current writer context

View File

@ -277,10 +277,12 @@ where
impl<R> Reader<R> { impl<R> Reader<R> {
fn start_tag_from_xml( fn start_tag_from_xml(
depth: &mut Vec<Name>, depth: &mut Vec<Name>,
namespaces: &mut Vec<HashSet<NamespaceDeclaration>>, namespace_declarations: &mut Vec<HashSet<NamespaceDeclaration>>,
s_tag: xml::STag, s_tag: xml::STag,
) -> Result<Element> { ) -> Result<Element> {
let mut namespace_declarations = HashSet::new(); // namespace declarations on element
let mut element_namespace_declarations = HashSet::new();
for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| { for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| {
if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute {
Some((ns_name, value)) Some((ns_name, value))
@ -298,20 +300,49 @@ impl<R> Reader<R> {
prefix, prefix,
namespace: namespace.process()?, namespace: namespace.process()?,
}; };
if !namespace_declarations.insert(namespace.clone()) { if !element_namespace_declarations.insert(namespace.clone()) {
return Err(Error::DuplicateNameSpaceDeclaration(namespace)); return Err(Error::DuplicateNameSpaceDeclaration(namespace));
} }
} }
// all namespaces available to the element (from both parent elements and element itself) // all namespaces available in the element scope (from both parent elements and element itself)
let namespace_stack: Vec<&NamespaceDeclaration> = namespaces let namespace_declarations_stack: Vec<&NamespaceDeclaration> = namespace_declarations
.iter() .iter()
.flatten() .flatten()
.chain(namespace_declarations.iter()) .chain(element_namespace_declarations.iter())
.collect(); .collect();
let mut attributes = HashMap::new(); // element name and default attribute namespace
let element_namespace_declaration;
let element_local_name = s_tag.name.local_part().to_string();
match s_tag.name.prefix() {
Some(prefix) => {
element_namespace_declaration = namespace_declarations_stack
.iter()
.rfind(|namespace| namespace.prefix.as_deref() == Some(prefix));
}
None => {
element_namespace_declaration = namespace_declarations_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
}
}
let element_default_namespace = element_namespace_declaration
.ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?
.namespace
.clone();
let element_name = Name {
namespace: element_default_namespace,
local_name: element_local_name,
};
// attributes
let mut attributes = HashMap::new();
for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| { for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| {
if let xml::Attribute::Attribute { name, value } = attribute { if let xml::Attribute::Attribute { name, value } = attribute {
Some((name, value)) Some((name, value))
@ -319,28 +350,26 @@ impl<R> Reader<R> {
None None
} }
}) { }) {
let namespace; let attribute_namespace_declaration;
let attribute_name; let attribute_local_name = q_name.local_part().to_string();
match q_name { match q_name.prefix() {
xml::QName::PrefixedName(prefixed_name) => { Some(prefix) => {
namespace = namespace_stack.iter().rfind(|namespace| { attribute_namespace_declaration =
namespace.prefix.as_deref() == Some(**prefixed_name.prefix) namespace_declarations_stack
});
attribute_name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter() .iter()
.rfind(|namespace| namespace.prefix == None); .rfind(|namespace_declaration| {
attribute_name = unprefixed_name.to_string(); namespace_declaration.prefix.as_deref() == Some(prefix)
});
} }
None => attribute_namespace_declaration = element_namespace_declaration,
} }
if let Some(namespace_declaration) = namespace { if let Some(namespace_declaration) = attribute_namespace_declaration {
let name = Name { let name = Name {
namespace: namespace_declaration.namespace.clone(), namespace: namespace_declaration.namespace.clone(),
name: attribute_name, local_name: attribute_local_name,
}; };
let value = value.process()?; let value = value.process()?;
// check for duplicate attribute
if let Some(_value) = attributes.insert(name, value) { if let Some(_value) = attributes.insert(name, value) {
return Err(Error::DuplicateAttribute(q_name.to_string())); return Err(Error::DuplicateAttribute(q_name.to_string()));
} }
@ -349,38 +378,13 @@ impl<R> Reader<R> {
} }
} }
let name; depth.push(element_name.clone());
let namespace;
match &s_tag.name {
xml::QName::PrefixedName(prefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix.as_deref() == Some(**prefixed_name.prefix));
name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
name = unprefixed_name.to_string();
}
}
let namespace_declaration = (*namespace namespace_declarations.push(element_namespace_declarations.clone());
.ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?)
.clone();
let name = Name {
namespace: namespace_declaration.namespace,
name,
};
depth.push(name.clone());
namespaces.push(namespace_declarations.clone());
return Ok(Element { return Ok(Element {
name, name: element_name,
namespace_declarations: element_namespace_declarations,
attributes, attributes,
content: Vec::new(), content: Vec::new(),
}); });
@ -418,13 +422,16 @@ impl<R> Reader<R> {
name = unprefixed_name.to_string(); name = unprefixed_name.to_string();
} }
} }
let e_tag_name = Name { namespace, name }; let e_tag_name = Name {
namespace,
local_name: name,
};
if s_tag_name == e_tag_name { if s_tag_name == e_tag_name {
namespaces.pop(); namespaces.pop();
return Ok(()); return Ok(());
} else { } else {
return Err(Error::MismatchedEndTag( return Err(Error::MismatchedEndTag(
s_tag_name.name, s_tag_name.local_name,
e_tag.name.to_string(), e_tag.name.to_string(),
)); ));
} }
@ -434,124 +441,33 @@ impl<R> Reader<R> {
} }
fn element_from_xml( fn element_from_xml(
namespaces: &mut Vec<HashSet<NamespaceDeclaration>>, namespace_declarations: &mut Vec<HashSet<NamespaceDeclaration>>,
element: xml::Element, element: xml::Element,
) -> Result<Element> { ) -> Result<Element> {
let xml_name;
let xml_attributes;
let xml_content;
let xml_e_name;
match element { match element {
xml::Element::Empty(empty_elem_tag) => { xml::Element::Empty(empty_elem_tag) => {
let mut namespace_declarations = HashSet::new(); xml_name = empty_elem_tag.name;
for (prefix, namespace) in xml_attributes = empty_elem_tag.attributes;
empty_elem_tag.attributes.iter().filter_map(|attribute| { xml_content = None;
if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { xml_e_name = None;
Some((ns_name, value))
} else {
None
}
})
{
let prefix = match prefix {
xml::NSAttName::PrefixedAttName(prefixed_att_name) => {
Some(prefixed_att_name.to_string())
}
xml::NSAttName::DefaultAttName => None,
};
let namespace = NamespaceDeclaration {
prefix,
namespace: namespace.process()?,
};
if !namespace_declarations.insert(namespace.clone()) {
return Err(Error::DuplicateNameSpaceDeclaration(namespace));
}
}
// all namespaces available to the element (from both parent elements and element itself)
let namespace_stack: Vec<&NamespaceDeclaration> = namespaces
.iter()
.flatten()
.chain(namespace_declarations.iter())
.collect();
let mut attributes = HashMap::new();
for (q_name, value) in empty_elem_tag.attributes.iter().filter_map(|attribute| {
if let xml::Attribute::Attribute { name, value } = attribute {
Some((name, value))
} else {
None
}
}) {
let namespace;
let attribute_name;
match q_name {
xml::QName::PrefixedName(prefixed_name) => {
namespace = namespace_stack.iter().rfind(|namespace| {
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
});
attribute_name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
attribute_name = unprefixed_name.to_string();
}
}
if let Some(namespace) = namespace {
let namespace = (*namespace).clone();
let name = Name {
namespace: namespace.namespace,
name: attribute_name,
};
let value = value.process()?;
if let Some(_value) = attributes.insert(name, value) {
return Err(Error::DuplicateAttribute(q_name.to_string()));
}
} else {
return Err(Error::UnqualifiedNamespace(q_name.to_string()));
}
}
let name;
let namespace;
match &empty_elem_tag.name {
xml::QName::PrefixedName(prefixed_name) => {
namespace = namespace_stack.iter().rfind(|namespace| {
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
});
name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
name = unprefixed_name.to_string();
}
}
let namespace = (*namespace
.ok_or_else(|| Error::UnqualifiedNamespace(empty_elem_tag.name.to_string()))?)
.clone();
let name = Name {
namespace: namespace.namespace,
name,
};
return Ok(Element {
name,
attributes,
content: Vec::new(),
});
} }
xml::Element::NotEmpty(s_tag, content, e_tag) => { xml::Element::NotEmpty(s_tag, content, e_tag) => {
if s_tag.name != e_tag.name { xml_name = s_tag.name;
return Err(Error::MismatchedEndTag( xml_attributes = s_tag.attributes;
s_tag.name.to_string(), xml_content = Some(content);
e_tag.name.to_string(), xml_e_name = Some(e_tag.name);
));
} }
let mut namespace_declarations = HashSet::new(); }
for (prefix, namespace) in s_tag.attributes.iter().filter_map(|attribute| {
// namespace declarations on element
let mut element_namespace_declarations = HashSet::new();
for (prefix, namespace) in xml_attributes.iter().filter_map(|attribute| {
if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute { if let xml::Attribute::NamespaceDeclaration { ns_name, value } = attribute {
Some((ns_name, value)) Some((ns_name, value))
} else { } else {
@ -568,50 +484,76 @@ impl<R> Reader<R> {
prefix, prefix,
namespace: namespace.process()?, namespace: namespace.process()?,
}; };
if !namespace_declarations.insert(namespace.clone()) { if !element_namespace_declarations.insert(namespace.clone()) {
return Err(Error::DuplicateNameSpaceDeclaration(namespace)); return Err(Error::DuplicateNameSpaceDeclaration(namespace));
} }
} }
// all namespaces available to the element (from both parent elements and element itself) // all namespaces available in the element scope (from both parent elements and element itself)
let namespace_stack: Vec<&NamespaceDeclaration> = namespaces let namespace_declarations_stack: Vec<&NamespaceDeclaration> = namespace_declarations
.iter() .iter()
.flatten() .flatten()
.chain(namespace_declarations.iter()) .chain(element_namespace_declarations.iter())
.collect(); .collect();
let mut attributes = HashMap::new(); // element name and default attribute namespace
for (q_name, value) in s_tag.attributes.iter().filter_map(|attribute| { let element_namespace_declaration;
let element_local_name = xml_name.local_part().to_string();
match xml_name.prefix() {
Some(prefix) => {
element_namespace_declaration = namespace_declarations_stack
.iter()
.rfind(|namespace| namespace.prefix.as_deref() == Some(prefix));
}
None => {
element_namespace_declaration = namespace_declarations_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
}
}
let element_default_namespace = element_namespace_declaration
.ok_or_else(|| Error::UnqualifiedNamespace(xml_name.to_string()))?
.namespace
.clone();
let element_name = Name {
namespace: element_default_namespace,
local_name: element_local_name,
};
// attributes
let mut attributes = HashMap::new();
for (q_name, value) in xml_attributes.iter().filter_map(|attribute| {
if let xml::Attribute::Attribute { name, value } = attribute { if let xml::Attribute::Attribute { name, value } = attribute {
Some((name, value)) Some((name, value))
} else { } else {
None None
} }
}) { }) {
let namespace; let attribute_namespace_declaration;
let attribute_name; let attribute_local_name = q_name.local_part().to_string();
match q_name { match q_name.prefix() {
xml::QName::PrefixedName(prefixed_name) => { Some(prefix) => {
namespace = namespace_stack.iter().rfind(|namespace| { attribute_namespace_declaration =
namespace.prefix.as_deref() == Some(**prefixed_name.prefix) namespace_declarations_stack
});
attribute_name = prefixed_name.local_part.to_string();
}
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter() .iter()
.rfind(|namespace| namespace.prefix == None); .rfind(|namespace_declaration| {
attribute_name = unprefixed_name.to_string(); namespace_declaration.prefix.as_deref() == Some(prefix)
});
} }
None => attribute_namespace_declaration = element_namespace_declaration,
} }
if let Some(namespace) = namespace { if let Some(namespace_declaration) = attribute_namespace_declaration {
let namespace = (*namespace).clone();
let name = Name { let name = Name {
namespace: namespace.namespace, namespace: namespace_declaration.namespace.clone(),
name: attribute_name, local_name: attribute_local_name,
}; };
let value = value.process()?; let value = value.process()?;
// check for duplicate attribute
if let Some(_value) = attributes.insert(name, value) { if let Some(_value) = attributes.insert(name, value) {
return Err(Error::DuplicateAttribute(q_name.to_string())); return Err(Error::DuplicateAttribute(q_name.to_string()));
} }
@ -620,54 +562,32 @@ impl<R> Reader<R> {
} }
} }
let name; let content;
let namespace; if let Some(xml_content) = xml_content {
match &s_tag.name { namespace_declarations.push(element_namespace_declarations.clone());
xml::QName::PrefixedName(prefixed_name) => {
namespace = namespace_stack.iter().rfind(|namespace| { content = Self::content_from_xml(namespace_declarations, xml_content)?;
namespace.prefix.as_deref() == Some(**prefixed_name.prefix)
}); namespace_declarations.pop();
name = prefixed_name.local_part.to_string(); } else {
content = Vec::new();
} }
xml::QName::UnprefixedName(unprefixed_name) => {
namespace = namespace_stack
.iter()
.rfind(|namespace| namespace.prefix == None);
name = unprefixed_name.to_string();
}
}
let namespace = (*namespace
.ok_or_else(|| Error::UnqualifiedNamespace(s_tag.name.to_string()))?)
.clone();
let name = Name {
namespace: namespace.namespace,
name,
};
namespaces.push(namespace_declarations.clone());
let content = Self::content_from_xml(namespaces, content)?;
namespaces.pop();
return Ok(Element { return Ok(Element {
name, name: element_name,
namespace_declarations: element_namespace_declarations,
attributes, attributes,
content, content,
}); });
} }
}
}
fn content_from_xml( fn content_from_xml(
namespaces: &mut Vec<HashSet<NamespaceDeclaration>>, namespaces: &mut Vec<HashSet<NamespaceDeclaration>>,
element: xml::Content, xml_content: xml::Content,
) -> Result<Vec<Content>> { ) -> Result<Vec<Content>> {
let mut content = Vec::new(); let mut content = Vec::new();
let mut text = element.char_data.map(|str| String::from(*str)); let mut text = xml_content.char_data.map(|str| String::from(*str));
for (content_item, char_data) in element.content { for (content_item, char_data) in xml_content.content {
match content_item { match content_item {
xml::ContentItem::Element(element) => { xml::ContentItem::Element(element) => {
text.map(|text| content.push(Content::Text(text))); text.map(|text| content.push(Content::Text(text)));