From fe389c38ca2ff23e3f2bd2305b455e4d551a9ccc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?cel=20=F0=9F=8C=B8?= <cel@bunny.garden>
Date: Tue, 25 Feb 2025 18:08:20 +0000
Subject: [PATCH] implement proper errors with thiserror

---
 Cargo.lock     | 31 +++++++++++++++---
 Cargo.toml     |  1 +
 src/element.rs | 20 +++++++-----
 src/error.rs   | 85 +++++++++++++++++++++++++-------------------------
 src/xml/mod.rs | 16 +++++-----
 5 files changed, 91 insertions(+), 62 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 2cb6eb2..89ef86b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,6 +1,6 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-version = 3
+version = 4
 
 [[package]]
 name = "addr2line"
@@ -294,6 +294,7 @@ dependencies = [
  "futures",
  "futures-util",
  "nom",
+ "thiserror",
  "tokio",
  "tracing",
 ]
@@ -312,9 +313,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.78"
+version = "1.0.93"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
+checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
 dependencies = [
  "unicode-ident",
 ]
@@ -385,15 +386,35 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.52"
+version = "2.0.98"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07"
+checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1"
 dependencies = [
  "proc-macro2",
  "quote",
  "unicode-ident",
 ]
 
+[[package]]
+name = "thiserror"
+version = "2.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "2.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "tokio"
 version = "1.36.0"
diff --git a/Cargo.toml b/Cargo.toml
index 40e23bc..25f29a5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,5 +11,6 @@ circular = { version = "0.3.0", path = "../circular" }
 futures = "0.3.30"
 futures-util = { version = "0.3.31", features = ["sink"] }
 nom = "7.1.3"
+thiserror = "2.0.11"
 tokio = { version = "1.36.0", features = ["io-util", "net", "io-std", "full"] }
 tracing = "0.1.41"
diff --git a/src/element.rs b/src/element.rs
index 48d4d90..b954e2d 100644
--- a/src/element.rs
+++ b/src/element.rs
@@ -89,9 +89,10 @@ impl Element {
         if self.name.local_name == name {
             Ok(())
         } else {
-            return Err(DeserializeError::IncorrectName(
-                self.name.local_name.clone(),
-            ));
+            return Err(DeserializeError::IncorrectName {
+                expected: name.to_string(),
+                found: self.name.local_name.clone(),
+            });
         }
     }
 
@@ -99,10 +100,15 @@ impl Element {
         if self.name.namespace.as_deref() == Some(namespace) {
             return Ok(());
         } else {
-            if let Some(namespace) = &self.name.namespace {
-                return Err(DeserializeError::IncorrectNamespace(namespace.clone()));
+            if let Some(actual_namespace) = &self.name.namespace {
+                return Err(DeserializeError::IncorrectNamespace {
+                    expected: namespace.to_string(),
+                    found: actual_namespace.clone(),
+                });
             } else {
-                return Err(DeserializeError::Unqualified);
+                return Err(DeserializeError::Unqualified {
+                    expected: namespace.to_string(),
+                });
             }
         }
     }
@@ -358,7 +364,7 @@ impl Element {
             let child = self
                 .content
                 .pop_front()
-                .ok_or(DeserializeError::MissingChild)?;
+                .ok_or(DeserializeError::MissingValue)?;
             match child {
                 Content::Element(_) => {
                     return Err(DeserializeError::UnexpectedContent(self.content.clone()))
diff --git a/src/error.rs b/src/error.rs
index cf01895..65a1503 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -1,71 +1,72 @@
 use std::{
     collections::{HashMap, VecDeque},
     num::ParseIntError,
-    str::{FromStr, Utf8Error},
+    str::Utf8Error,
 };
 
-use crate::{
-    element::{Content, Name, NamespaceDeclaration},
-    Element,
-};
+use thiserror::Error;
 
-#[derive(Debug)]
+use crate::element::{Content, Name, NamespaceDeclaration};
+
+#[derive(Error, Debug)]
 pub enum DeserializeError {
+    #[error("could not parse string {0:?} to requested value")]
     FromStr(String),
+    #[error("unexpected attributes {0:?}")]
     UnexpectedAttributes(HashMap<Name, String>),
+    #[error("unexpected element content: {0:?}")]
     UnexpectedContent(VecDeque<Content>),
-    UnexpectedElement(Element),
+    #[error("attribute `{0:?}` missing")]
     MissingAttribute(Name),
-    IncorrectName(String),
-    IncorrectNamespace(String),
-    Unqualified,
+    #[error("incorrect localname: expected `{expected:?}`, found `{found:?}`")]
+    IncorrectName { expected: String, found: String },
+    #[error("incorrect namespace: expected `{expected:?}`, found `{found:?}`")]
+    IncorrectNamespace { expected: String, found: String },
+    #[error("unqualified namespace: expected `{expected:?}`")]
+    Unqualified { expected: String },
+    #[error("element missing expected child")]
     MissingChild,
+    #[error("element missing expected text value")]
     MissingValue,
 }
 
-#[derive(Debug)]
-// TODO: thiserror
+#[derive(Error, Debug)]
 pub enum Error {
-    ReadError(std::io::Error),
-    Utf8Error(Utf8Error),
+    #[error(transparent)]
+    ReadError(#[from] std::io::Error),
+    #[error(transparent)]
+    Utf8Error(#[from] Utf8Error),
+    #[error("nom parse error: {0}")]
     ParseError(String),
+    #[error("unknown xml entity reference `&{0};`")]
     EntityProcessError(String),
-    // TODO: better choice for failures than string
-    InvalidCharRef(String),
+    #[error(transparent)]
+    InvalidCharRef(CharRefError),
+    #[error("duplicate namespace declaration: {0:?}")]
     DuplicateNameSpaceDeclaration(NamespaceDeclaration),
+    #[error("duplicate attribute: {0}")]
     DuplicateAttribute(String),
-    UnqualifiedNamespace(String),
+    #[error("mismatched end tag: expected `{0:?}`, found `{1:?}`")]
     MismatchedEndTag(Name, Name),
+    #[error("not currently in any element")]
     NotInElement(String),
+    #[error("extra unexpected data included in complete parse: `{0}`")]
     ExtraData(String),
+    #[error("namespace `{0}` has not previously been declared")]
     UndeclaredNamespace(String),
-    IncorrectName(Name),
-    DeserializeError(String),
-    Deserialize(DeserializeError),
+    #[error(transparent)]
+    Deserialize(#[from] DeserializeError),
     /// root element end tag already processed
+    #[error("root element has already been fully processed")]
     RootElementEnded,
 }
 
-impl From<DeserializeError> for Error {
-    fn from(e: DeserializeError) -> Self {
-        Self::Deserialize(e)
-    }
-}
-
-impl From<std::io::Error> for Error {
-    fn from(e: std::io::Error) -> Self {
-        Self::ReadError(e)
-    }
-}
-
-impl From<Utf8Error> for Error {
-    fn from(e: Utf8Error) -> Self {
-        Self::Utf8Error(e)
-    }
-}
-
-impl From<ParseIntError> for Error {
-    fn from(e: ParseIntError) -> Self {
-        Self::InvalidCharRef(e.to_string())
-    }
+#[derive(Error, Debug)]
+pub enum CharRefError {
+    #[error(transparent)]
+    ParseInt(#[from] ParseIntError),
+    #[error("u32 `{0}` does not represent a valid char")]
+    IntegerNotAChar(u32),
+    #[error("`{0}` is not a valid xml char")]
+    InvalidXMLChar(char),
 }
diff --git a/src/xml/mod.rs b/src/xml/mod.rs
index 43f3027..3982070 100644
--- a/src/xml/mod.rs
+++ b/src/xml/mod.rs
@@ -2,7 +2,7 @@ use std::{char, convert::Infallible, ops::Deref, str::FromStr};
 
 use parsers_complete::Parser;
 
-use crate::error::Error;
+use crate::error::{CharRefError, Error};
 
 pub mod composers;
 pub mod parsers;
@@ -736,23 +736,23 @@ impl<'s> CharRef<'s> {
         let int: u32;
         match self {
             CharRef::Decimal(dec) => {
-                int = dec.parse()?;
+                int = dec
+                    .parse()
+                    .map_err(|e| Error::InvalidCharRef(CharRefError::ParseInt(e)))?;
             }
             CharRef::Hexadecimal(hex) => {
-                int = <u32>::from_str_radix(hex, 16)?;
+                int = <u32>::from_str_radix(hex, 16)
+                    .map_err(|e| Error::InvalidCharRef(CharRefError::ParseInt(e)))?;
             }
         }
         let c = std::char::from_u32(int);
 
-        let c = c.ok_or_else(|| Error::InvalidCharRef(int.to_string()))?;
+        let c = c.ok_or_else(|| Error::InvalidCharRef(CharRefError::IntegerNotAChar(int)))?;
         if matches!(c, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}')
         {
             return Ok(c);
         } else {
-            return Err(Error::InvalidCharRef(format!(
-                "{} is not a valid xml char",
-                c
-            )));
+            return Err(Error::InvalidCharRef(CharRefError::InvalidXMLChar(c)));
         };
     }
 }