From 7653513c6f0a1da93cfcde0bff8888ebb113fdcb Mon Sep 17 00:00:00 2001 From: "D. Scott Boggs" Date: Tue, 27 Dec 2022 07:46:13 -0500 Subject: [PATCH 1/2] Wrap magic cookie in a mutex This commit also includes benchmarks proving the viability of reloading the magic database for every filetype request, should that become necessary. --- .gitignore | 1 + .vscode/settings.json | 1 + Cargo.toml | 8 +++- benches/magic.rs | 103 ++++++++++++++++++++++++++++++++++++++++++ src/mastodon.rs | 24 +++++----- 5 files changed, 125 insertions(+), 12 deletions(-) create mode 100644 benches/magic.rs diff --git a/.gitignore b/.gitignore index 7111969..812dc72 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ Cargo.lock .env mastodon-data.toml* libtest.rmeta +examples/playground.rs diff --git a/.vscode/settings.json b/.vscode/settings.json index 2c859eb..e4e28e7 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -7,6 +7,7 @@ "favourited", "indoc", "isolang", + "libmagic", "querystring", "reblog", "reqwest", diff --git a/Cargo.toml b/Cargo.toml index 488c3e6..1c7970e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,6 @@ serde_json = "1" serde_qs = "0.4.5" serde_urlencoded = "0.6.1" tap-reader = "1" -tungstenite = "0.18" url = "1" # Provides parsing for the link header in get_links() in page.rs hyper-old-types = "0.11.0" @@ -79,10 +78,17 @@ tempfile = "3" # for examples: femme = "2.2.1" html2text = "0.4.4" +[dev-dependencies.criterion] +version = "0.4.0" +features = ["async_tokio"] [build-dependencies.skeptic] version = "0.13" +[[bench]] +name = "magic" +harness = false + [features] all = ["toml", "json", "env", "magic"] # default = ["reqwest/default-tls"] diff --git a/benches/magic.rs b/benches/magic.rs new file mode 100644 index 0000000..89b37cf --- /dev/null +++ b/benches/magic.rs @@ -0,0 +1,103 @@ +use std::{ + fs::{read, write}, + io::{stdout, ErrorKind, Write}, +}; + +use criterion::{criterion_group, criterion_main, Criterion}; +use magic::CookieFlags; + +async fn load_image() -> Vec { + match read("/tmp/test.png") { + Ok(img) => img, + Err(err) if err.kind() == ErrorKind::NotFound => { + let image = reqwest::Client::new() + .get("https://httpbin.org/image/png") + .header("Accept", "image/png") + .send() + .await + .expect("connection") + .error_for_status() + .expect("OK response") + .bytes() + .await + .expect("read response"); + write("/tmp/test.png", &image).expect("cache file"); + image.into() + }, + Err(err) => panic!("error reading cached PNG file: {err:?}"), + } +} + +fn load_once(c: &mut Criterion) { + let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open"); + cookie.load::<&str>(&[]).expect("cookie.load"); + eprintln!("file: {}", file!()); + let buf = read(file!()).expect("read"); + + c.bench_function("rust file, load once", |b| { + b.iter(|| { + // mis-detected as text/x-asm when chaining .bytes() at the beginning of a line + assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]); + }); + }); + + let image = tokio_test::block_on(async { load_image().await }); + + c.bench_function("PNG file, load once", |b| { + b.iter(|| { + assert_eq!("image/png", cookie.buffer(&image).expect("detection")); + }); + }); +} + +fn load_each_time(c: &mut Criterion) { + let buf = read(file!()).expect("read"); + + c.bench_function("rust file, load each time", |b| { + b.iter(|| { + let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open"); + cookie.load::<&str>(&[]).expect("cookie.load"); + assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]); + }); + }); + + let image = tokio_test::block_on(async { load_image().await }); + + c.bench_function("PNG file, load each time", |b| { + b.iter(|| { + let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open"); + cookie.load::<&str>(&[]).expect("cookie.load"); + assert_eq!("image/png", cookie.buffer(&image).expect("detection")); + }); + }); +} + +fn load_from_buffer_each_time(c: &mut Criterion) { + let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open"); + let db = read("/usr/share/file/misc/magic.mgc").expect("read database"); + let buf = read(file!()).expect("read"); + + c.bench_function("rust file, load from buffer each time", |b| { + b.iter(|| { + cookie.load_buffers(&[&db]).expect("cookie.load_buffers"); + assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]); + }); + }); + + let image = tokio_test::block_on(async { load_image().await }); + + c.bench_function("PNG file, load from buffer each time", |b| { + b.iter(|| { + cookie.load_buffers(&[&db]).expect("cookie.load_buffers"); + assert_eq!("image/png", cookie.buffer(&image).expect("detection")); + }); + }); +} + +criterion_group!( + magic_bench, + load_once, + load_each_time, + load_from_buffer_each_time +); +criterion_main!(magic_bench); diff --git a/src/mastodon.rs b/src/mastodon.rs index 0cb0b6b..b2c4c9d 100644 --- a/src/mastodon.rs +++ b/src/mastodon.rs @@ -23,12 +23,15 @@ use crate::{ }; use futures::TryStream; use log::{as_debug, as_serde, debug, error, trace}; -#[cfg(feature = "magic")] -use magic::CookieFlags; use reqwest::{multipart::Part, Client, RequestBuilder}; use url::Url; use uuid::Uuid; +#[cfg(feature = "magic")] +use magic::CookieFlags; +#[cfg(feature = "magic")] +use std::sync::Mutex; + /// The Mastodon client is a smart pointer to this struct #[derive(Debug)] pub struct MastodonClient { @@ -37,7 +40,7 @@ pub struct MastodonClient { pub data: Data, /// A handle to access libmagic for mime-types. #[cfg(feature = "magic")] - magic: magic::Cookie, + magic: Mutex, } /// Your mastodon application client, handles all requests to and from Mastodon. @@ -56,12 +59,11 @@ impl From for Mastodon { /// Creates a mastodon instance from the data struct. #[cfg(feature = "magic")] fn from(data: Data) -> Mastodon { - MastodonClient { - client: Client::new(), + Mastodon::new_with_magic( + Client::new(), data, - magic: Self::default_magic().expect("failed to open magic cookie or load database"), - } - .into() + Self::default_magic().expect("failed to open magic cookie or load database"), + ) } /// Creates a mastodon instance from the data struct. @@ -202,7 +204,7 @@ impl Mastodon { Mastodon(Arc::new(MastodonClient { client, data, - magic, + magic: Mutex::new(magic), })) } @@ -405,7 +407,7 @@ impl Mastodon { // if it doesn't work, it's no big deal. The server will look at // the filepath if this isn't here and things should still work. #[cfg(feature = "magic")] - let mime = self.magic.file(path).ok(); + let mime = self.magic.lock().expect("mutex lock").file(path).ok(); #[cfg(not(feature = "magic"))] let mime: Option = None; @@ -419,7 +421,7 @@ impl Mastodon { file.read_to_end(&mut data)?; let part = Part::bytes(data).file_name(Cow::Owned(path.to_string_lossy().to_string())); - Ok(if let Some(mime) = mime { + Ok(if let Some(mime) = &mime { part.mime_str(&mime)? } else { part From 4fb24a5a216502af4af0f7502485adaddff61ac6 Mon Sep 17 00:00:00 2001 From: "D. Scott Boggs" Date: Wed, 28 Dec 2022 07:58:40 -0500 Subject: [PATCH 2/2] Drop the magic bullshit There were too many false positive responses from libmagic to have this just happen in the background. In a future release I may add a specific error which is returned when the given file has no extension, and downstream application developers can use that as an indicator that they should ask the user if they want to detect the filetype, making use of libmagic directly in the downstream application as appropriate. --- .vscode/settings.json | 1 - Cargo.toml | 15 ++---- benches/magic.rs | 103 ------------------------------------------ src/errors.rs | 13 +----- src/macros.rs | 8 ++-- src/mastodon.rs | 101 +++++------------------------------------ 6 files changed, 21 insertions(+), 220 deletions(-) delete mode 100644 benches/magic.rs diff --git a/.vscode/settings.json b/.vscode/settings.json index e4e28e7..2c859eb 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -7,7 +7,6 @@ "favourited", "indoc", "isolang", - "libmagic", "querystring", "reblog", "reqwest", diff --git a/Cargo.toml b/Cargo.toml index 1c7970e..93e6653 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mastodon-async" -version = "1.0.2" +version = "1.0.3" authors = ["Aaron Power ", "Paul Woolcock ", "D. Scott Boggs "] description = "A wrapper around the Mastodon API." readme = "README.md" @@ -24,10 +24,7 @@ url = "1" # Provides parsing for the link header in get_links() in page.rs hyper-old-types = "0.11.0" futures-util = "0.3.25" - -[dependencies.magic] -version = "0.13.0" -optional = true +static_assertions = "1.1.0" [dependencies.uuid] version = "1.2.2" @@ -85,14 +82,10 @@ features = ["async_tokio"] [build-dependencies.skeptic] version = "0.13" -[[bench]] -name = "magic" -harness = false - [features] -all = ["toml", "json", "env", "magic"] +all = ["toml", "json", "env"] # default = ["reqwest/default-tls"] -default = ["reqwest/default-tls", "magic"] +default = ["reqwest/default-tls"] env = ["envy"] json = [] rustls-tls = ["reqwest/rustls-tls"] diff --git a/benches/magic.rs b/benches/magic.rs deleted file mode 100644 index 89b37cf..0000000 --- a/benches/magic.rs +++ /dev/null @@ -1,103 +0,0 @@ -use std::{ - fs::{read, write}, - io::{stdout, ErrorKind, Write}, -}; - -use criterion::{criterion_group, criterion_main, Criterion}; -use magic::CookieFlags; - -async fn load_image() -> Vec { - match read("/tmp/test.png") { - Ok(img) => img, - Err(err) if err.kind() == ErrorKind::NotFound => { - let image = reqwest::Client::new() - .get("https://httpbin.org/image/png") - .header("Accept", "image/png") - .send() - .await - .expect("connection") - .error_for_status() - .expect("OK response") - .bytes() - .await - .expect("read response"); - write("/tmp/test.png", &image).expect("cache file"); - image.into() - }, - Err(err) => panic!("error reading cached PNG file: {err:?}"), - } -} - -fn load_once(c: &mut Criterion) { - let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open"); - cookie.load::<&str>(&[]).expect("cookie.load"); - eprintln!("file: {}", file!()); - let buf = read(file!()).expect("read"); - - c.bench_function("rust file, load once", |b| { - b.iter(|| { - // mis-detected as text/x-asm when chaining .bytes() at the beginning of a line - assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]); - }); - }); - - let image = tokio_test::block_on(async { load_image().await }); - - c.bench_function("PNG file, load once", |b| { - b.iter(|| { - assert_eq!("image/png", cookie.buffer(&image).expect("detection")); - }); - }); -} - -fn load_each_time(c: &mut Criterion) { - let buf = read(file!()).expect("read"); - - c.bench_function("rust file, load each time", |b| { - b.iter(|| { - let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open"); - cookie.load::<&str>(&[]).expect("cookie.load"); - assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]); - }); - }); - - let image = tokio_test::block_on(async { load_image().await }); - - c.bench_function("PNG file, load each time", |b| { - b.iter(|| { - let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open"); - cookie.load::<&str>(&[]).expect("cookie.load"); - assert_eq!("image/png", cookie.buffer(&image).expect("detection")); - }); - }); -} - -fn load_from_buffer_each_time(c: &mut Criterion) { - let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open"); - let db = read("/usr/share/file/misc/magic.mgc").expect("read database"); - let buf = read(file!()).expect("read"); - - c.bench_function("rust file, load from buffer each time", |b| { - b.iter(|| { - cookie.load_buffers(&[&db]).expect("cookie.load_buffers"); - assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]); - }); - }); - - let image = tokio_test::block_on(async { load_image().await }); - - c.bench_function("PNG file, load from buffer each time", |b| { - b.iter(|| { - cookie.load_buffers(&[&db]).expect("cookie.load_buffers"); - assert_eq!("image/png", cookie.buffer(&image).expect("detection")); - }); - }); -} - -criterion_group!( - magic_bench, - load_once, - load_each_time, - load_from_buffer_each_time -); -criterion_main!(magic_bench); diff --git a/src/errors.rs b/src/errors.rs index 9b7e7e5..d87fb9d 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -3,8 +3,6 @@ use std::{error, fmt, io::Error as IoError, num::TryFromIntError}; #[cfg(feature = "env")] use envy::Error as EnvyError; use hyper_old_types::Error as HeaderParseError; -#[cfg(feature = "magic")] -use magic::MagicError; use reqwest::{header::ToStrError as HeaderStrError, Error as HttpError, StatusCode}; use serde::Deserialize; use serde_json::Error as SerdeError; @@ -70,9 +68,6 @@ pub enum Error { /// At the time of writing, this can only be triggered when a file is /// larger than the system's usize allows. IntConversion(TryFromIntError), - #[cfg(feature = "magic")] - /// An error received from the magic crate - Magic(MagicError), /// Other errors Other(String), } @@ -102,11 +97,7 @@ impl error::Error for Error { Envy(ref e) => Some(e), SerdeQs(ref e) => Some(e), IntConversion(ref e) => Some(e), - #[cfg(feature = "magic")] - Magic(ref e) => Some(e), - Api { - .. - } + Api { .. } | ClientIdRequired | ClientSecretRequired | AccessTokenRequired @@ -164,8 +155,6 @@ from! { SerdeQsError => SerdeQs, String => Other, TryFromIntError => IntConversion, - #[cfg(feature = "magic")] - MagicError => Magic, } #[macro_export] diff --git a/src/macros.rs b/src/macros.rs index 2c7dad4..85b6c5e 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -173,7 +173,7 @@ macro_rules! route_v2 { let form_data = Form::new() $( - .part(stringify!($param), self.get_form_part($param)?) + .part(stringify!($param), Self::get_form_part($param)?) )*; let form_data = if let Some(description) = description { @@ -217,7 +217,7 @@ macro_rules! route_v2 { let form_data = Form::new() $( - .part(stringify!($param), self.get_form_part($param)?) + .part(stringify!($param), Self::get_form_part($param)?) )*; let url = &self.route(concat!("/api/v2/", $url)); @@ -261,7 +261,7 @@ macro_rules! route { let form_data = Form::new() $( - .part(stringify!($param), self.get_form_part($param)?) + .part(stringify!($param), Self::get_form_part($param)?) )*; let url = &self.route(concat!("/api/v1/", $url)); @@ -302,7 +302,7 @@ macro_rules! route { let form_data = Form::new() $( - .part(stringify!($param), self.get_form_part($param)?) + .part(stringify!($param), Self::get_form_part($param)?) )*; let form_data = if let Some(description) = description { diff --git a/src/mastodon.rs b/src/mastodon.rs index b2c4c9d..a437867 100644 --- a/src/mastodon.rs +++ b/src/mastodon.rs @@ -11,15 +11,8 @@ use crate::{ errors::{Error, Result}, event_stream::event_stream, helpers::read_response::read_response, - log_serde, - AddFilterRequest, - AddPushRequest, - Data, - NewStatus, - Page, - StatusesRequest, - UpdateCredsRequest, - UpdatePushRequest, + log_serde, AddFilterRequest, AddPushRequest, Data, NewStatus, Page, StatusesRequest, + UpdateCredsRequest, UpdatePushRequest, }; use futures::TryStream; use log::{as_debug, as_serde, debug, error, trace}; @@ -27,26 +20,21 @@ use reqwest::{multipart::Part, Client, RequestBuilder}; use url::Url; use uuid::Uuid; -#[cfg(feature = "magic")] -use magic::CookieFlags; -#[cfg(feature = "magic")] -use std::sync::Mutex; - /// The Mastodon client is a smart pointer to this struct #[derive(Debug)] pub struct MastodonClient { pub(crate) client: Client, /// Raw data about your mastodon instance. pub data: Data, - /// A handle to access libmagic for mime-types. - #[cfg(feature = "magic")] - magic: Mutex, } /// Your mastodon application client, handles all requests to and from Mastodon. #[derive(Debug, Clone)] pub struct Mastodon(Arc); +// This ensures we don't accidentally make Mastodon not Send or Sync again +static_assertions::assert_impl_all!(Mastodon: Send, Sync); + /// A client for making unauthenticated requests to the public API. #[derive(Clone, Debug)] pub struct MastodonUnauthenticated { @@ -57,23 +45,8 @@ pub struct MastodonUnauthenticated { impl From for Mastodon { /// Creates a mastodon instance from the data struct. - #[cfg(feature = "magic")] fn from(data: Data) -> Mastodon { - Mastodon::new_with_magic( - Client::new(), - data, - Self::default_magic().expect("failed to open magic cookie or load database"), - ) - } - - /// Creates a mastodon instance from the data struct. - #[cfg(not(feature = "magic"))] - fn from(data: Data) -> Mastodon { - MastodonClient { - client: Client::new(), - data, - } - .into() + Mastodon::new(Client::new(), data) } } impl Mastodon { @@ -175,46 +148,9 @@ impl Mastodon { stream_direct@"direct", } - /// Return a magic cookie, loaded with the default mime - #[cfg(feature = "magic")] - fn default_magic() -> Result { - let magic = magic::Cookie::open(Default::default())?; - magic.load::<&str>(&[])?; - magic.set_flags(CookieFlags::MIME)?; - Ok(magic) - } - - /// Create a new Mastodon Client - #[cfg(feature = "magic")] + /// A new instance. pub fn new(client: Client, data: Data) -> Self { - Self::new_with_magic( - client, - data, - Self::default_magic().expect("failed to open magic cookie or load database"), - ) - } - - /// Create a new Mastodon Client, passing in a pre-constructed magic - /// cookie. - /// - /// This is mainly here so you have a wait to construct the client which - /// won't panic. - #[cfg(feature = "magic")] - pub fn new_with_magic(client: Client, data: Data, magic: magic::Cookie) -> Self { - Mastodon(Arc::new(MastodonClient { - client, - data, - magic: Mutex::new(magic), - })) - } - - /// Create a new Mastodon Client - #[cfg(not(feature = "magic"))] - pub fn new(client: Client, data: Data) -> Self { - Mastodon(Arc::new(MastodonClient { - client, - data, - })) + Mastodon(Arc::new(MastodonClient { client, data })) } fn route(&self, url: &str) -> String { @@ -397,20 +333,12 @@ impl Mastodon { } /// Return a part for a multipart form submission from a file, including - /// the name of the file, and, if the "magic" feature is enabled, the mime- - /// type. - fn get_form_part(&self, path: impl AsRef) -> Result { + /// the name of the file. + fn get_form_part(path: impl AsRef) -> Result { use std::io::Read; let path = path.as_ref(); - // if it doesn't work, it's no big deal. The server will look at - // the filepath if this isn't here and things should still work. - #[cfg(feature = "magic")] - let mime = self.magic.lock().expect("mutex lock").file(path).ok(); - #[cfg(not(feature = "magic"))] - let mime: Option = None; - match std::fs::File::open(path) { Ok(mut file) => { let mut data = if let Ok(metadata) = file.metadata() { @@ -419,13 +347,8 @@ impl Mastodon { vec![] }; file.read_to_end(&mut data)?; - let part = - Part::bytes(data).file_name(Cow::Owned(path.to_string_lossy().to_string())); - Ok(if let Some(mime) = &mime { - part.mime_str(&mime)? - } else { - part - }) + // TODO extract filename, error on dirs, etc. + Ok(Part::bytes(data).file_name(Cow::Owned(path.to_string_lossy().to_string()))) }, Err(err) => { error!(path = as_debug!(path), error = as_debug!(err); "error reading file contents for multipart form");