From 7653513c6f0a1da93cfcde0bff8888ebb113fdcb Mon Sep 17 00:00:00 2001 From: "D. Scott Boggs" Date: Tue, 27 Dec 2022 07:46:13 -0500 Subject: [PATCH] Wrap magic cookie in a mutex This commit also includes benchmarks proving the viability of reloading the magic database for every filetype request, should that become necessary. --- .gitignore | 1 + .vscode/settings.json | 1 + Cargo.toml | 8 +++- benches/magic.rs | 103 ++++++++++++++++++++++++++++++++++++++++++ src/mastodon.rs | 24 +++++----- 5 files changed, 125 insertions(+), 12 deletions(-) create mode 100644 benches/magic.rs diff --git a/.gitignore b/.gitignore index 7111969..812dc72 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ Cargo.lock .env mastodon-data.toml* libtest.rmeta +examples/playground.rs diff --git a/.vscode/settings.json b/.vscode/settings.json index 2c859eb..e4e28e7 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -7,6 +7,7 @@ "favourited", "indoc", "isolang", + "libmagic", "querystring", "reblog", "reqwest", diff --git a/Cargo.toml b/Cargo.toml index 488c3e6..1c7970e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,6 @@ serde_json = "1" serde_qs = "0.4.5" serde_urlencoded = "0.6.1" tap-reader = "1" -tungstenite = "0.18" url = "1" # Provides parsing for the link header in get_links() in page.rs hyper-old-types = "0.11.0" @@ -79,10 +78,17 @@ tempfile = "3" # for examples: femme = "2.2.1" html2text = "0.4.4" +[dev-dependencies.criterion] +version = "0.4.0" +features = ["async_tokio"] [build-dependencies.skeptic] version = "0.13" +[[bench]] +name = "magic" +harness = false + [features] all = ["toml", "json", "env", "magic"] # default = ["reqwest/default-tls"] diff --git a/benches/magic.rs b/benches/magic.rs new file mode 100644 index 0000000..89b37cf --- /dev/null +++ b/benches/magic.rs @@ -0,0 +1,103 @@ +use std::{ + fs::{read, write}, + io::{stdout, ErrorKind, Write}, +}; + +use criterion::{criterion_group, criterion_main, Criterion}; +use magic::CookieFlags; + +async fn load_image() -> Vec { + match read("/tmp/test.png") { + Ok(img) => img, + Err(err) if err.kind() == ErrorKind::NotFound => { + let image = reqwest::Client::new() + .get("https://httpbin.org/image/png") + .header("Accept", "image/png") + .send() + .await + .expect("connection") + .error_for_status() + .expect("OK response") + .bytes() + .await + .expect("read response"); + write("/tmp/test.png", &image).expect("cache file"); + image.into() + }, + Err(err) => panic!("error reading cached PNG file: {err:?}"), + } +} + +fn load_once(c: &mut Criterion) { + let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open"); + cookie.load::<&str>(&[]).expect("cookie.load"); + eprintln!("file: {}", file!()); + let buf = read(file!()).expect("read"); + + c.bench_function("rust file, load once", |b| { + b.iter(|| { + // mis-detected as text/x-asm when chaining .bytes() at the beginning of a line + assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]); + }); + }); + + let image = tokio_test::block_on(async { load_image().await }); + + c.bench_function("PNG file, load once", |b| { + b.iter(|| { + assert_eq!("image/png", cookie.buffer(&image).expect("detection")); + }); + }); +} + +fn load_each_time(c: &mut Criterion) { + let buf = read(file!()).expect("read"); + + c.bench_function("rust file, load each time", |b| { + b.iter(|| { + let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open"); + cookie.load::<&str>(&[]).expect("cookie.load"); + assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]); + }); + }); + + let image = tokio_test::block_on(async { load_image().await }); + + c.bench_function("PNG file, load each time", |b| { + b.iter(|| { + let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open"); + cookie.load::<&str>(&[]).expect("cookie.load"); + assert_eq!("image/png", cookie.buffer(&image).expect("detection")); + }); + }); +} + +fn load_from_buffer_each_time(c: &mut Criterion) { + let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open"); + let db = read("/usr/share/file/misc/magic.mgc").expect("read database"); + let buf = read(file!()).expect("read"); + + c.bench_function("rust file, load from buffer each time", |b| { + b.iter(|| { + cookie.load_buffers(&[&db]).expect("cookie.load_buffers"); + assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]); + }); + }); + + let image = tokio_test::block_on(async { load_image().await }); + + c.bench_function("PNG file, load from buffer each time", |b| { + b.iter(|| { + cookie.load_buffers(&[&db]).expect("cookie.load_buffers"); + assert_eq!("image/png", cookie.buffer(&image).expect("detection")); + }); + }); +} + +criterion_group!( + magic_bench, + load_once, + load_each_time, + load_from_buffer_each_time +); +criterion_main!(magic_bench); diff --git a/src/mastodon.rs b/src/mastodon.rs index 0cb0b6b..b2c4c9d 100644 --- a/src/mastodon.rs +++ b/src/mastodon.rs @@ -23,12 +23,15 @@ use crate::{ }; use futures::TryStream; use log::{as_debug, as_serde, debug, error, trace}; -#[cfg(feature = "magic")] -use magic::CookieFlags; use reqwest::{multipart::Part, Client, RequestBuilder}; use url::Url; use uuid::Uuid; +#[cfg(feature = "magic")] +use magic::CookieFlags; +#[cfg(feature = "magic")] +use std::sync::Mutex; + /// The Mastodon client is a smart pointer to this struct #[derive(Debug)] pub struct MastodonClient { @@ -37,7 +40,7 @@ pub struct MastodonClient { pub data: Data, /// A handle to access libmagic for mime-types. #[cfg(feature = "magic")] - magic: magic::Cookie, + magic: Mutex, } /// Your mastodon application client, handles all requests to and from Mastodon. @@ -56,12 +59,11 @@ impl From for Mastodon { /// Creates a mastodon instance from the data struct. #[cfg(feature = "magic")] fn from(data: Data) -> Mastodon { - MastodonClient { - client: Client::new(), + Mastodon::new_with_magic( + Client::new(), data, - magic: Self::default_magic().expect("failed to open magic cookie or load database"), - } - .into() + Self::default_magic().expect("failed to open magic cookie or load database"), + ) } /// Creates a mastodon instance from the data struct. @@ -202,7 +204,7 @@ impl Mastodon { Mastodon(Arc::new(MastodonClient { client, data, - magic, + magic: Mutex::new(magic), })) } @@ -405,7 +407,7 @@ impl Mastodon { // if it doesn't work, it's no big deal. The server will look at // the filepath if this isn't here and things should still work. #[cfg(feature = "magic")] - let mime = self.magic.file(path).ok(); + let mime = self.magic.lock().expect("mutex lock").file(path).ok(); #[cfg(not(feature = "magic"))] let mime: Option = None; @@ -419,7 +421,7 @@ impl Mastodon { file.read_to_end(&mut data)?; let part = Part::bytes(data).file_name(Cow::Owned(path.to_string_lossy().to_string())); - Ok(if let Some(mime) = mime { + Ok(if let Some(mime) = &mime { part.mime_str(&mime)? } else { part