Drop the magic bullshit

There were too many false positive responses from libmagic to have this
just happen in the background. In a future release I may add a specific
error which is returned when the given file has no extension, and
downstream application developers can use that as an indicator that they
should ask the user if they want to detect the filetype, making use of
libmagic directly in the downstream application as appropriate.
This commit is contained in:
D. Scott Boggs 2022-12-28 07:58:40 -05:00
parent 7653513c6f
commit 4fb24a5a21
6 changed files with 21 additions and 220 deletions

View File

@ -7,7 +7,6 @@
"favourited", "favourited",
"indoc", "indoc",
"isolang", "isolang",
"libmagic",
"querystring", "querystring",
"reblog", "reblog",
"reqwest", "reqwest",

View File

@ -1,6 +1,6 @@
[package] [package]
name = "mastodon-async" name = "mastodon-async"
version = "1.0.2" version = "1.0.3"
authors = ["Aaron Power <theaaronepower@gmail.com>", "Paul Woolcock <paul@woolcock.us>", "D. Scott Boggs <scott@tams.tech>"] authors = ["Aaron Power <theaaronepower@gmail.com>", "Paul Woolcock <paul@woolcock.us>", "D. Scott Boggs <scott@tams.tech>"]
description = "A wrapper around the Mastodon API." description = "A wrapper around the Mastodon API."
readme = "README.md" readme = "README.md"
@ -24,10 +24,7 @@ url = "1"
# Provides parsing for the link header in get_links() in page.rs # Provides parsing for the link header in get_links() in page.rs
hyper-old-types = "0.11.0" hyper-old-types = "0.11.0"
futures-util = "0.3.25" futures-util = "0.3.25"
static_assertions = "1.1.0"
[dependencies.magic]
version = "0.13.0"
optional = true
[dependencies.uuid] [dependencies.uuid]
version = "1.2.2" version = "1.2.2"
@ -85,14 +82,10 @@ features = ["async_tokio"]
[build-dependencies.skeptic] [build-dependencies.skeptic]
version = "0.13" version = "0.13"
[[bench]]
name = "magic"
harness = false
[features] [features]
all = ["toml", "json", "env", "magic"] all = ["toml", "json", "env"]
# default = ["reqwest/default-tls"] # default = ["reqwest/default-tls"]
default = ["reqwest/default-tls", "magic"] default = ["reqwest/default-tls"]
env = ["envy"] env = ["envy"]
json = [] json = []
rustls-tls = ["reqwest/rustls-tls"] rustls-tls = ["reqwest/rustls-tls"]

View File

@ -1,103 +0,0 @@
use std::{
fs::{read, write},
io::{stdout, ErrorKind, Write},
};
use criterion::{criterion_group, criterion_main, Criterion};
use magic::CookieFlags;
async fn load_image() -> Vec<u8> {
match read("/tmp/test.png") {
Ok(img) => img,
Err(err) if err.kind() == ErrorKind::NotFound => {
let image = reqwest::Client::new()
.get("https://httpbin.org/image/png")
.header("Accept", "image/png")
.send()
.await
.expect("connection")
.error_for_status()
.expect("OK response")
.bytes()
.await
.expect("read response");
write("/tmp/test.png", &image).expect("cache file");
image.into()
},
Err(err) => panic!("error reading cached PNG file: {err:?}"),
}
}
fn load_once(c: &mut Criterion) {
let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open");
cookie.load::<&str>(&[]).expect("cookie.load");
eprintln!("file: {}", file!());
let buf = read(file!()).expect("read");
c.bench_function("rust file, load once", |b| {
b.iter(|| {
// mis-detected as text/x-asm when chaining .bytes() at the beginning of a line
assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]);
});
});
let image = tokio_test::block_on(async { load_image().await });
c.bench_function("PNG file, load once", |b| {
b.iter(|| {
assert_eq!("image/png", cookie.buffer(&image).expect("detection"));
});
});
}
fn load_each_time(c: &mut Criterion) {
let buf = read(file!()).expect("read");
c.bench_function("rust file, load each time", |b| {
b.iter(|| {
let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open");
cookie.load::<&str>(&[]).expect("cookie.load");
assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]);
});
});
let image = tokio_test::block_on(async { load_image().await });
c.bench_function("PNG file, load each time", |b| {
b.iter(|| {
let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open");
cookie.load::<&str>(&[]).expect("cookie.load");
assert_eq!("image/png", cookie.buffer(&image).expect("detection"));
});
});
}
fn load_from_buffer_each_time(c: &mut Criterion) {
let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open");
let db = read("/usr/share/file/misc/magic.mgc").expect("read database");
let buf = read(file!()).expect("read");
c.bench_function("rust file, load from buffer each time", |b| {
b.iter(|| {
cookie.load_buffers(&[&db]).expect("cookie.load_buffers");
assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]);
});
});
let image = tokio_test::block_on(async { load_image().await });
c.bench_function("PNG file, load from buffer each time", |b| {
b.iter(|| {
cookie.load_buffers(&[&db]).expect("cookie.load_buffers");
assert_eq!("image/png", cookie.buffer(&image).expect("detection"));
});
});
}
criterion_group!(
magic_bench,
load_once,
load_each_time,
load_from_buffer_each_time
);
criterion_main!(magic_bench);

View File

@ -3,8 +3,6 @@ use std::{error, fmt, io::Error as IoError, num::TryFromIntError};
#[cfg(feature = "env")] #[cfg(feature = "env")]
use envy::Error as EnvyError; use envy::Error as EnvyError;
use hyper_old_types::Error as HeaderParseError; use hyper_old_types::Error as HeaderParseError;
#[cfg(feature = "magic")]
use magic::MagicError;
use reqwest::{header::ToStrError as HeaderStrError, Error as HttpError, StatusCode}; use reqwest::{header::ToStrError as HeaderStrError, Error as HttpError, StatusCode};
use serde::Deserialize; use serde::Deserialize;
use serde_json::Error as SerdeError; use serde_json::Error as SerdeError;
@ -70,9 +68,6 @@ pub enum Error {
/// At the time of writing, this can only be triggered when a file is /// At the time of writing, this can only be triggered when a file is
/// larger than the system's usize allows. /// larger than the system's usize allows.
IntConversion(TryFromIntError), IntConversion(TryFromIntError),
#[cfg(feature = "magic")]
/// An error received from the magic crate
Magic(MagicError),
/// Other errors /// Other errors
Other(String), Other(String),
} }
@ -102,11 +97,7 @@ impl error::Error for Error {
Envy(ref e) => Some(e), Envy(ref e) => Some(e),
SerdeQs(ref e) => Some(e), SerdeQs(ref e) => Some(e),
IntConversion(ref e) => Some(e), IntConversion(ref e) => Some(e),
#[cfg(feature = "magic")] Api { .. }
Magic(ref e) => Some(e),
Api {
..
}
| ClientIdRequired | ClientIdRequired
| ClientSecretRequired | ClientSecretRequired
| AccessTokenRequired | AccessTokenRequired
@ -164,8 +155,6 @@ from! {
SerdeQsError => SerdeQs, SerdeQsError => SerdeQs,
String => Other, String => Other,
TryFromIntError => IntConversion, TryFromIntError => IntConversion,
#[cfg(feature = "magic")]
MagicError => Magic,
} }
#[macro_export] #[macro_export]

View File

@ -173,7 +173,7 @@ macro_rules! route_v2 {
let form_data = Form::new() let form_data = Form::new()
$( $(
.part(stringify!($param), self.get_form_part($param)?) .part(stringify!($param), Self::get_form_part($param)?)
)*; )*;
let form_data = if let Some(description) = description { let form_data = if let Some(description) = description {
@ -217,7 +217,7 @@ macro_rules! route_v2 {
let form_data = Form::new() let form_data = Form::new()
$( $(
.part(stringify!($param), self.get_form_part($param)?) .part(stringify!($param), Self::get_form_part($param)?)
)*; )*;
let url = &self.route(concat!("/api/v2/", $url)); let url = &self.route(concat!("/api/v2/", $url));
@ -261,7 +261,7 @@ macro_rules! route {
let form_data = Form::new() let form_data = Form::new()
$( $(
.part(stringify!($param), self.get_form_part($param)?) .part(stringify!($param), Self::get_form_part($param)?)
)*; )*;
let url = &self.route(concat!("/api/v1/", $url)); let url = &self.route(concat!("/api/v1/", $url));
@ -302,7 +302,7 @@ macro_rules! route {
let form_data = Form::new() let form_data = Form::new()
$( $(
.part(stringify!($param), self.get_form_part($param)?) .part(stringify!($param), Self::get_form_part($param)?)
)*; )*;
let form_data = if let Some(description) = description { let form_data = if let Some(description) = description {

View File

@ -11,15 +11,8 @@ use crate::{
errors::{Error, Result}, errors::{Error, Result},
event_stream::event_stream, event_stream::event_stream,
helpers::read_response::read_response, helpers::read_response::read_response,
log_serde, log_serde, AddFilterRequest, AddPushRequest, Data, NewStatus, Page, StatusesRequest,
AddFilterRequest, UpdateCredsRequest, UpdatePushRequest,
AddPushRequest,
Data,
NewStatus,
Page,
StatusesRequest,
UpdateCredsRequest,
UpdatePushRequest,
}; };
use futures::TryStream; use futures::TryStream;
use log::{as_debug, as_serde, debug, error, trace}; use log::{as_debug, as_serde, debug, error, trace};
@ -27,26 +20,21 @@ use reqwest::{multipart::Part, Client, RequestBuilder};
use url::Url; use url::Url;
use uuid::Uuid; use uuid::Uuid;
#[cfg(feature = "magic")]
use magic::CookieFlags;
#[cfg(feature = "magic")]
use std::sync::Mutex;
/// The Mastodon client is a smart pointer to this struct /// The Mastodon client is a smart pointer to this struct
#[derive(Debug)] #[derive(Debug)]
pub struct MastodonClient { pub struct MastodonClient {
pub(crate) client: Client, pub(crate) client: Client,
/// Raw data about your mastodon instance. /// Raw data about your mastodon instance.
pub data: Data, pub data: Data,
/// A handle to access libmagic for mime-types.
#[cfg(feature = "magic")]
magic: Mutex<magic::Cookie>,
} }
/// Your mastodon application client, handles all requests to and from Mastodon. /// Your mastodon application client, handles all requests to and from Mastodon.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Mastodon(Arc<MastodonClient>); pub struct Mastodon(Arc<MastodonClient>);
// This ensures we don't accidentally make Mastodon not Send or Sync again
static_assertions::assert_impl_all!(Mastodon: Send, Sync);
/// A client for making unauthenticated requests to the public API. /// A client for making unauthenticated requests to the public API.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct MastodonUnauthenticated { pub struct MastodonUnauthenticated {
@ -57,23 +45,8 @@ pub struct MastodonUnauthenticated {
impl From<Data> for Mastodon { impl From<Data> for Mastodon {
/// Creates a mastodon instance from the data struct. /// Creates a mastodon instance from the data struct.
#[cfg(feature = "magic")]
fn from(data: Data) -> Mastodon { fn from(data: Data) -> Mastodon {
Mastodon::new_with_magic( Mastodon::new(Client::new(), data)
Client::new(),
data,
Self::default_magic().expect("failed to open magic cookie or load database"),
)
}
/// Creates a mastodon instance from the data struct.
#[cfg(not(feature = "magic"))]
fn from(data: Data) -> Mastodon {
MastodonClient {
client: Client::new(),
data,
}
.into()
} }
} }
impl Mastodon { impl Mastodon {
@ -175,46 +148,9 @@ impl Mastodon {
stream_direct@"direct", stream_direct@"direct",
} }
/// Return a magic cookie, loaded with the default mime /// A new instance.
#[cfg(feature = "magic")]
fn default_magic() -> Result<magic::Cookie> {
let magic = magic::Cookie::open(Default::default())?;
magic.load::<&str>(&[])?;
magic.set_flags(CookieFlags::MIME)?;
Ok(magic)
}
/// Create a new Mastodon Client
#[cfg(feature = "magic")]
pub fn new(client: Client, data: Data) -> Self { pub fn new(client: Client, data: Data) -> Self {
Self::new_with_magic( Mastodon(Arc::new(MastodonClient { client, data }))
client,
data,
Self::default_magic().expect("failed to open magic cookie or load database"),
)
}
/// Create a new Mastodon Client, passing in a pre-constructed magic
/// cookie.
///
/// This is mainly here so you have a wait to construct the client which
/// won't panic.
#[cfg(feature = "magic")]
pub fn new_with_magic(client: Client, data: Data, magic: magic::Cookie) -> Self {
Mastodon(Arc::new(MastodonClient {
client,
data,
magic: Mutex::new(magic),
}))
}
/// Create a new Mastodon Client
#[cfg(not(feature = "magic"))]
pub fn new(client: Client, data: Data) -> Self {
Mastodon(Arc::new(MastodonClient {
client,
data,
}))
} }
fn route(&self, url: &str) -> String { fn route(&self, url: &str) -> String {
@ -397,20 +333,12 @@ impl Mastodon {
} }
/// Return a part for a multipart form submission from a file, including /// Return a part for a multipart form submission from a file, including
/// the name of the file, and, if the "magic" feature is enabled, the mime- /// the name of the file.
/// type. fn get_form_part(path: impl AsRef<Path>) -> Result<Part> {
fn get_form_part(&self, path: impl AsRef<Path>) -> Result<Part> {
use std::io::Read; use std::io::Read;
let path = path.as_ref(); let path = path.as_ref();
// if it doesn't work, it's no big deal. The server will look at
// the filepath if this isn't here and things should still work.
#[cfg(feature = "magic")]
let mime = self.magic.lock().expect("mutex lock").file(path).ok();
#[cfg(not(feature = "magic"))]
let mime: Option<String> = None;
match std::fs::File::open(path) { match std::fs::File::open(path) {
Ok(mut file) => { Ok(mut file) => {
let mut data = if let Ok(metadata) = file.metadata() { let mut data = if let Ok(metadata) = file.metadata() {
@ -419,13 +347,8 @@ impl Mastodon {
vec![] vec![]
}; };
file.read_to_end(&mut data)?; file.read_to_end(&mut data)?;
let part = // TODO extract filename, error on dirs, etc.
Part::bytes(data).file_name(Cow::Owned(path.to_string_lossy().to_string())); Ok(Part::bytes(data).file_name(Cow::Owned(path.to_string_lossy().to_string())))
Ok(if let Some(mime) = &mime {
part.mime_str(&mime)?
} else {
part
})
}, },
Err(err) => { Err(err) => {
error!(path = as_debug!(path), error = as_debug!(err); "error reading file contents for multipart form"); error!(path = as_debug!(path), error = as_debug!(err); "error reading file contents for multipart form");