Drop the magic bullshit

There were too many false positive responses from libmagic to have this
just happen in the background. In a future release I may add a specific
error which is returned when the given file has no extension, and
downstream application developers can use that as an indicator that they
should ask the user if they want to detect the filetype, making use of
libmagic directly in the downstream application as appropriate.
This commit is contained in:
D. Scott Boggs 2022-12-28 07:58:40 -05:00
parent 7653513c6f
commit 4fb24a5a21
6 changed files with 21 additions and 220 deletions

View File

@ -7,7 +7,6 @@
"favourited",
"indoc",
"isolang",
"libmagic",
"querystring",
"reblog",
"reqwest",

View File

@ -1,6 +1,6 @@
[package]
name = "mastodon-async"
version = "1.0.2"
version = "1.0.3"
authors = ["Aaron Power <theaaronepower@gmail.com>", "Paul Woolcock <paul@woolcock.us>", "D. Scott Boggs <scott@tams.tech>"]
description = "A wrapper around the Mastodon API."
readme = "README.md"
@ -24,10 +24,7 @@ url = "1"
# Provides parsing for the link header in get_links() in page.rs
hyper-old-types = "0.11.0"
futures-util = "0.3.25"
[dependencies.magic]
version = "0.13.0"
optional = true
static_assertions = "1.1.0"
[dependencies.uuid]
version = "1.2.2"
@ -85,14 +82,10 @@ features = ["async_tokio"]
[build-dependencies.skeptic]
version = "0.13"
[[bench]]
name = "magic"
harness = false
[features]
all = ["toml", "json", "env", "magic"]
all = ["toml", "json", "env"]
# default = ["reqwest/default-tls"]
default = ["reqwest/default-tls", "magic"]
default = ["reqwest/default-tls"]
env = ["envy"]
json = []
rustls-tls = ["reqwest/rustls-tls"]

View File

@ -1,103 +0,0 @@
use std::{
fs::{read, write},
io::{stdout, ErrorKind, Write},
};
use criterion::{criterion_group, criterion_main, Criterion};
use magic::CookieFlags;
async fn load_image() -> Vec<u8> {
match read("/tmp/test.png") {
Ok(img) => img,
Err(err) if err.kind() == ErrorKind::NotFound => {
let image = reqwest::Client::new()
.get("https://httpbin.org/image/png")
.header("Accept", "image/png")
.send()
.await
.expect("connection")
.error_for_status()
.expect("OK response")
.bytes()
.await
.expect("read response");
write("/tmp/test.png", &image).expect("cache file");
image.into()
},
Err(err) => panic!("error reading cached PNG file: {err:?}"),
}
}
fn load_once(c: &mut Criterion) {
let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open");
cookie.load::<&str>(&[]).expect("cookie.load");
eprintln!("file: {}", file!());
let buf = read(file!()).expect("read");
c.bench_function("rust file, load once", |b| {
b.iter(|| {
// mis-detected as text/x-asm when chaining .bytes() at the beginning of a line
assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]);
});
});
let image = tokio_test::block_on(async { load_image().await });
c.bench_function("PNG file, load once", |b| {
b.iter(|| {
assert_eq!("image/png", cookie.buffer(&image).expect("detection"));
});
});
}
fn load_each_time(c: &mut Criterion) {
let buf = read(file!()).expect("read");
c.bench_function("rust file, load each time", |b| {
b.iter(|| {
let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open");
cookie.load::<&str>(&[]).expect("cookie.load");
assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]);
});
});
let image = tokio_test::block_on(async { load_image().await });
c.bench_function("PNG file, load each time", |b| {
b.iter(|| {
let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open");
cookie.load::<&str>(&[]).expect("cookie.load");
assert_eq!("image/png", cookie.buffer(&image).expect("detection"));
});
});
}
fn load_from_buffer_each_time(c: &mut Criterion) {
let cookie = magic::Cookie::open(CookieFlags::MIME_TYPE).expect("Cookie::open");
let db = read("/usr/share/file/misc/magic.mgc").expect("read database");
let buf = read(file!()).expect("read");
c.bench_function("rust file, load from buffer each time", |b| {
b.iter(|| {
cookie.load_buffers(&[&db]).expect("cookie.load_buffers");
assert_eq!("text/", &cookie.buffer(&buf).expect("detection")[0..5]);
});
});
let image = tokio_test::block_on(async { load_image().await });
c.bench_function("PNG file, load from buffer each time", |b| {
b.iter(|| {
cookie.load_buffers(&[&db]).expect("cookie.load_buffers");
assert_eq!("image/png", cookie.buffer(&image).expect("detection"));
});
});
}
criterion_group!(
magic_bench,
load_once,
load_each_time,
load_from_buffer_each_time
);
criterion_main!(magic_bench);

View File

@ -3,8 +3,6 @@ use std::{error, fmt, io::Error as IoError, num::TryFromIntError};
#[cfg(feature = "env")]
use envy::Error as EnvyError;
use hyper_old_types::Error as HeaderParseError;
#[cfg(feature = "magic")]
use magic::MagicError;
use reqwest::{header::ToStrError as HeaderStrError, Error as HttpError, StatusCode};
use serde::Deserialize;
use serde_json::Error as SerdeError;
@ -70,9 +68,6 @@ pub enum Error {
/// At the time of writing, this can only be triggered when a file is
/// larger than the system's usize allows.
IntConversion(TryFromIntError),
#[cfg(feature = "magic")]
/// An error received from the magic crate
Magic(MagicError),
/// Other errors
Other(String),
}
@ -102,11 +97,7 @@ impl error::Error for Error {
Envy(ref e) => Some(e),
SerdeQs(ref e) => Some(e),
IntConversion(ref e) => Some(e),
#[cfg(feature = "magic")]
Magic(ref e) => Some(e),
Api {
..
}
Api { .. }
| ClientIdRequired
| ClientSecretRequired
| AccessTokenRequired
@ -164,8 +155,6 @@ from! {
SerdeQsError => SerdeQs,
String => Other,
TryFromIntError => IntConversion,
#[cfg(feature = "magic")]
MagicError => Magic,
}
#[macro_export]

View File

@ -173,7 +173,7 @@ macro_rules! route_v2 {
let form_data = Form::new()
$(
.part(stringify!($param), self.get_form_part($param)?)
.part(stringify!($param), Self::get_form_part($param)?)
)*;
let form_data = if let Some(description) = description {
@ -217,7 +217,7 @@ macro_rules! route_v2 {
let form_data = Form::new()
$(
.part(stringify!($param), self.get_form_part($param)?)
.part(stringify!($param), Self::get_form_part($param)?)
)*;
let url = &self.route(concat!("/api/v2/", $url));
@ -261,7 +261,7 @@ macro_rules! route {
let form_data = Form::new()
$(
.part(stringify!($param), self.get_form_part($param)?)
.part(stringify!($param), Self::get_form_part($param)?)
)*;
let url = &self.route(concat!("/api/v1/", $url));
@ -302,7 +302,7 @@ macro_rules! route {
let form_data = Form::new()
$(
.part(stringify!($param), self.get_form_part($param)?)
.part(stringify!($param), Self::get_form_part($param)?)
)*;
let form_data = if let Some(description) = description {

View File

@ -11,15 +11,8 @@ use crate::{
errors::{Error, Result},
event_stream::event_stream,
helpers::read_response::read_response,
log_serde,
AddFilterRequest,
AddPushRequest,
Data,
NewStatus,
Page,
StatusesRequest,
UpdateCredsRequest,
UpdatePushRequest,
log_serde, AddFilterRequest, AddPushRequest, Data, NewStatus, Page, StatusesRequest,
UpdateCredsRequest, UpdatePushRequest,
};
use futures::TryStream;
use log::{as_debug, as_serde, debug, error, trace};
@ -27,26 +20,21 @@ use reqwest::{multipart::Part, Client, RequestBuilder};
use url::Url;
use uuid::Uuid;
#[cfg(feature = "magic")]
use magic::CookieFlags;
#[cfg(feature = "magic")]
use std::sync::Mutex;
/// The Mastodon client is a smart pointer to this struct
#[derive(Debug)]
pub struct MastodonClient {
pub(crate) client: Client,
/// Raw data about your mastodon instance.
pub data: Data,
/// A handle to access libmagic for mime-types.
#[cfg(feature = "magic")]
magic: Mutex<magic::Cookie>,
}
/// Your mastodon application client, handles all requests to and from Mastodon.
#[derive(Debug, Clone)]
pub struct Mastodon(Arc<MastodonClient>);
// This ensures we don't accidentally make Mastodon not Send or Sync again
static_assertions::assert_impl_all!(Mastodon: Send, Sync);
/// A client for making unauthenticated requests to the public API.
#[derive(Clone, Debug)]
pub struct MastodonUnauthenticated {
@ -57,23 +45,8 @@ pub struct MastodonUnauthenticated {
impl From<Data> for Mastodon {
/// Creates a mastodon instance from the data struct.
#[cfg(feature = "magic")]
fn from(data: Data) -> Mastodon {
Mastodon::new_with_magic(
Client::new(),
data,
Self::default_magic().expect("failed to open magic cookie or load database"),
)
}
/// Creates a mastodon instance from the data struct.
#[cfg(not(feature = "magic"))]
fn from(data: Data) -> Mastodon {
MastodonClient {
client: Client::new(),
data,
}
.into()
Mastodon::new(Client::new(), data)
}
}
impl Mastodon {
@ -175,46 +148,9 @@ impl Mastodon {
stream_direct@"direct",
}
/// Return a magic cookie, loaded with the default mime
#[cfg(feature = "magic")]
fn default_magic() -> Result<magic::Cookie> {
let magic = magic::Cookie::open(Default::default())?;
magic.load::<&str>(&[])?;
magic.set_flags(CookieFlags::MIME)?;
Ok(magic)
}
/// Create a new Mastodon Client
#[cfg(feature = "magic")]
/// A new instance.
pub fn new(client: Client, data: Data) -> Self {
Self::new_with_magic(
client,
data,
Self::default_magic().expect("failed to open magic cookie or load database"),
)
}
/// Create a new Mastodon Client, passing in a pre-constructed magic
/// cookie.
///
/// This is mainly here so you have a wait to construct the client which
/// won't panic.
#[cfg(feature = "magic")]
pub fn new_with_magic(client: Client, data: Data, magic: magic::Cookie) -> Self {
Mastodon(Arc::new(MastodonClient {
client,
data,
magic: Mutex::new(magic),
}))
}
/// Create a new Mastodon Client
#[cfg(not(feature = "magic"))]
pub fn new(client: Client, data: Data) -> Self {
Mastodon(Arc::new(MastodonClient {
client,
data,
}))
Mastodon(Arc::new(MastodonClient { client, data }))
}
fn route(&self, url: &str) -> String {
@ -397,20 +333,12 @@ impl Mastodon {
}
/// Return a part for a multipart form submission from a file, including
/// the name of the file, and, if the "magic" feature is enabled, the mime-
/// type.
fn get_form_part(&self, path: impl AsRef<Path>) -> Result<Part> {
/// the name of the file.
fn get_form_part(path: impl AsRef<Path>) -> Result<Part> {
use std::io::Read;
let path = path.as_ref();
// if it doesn't work, it's no big deal. The server will look at
// the filepath if this isn't here and things should still work.
#[cfg(feature = "magic")]
let mime = self.magic.lock().expect("mutex lock").file(path).ok();
#[cfg(not(feature = "magic"))]
let mime: Option<String> = None;
match std::fs::File::open(path) {
Ok(mut file) => {
let mut data = if let Ok(metadata) = file.metadata() {
@ -419,13 +347,8 @@ impl Mastodon {
vec![]
};
file.read_to_end(&mut data)?;
let part =
Part::bytes(data).file_name(Cow::Owned(path.to_string_lossy().to_string()));
Ok(if let Some(mime) = &mime {
part.mime_str(&mime)?
} else {
part
})
// TODO extract filename, error on dirs, etc.
Ok(Part::bytes(data).file_name(Cow::Owned(path.to_string_lossy().to_string())))
},
Err(err) => {
error!(path = as_debug!(path), error = as_debug!(err); "error reading file contents for multipart form");