Refactor things

Make the overall design a little more flexible if one wants
to easily implement multiple feeds, and also kinda closer to
the original template provided by Bluesky devs
This commit is contained in:
Aleksei Voronov 2023-09-16 21:07:00 +02:00
parent fb17aa3e6a
commit 14b9f846da
9 changed files with 113 additions and 30 deletions

1
Cargo.lock generated
View File

@ -1303,6 +1303,7 @@ dependencies = [
"dotenv",
"futures",
"libipld-core",
"once_cell",
"rs-car",
"scooby",
"serde",

View File

@ -17,6 +17,7 @@ ciborium = "0.2.1"
dotenv = "0.15.0"
futures = "0.3.28"
libipld-core = { version = "0.16.0", features = ["serde-codec"] }
once_cell = "1.18.0"
rs-car = "0.4.1"
scooby = "0.5.0"
serde = "1.0.188"

45
src/algos.rs Normal file
View File

@ -0,0 +1,45 @@
mod nederlandskie;
use std::collections::{HashMap, HashSet};
use anyhow::Result;
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use once_cell::sync::Lazy;
use crate::services::database::{Database, Post};
use self::nederlandskie::Nederlandskie;
#[async_trait]
pub trait Algo {
fn should_index_post(&self, author_did: &str, languages: &HashSet<String>, text: &str) -> bool;
async fn fetch_posts(
&self,
database: &Database,
limit: i32,
earlier_than: Option<(DateTime<Utc>, &str)>,
) -> Result<Vec<Post>>;
}
pub type AnyAlgo = Box<dyn Algo + Sync + Send>;
pub type AlgosMap = HashMap<&'static str, AnyAlgo>;
static ALL_ALGOS: Lazy<AlgosMap> = Lazy::new(|| {
let mut m = AlgosMap::new();
m.insert("nederlandskie", Box::new(Nederlandskie));
m
});
pub fn iter_names() -> impl Iterator<Item = &'static str> {
ALL_ALGOS.keys().map(|s| *s)
}
pub fn iter_all() -> impl Iterator<Item = &'static AnyAlgo> {
ALL_ALGOS.values()
}
pub fn get_by_name(name: &str) -> Option<&'static AnyAlgo> {
ALL_ALGOS.get(name)
}

View File

@ -0,0 +1,37 @@
use std::collections::HashSet;
use anyhow::Result;
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use super::Algo;
use crate::services::{database::Post, Database};
pub struct Nederlandskie;
/// An algorithm that serves posts written in Russian by people living in Netherlands
#[async_trait]
impl Algo for Nederlandskie {
fn should_index_post(
&self,
_author_did: &str,
languages: &HashSet<String>,
_text: &str,
) -> bool {
// BlueSky gets confused a lot about Russian vs Ukrainian, so skip posts
// that may be in Ukrainian regardless of whether Russian is in the list
languages.contains("ru") && !languages.contains("uk")
}
async fn fetch_posts(
&self,
database: &Database,
limit: i32,
earlier_than: Option<(DateTime<Utc>, &str)>,
) -> Result<Vec<Post>> {
Ok(database
.fetch_posts_by_authors_country("nl", limit as usize, earlier_than)
.await?)
}
}

View File

@ -1,3 +1,4 @@
mod algos;
mod config;
mod processes;
mod services;
@ -8,9 +9,9 @@ use crate::config::Config;
use crate::processes::FeedServer;
use crate::processes::PostIndexer;
use crate::processes::ProfileClassifier;
use crate::services::AI;
use crate::services::Bluesky;
use crate::services::Database;
use crate::services::AI;
#[tokio::main]
async fn main() -> Result<()> {

View File

@ -3,6 +3,7 @@ use atrium_api::app::bsky::feed::describe_feed_generator::{
};
use axum::{extract::State, Json};
use crate::algos;
use crate::processes::feed_server::state::FeedServerState;
pub async fn describe_feed_generator(
@ -10,12 +11,14 @@ pub async fn describe_feed_generator(
) -> Json<FeedGeneratorDescription> {
Json(FeedGeneratorDescription {
did: state.config.service_did.clone(),
feeds: vec![Feed {
feeds: algos::iter_names()
.map(|name| Feed {
uri: format!(
"at://{}/app.bsky.feed.generator/{}",
state.config.publisher_did, "nederlandskie"
state.config.publisher_did, name
),
}],
})
.collect(),
links: None,
})
}

View File

@ -7,13 +7,18 @@ use axum::extract::{Query, State};
use axum::Json;
use chrono::{DateTime, TimeZone, Utc};
use crate::algos;
use crate::processes::feed_server::state::FeedServerState;
pub async fn get_feed_skeleton(
State(state): State<FeedServerState>,
query: Query<FeedSkeletonQuery>,
) -> Json<FeedSkeleton> {
let limit = query.limit.unwrap_or(20) as usize;
let algo = algos::get_by_name(&query.feed)
.ok_or_else(|| anyhow!("Feed {} not found", query.feed))
.unwrap(); // TODO: handle error
let limit = query.limit.unwrap_or(20);
let earlier_than = query
.cursor
.as_deref()
@ -21,11 +26,10 @@ pub async fn get_feed_skeleton(
.transpose()
.unwrap(); // TODO: handle error
let posts = state
.database
.fetch_posts_by_authors_country("ru", limit, earlier_than)
let posts = algo
.fetch_posts(&state.database, limit, earlier_than)
.await
.unwrap();
.unwrap(); // TODO: handle error
let feed = posts
.iter()

View File

@ -1,6 +1,7 @@
use anyhow::Result;
use async_trait::async_trait;
use crate::algos;
use crate::services::bluesky::{Bluesky, Operation, OperationProcessor};
use crate::services::Database;
@ -32,18 +33,7 @@ impl<'a> OperationProcessor for PostIndexer<'a> {
languages,
text,
} => {
// TODO: Configure this via env vars
if !languages.contains("ru") {
return Ok(());
}
// BlueSky gets confused a lot about Russian vs Ukrainian, so skip posts
// that may be in Ukrainian regardless of whether Russian is in the list
// TODO: Configure this via env vars
if languages.contains("uk") {
return Ok(());
}
if algos::iter_all().any(|a| a.should_index_post(author_did, languages, text)) {
println!("received insertable post from {author_did}: {text}");
self.database
@ -51,6 +41,7 @@ impl<'a> OperationProcessor for PostIndexer<'a> {
.await?;
self.database.insert_post(&author_did, &cid, &uri).await?;
}
}
Operation::DeletePost { uri } => {
println!("received a post do delete: {uri}");

View File

@ -1,6 +1,6 @@
mod ai;
pub mod bluesky;
mod database;
pub mod database;
pub use ai::AI;
pub use bluesky::Bluesky;