diff --git a/Cargo.lock b/Cargo.lock index 7f56277..25af601 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1303,6 +1303,7 @@ dependencies = [ "dotenv", "futures", "libipld-core", + "once_cell", "rs-car", "scooby", "serde", diff --git a/Cargo.toml b/Cargo.toml index 9c4361a..1643fb1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ ciborium = "0.2.1" dotenv = "0.15.0" futures = "0.3.28" libipld-core = { version = "0.16.0", features = ["serde-codec"] } +once_cell = "1.18.0" rs-car = "0.4.1" scooby = "0.5.0" serde = "1.0.188" diff --git a/src/algos.rs b/src/algos.rs new file mode 100644 index 0000000..b608203 --- /dev/null +++ b/src/algos.rs @@ -0,0 +1,45 @@ +mod nederlandskie; + +use std::collections::{HashMap, HashSet}; + +use anyhow::Result; +use async_trait::async_trait; +use chrono::{DateTime, Utc}; +use once_cell::sync::Lazy; + +use crate::services::database::{Database, Post}; + +use self::nederlandskie::Nederlandskie; + +#[async_trait] +pub trait Algo { + fn should_index_post(&self, author_did: &str, languages: &HashSet, text: &str) -> bool; + + async fn fetch_posts( + &self, + database: &Database, + limit: i32, + earlier_than: Option<(DateTime, &str)>, + ) -> Result>; +} + +pub type AnyAlgo = Box; +pub type AlgosMap = HashMap<&'static str, AnyAlgo>; + +static ALL_ALGOS: Lazy = Lazy::new(|| { + let mut m = AlgosMap::new(); + m.insert("nederlandskie", Box::new(Nederlandskie)); + m +}); + +pub fn iter_names() -> impl Iterator { + ALL_ALGOS.keys().map(|s| *s) +} + +pub fn iter_all() -> impl Iterator { + ALL_ALGOS.values() +} + +pub fn get_by_name(name: &str) -> Option<&'static AnyAlgo> { + ALL_ALGOS.get(name) +} diff --git a/src/algos/nederlandskie.rs b/src/algos/nederlandskie.rs new file mode 100644 index 0000000..f235c31 --- /dev/null +++ b/src/algos/nederlandskie.rs @@ -0,0 +1,37 @@ +use std::collections::HashSet; + +use anyhow::Result; +use async_trait::async_trait; +use chrono::{DateTime, Utc}; + +use super::Algo; + +use crate::services::{database::Post, Database}; + +pub struct Nederlandskie; + +/// An algorithm that serves posts written in Russian by people living in Netherlands +#[async_trait] +impl Algo for Nederlandskie { + fn should_index_post( + &self, + _author_did: &str, + languages: &HashSet, + _text: &str, + ) -> bool { + // BlueSky gets confused a lot about Russian vs Ukrainian, so skip posts + // that may be in Ukrainian regardless of whether Russian is in the list + languages.contains("ru") && !languages.contains("uk") + } + + async fn fetch_posts( + &self, + database: &Database, + limit: i32, + earlier_than: Option<(DateTime, &str)>, + ) -> Result> { + Ok(database + .fetch_posts_by_authors_country("nl", limit as usize, earlier_than) + .await?) + } +} diff --git a/src/main.rs b/src/main.rs index 1248154..5bda794 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +mod algos; mod config; mod processes; mod services; @@ -8,9 +9,9 @@ use crate::config::Config; use crate::processes::FeedServer; use crate::processes::PostIndexer; use crate::processes::ProfileClassifier; -use crate::services::AI; use crate::services::Bluesky; use crate::services::Database; +use crate::services::AI; #[tokio::main] async fn main() -> Result<()> { diff --git a/src/processes/feed_server/endpoints/describe_feed_generator.rs b/src/processes/feed_server/endpoints/describe_feed_generator.rs index 5d2e414..1648f1f 100644 --- a/src/processes/feed_server/endpoints/describe_feed_generator.rs +++ b/src/processes/feed_server/endpoints/describe_feed_generator.rs @@ -3,6 +3,7 @@ use atrium_api::app::bsky::feed::describe_feed_generator::{ }; use axum::{extract::State, Json}; +use crate::algos; use crate::processes::feed_server::state::FeedServerState; pub async fn describe_feed_generator( @@ -10,12 +11,14 @@ pub async fn describe_feed_generator( ) -> Json { Json(FeedGeneratorDescription { did: state.config.service_did.clone(), - feeds: vec![Feed { - uri: format!( - "at://{}/app.bsky.feed.generator/{}", - state.config.publisher_did, "nederlandskie" - ), - }], + feeds: algos::iter_names() + .map(|name| Feed { + uri: format!( + "at://{}/app.bsky.feed.generator/{}", + state.config.publisher_did, name + ), + }) + .collect(), links: None, }) } diff --git a/src/processes/feed_server/endpoints/get_feed_skeleton.rs b/src/processes/feed_server/endpoints/get_feed_skeleton.rs index 8acaa25..a7b507d 100644 --- a/src/processes/feed_server/endpoints/get_feed_skeleton.rs +++ b/src/processes/feed_server/endpoints/get_feed_skeleton.rs @@ -7,13 +7,18 @@ use axum::extract::{Query, State}; use axum::Json; use chrono::{DateTime, TimeZone, Utc}; +use crate::algos; use crate::processes::feed_server::state::FeedServerState; pub async fn get_feed_skeleton( State(state): State, query: Query, ) -> Json { - let limit = query.limit.unwrap_or(20) as usize; + let algo = algos::get_by_name(&query.feed) + .ok_or_else(|| anyhow!("Feed {} not found", query.feed)) + .unwrap(); // TODO: handle error + + let limit = query.limit.unwrap_or(20); let earlier_than = query .cursor .as_deref() @@ -21,11 +26,10 @@ pub async fn get_feed_skeleton( .transpose() .unwrap(); // TODO: handle error - let posts = state - .database - .fetch_posts_by_authors_country("ru", limit, earlier_than) + let posts = algo + .fetch_posts(&state.database, limit, earlier_than) .await - .unwrap(); + .unwrap(); // TODO: handle error let feed = posts .iter() diff --git a/src/processes/post_indexer.rs b/src/processes/post_indexer.rs index aa118fd..7554210 100644 --- a/src/processes/post_indexer.rs +++ b/src/processes/post_indexer.rs @@ -1,6 +1,7 @@ use anyhow::Result; use async_trait::async_trait; +use crate::algos; use crate::services::bluesky::{Bluesky, Operation, OperationProcessor}; use crate::services::Database; @@ -32,24 +33,14 @@ impl<'a> OperationProcessor for PostIndexer<'a> { languages, text, } => { - // TODO: Configure this via env vars - if !languages.contains("ru") { - return Ok(()); + if algos::iter_all().any(|a| a.should_index_post(author_did, languages, text)) { + println!("received insertable post from {author_did}: {text}"); + + self.database + .insert_profile_if_it_doesnt_exist(&author_did) + .await?; + self.database.insert_post(&author_did, &cid, &uri).await?; } - - // BlueSky gets confused a lot about Russian vs Ukrainian, so skip posts - // that may be in Ukrainian regardless of whether Russian is in the list - // TODO: Configure this via env vars - if languages.contains("uk") { - return Ok(()); - } - - println!("received insertable post from {author_did}: {text}"); - - self.database - .insert_profile_if_it_doesnt_exist(&author_did) - .await?; - self.database.insert_post(&author_did, &cid, &uri).await?; } Operation::DeletePost { uri } => { println!("received a post do delete: {uri}"); diff --git a/src/services.rs b/src/services.rs index 6ad9a56..b024675 100644 --- a/src/services.rs +++ b/src/services.rs @@ -1,6 +1,6 @@ mod ai; pub mod bluesky; -mod database; +pub mod database; pub use ai::AI; pub use bluesky::Bluesky;