Refactor things
Make the overall design a little more flexible if one wants to easily implement multiple feeds, and also kinda closer to the original template provided by Bluesky devs
This commit is contained in:
parent
fb17aa3e6a
commit
14b9f846da
|
@ -1303,6 +1303,7 @@ dependencies = [
|
|||
"dotenv",
|
||||
"futures",
|
||||
"libipld-core",
|
||||
"once_cell",
|
||||
"rs-car",
|
||||
"scooby",
|
||||
"serde",
|
||||
|
|
|
@ -17,6 +17,7 @@ ciborium = "0.2.1"
|
|||
dotenv = "0.15.0"
|
||||
futures = "0.3.28"
|
||||
libipld-core = { version = "0.16.0", features = ["serde-codec"] }
|
||||
once_cell = "1.18.0"
|
||||
rs-car = "0.4.1"
|
||||
scooby = "0.5.0"
|
||||
serde = "1.0.188"
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
mod nederlandskie;
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use crate::services::database::{Database, Post};
|
||||
|
||||
use self::nederlandskie::Nederlandskie;
|
||||
|
||||
#[async_trait]
|
||||
pub trait Algo {
|
||||
fn should_index_post(&self, author_did: &str, languages: &HashSet<String>, text: &str) -> bool;
|
||||
|
||||
async fn fetch_posts(
|
||||
&self,
|
||||
database: &Database,
|
||||
limit: i32,
|
||||
earlier_than: Option<(DateTime<Utc>, &str)>,
|
||||
) -> Result<Vec<Post>>;
|
||||
}
|
||||
|
||||
pub type AnyAlgo = Box<dyn Algo + Sync + Send>;
|
||||
pub type AlgosMap = HashMap<&'static str, AnyAlgo>;
|
||||
|
||||
static ALL_ALGOS: Lazy<AlgosMap> = Lazy::new(|| {
|
||||
let mut m = AlgosMap::new();
|
||||
m.insert("nederlandskie", Box::new(Nederlandskie));
|
||||
m
|
||||
});
|
||||
|
||||
pub fn iter_names() -> impl Iterator<Item = &'static str> {
|
||||
ALL_ALGOS.keys().map(|s| *s)
|
||||
}
|
||||
|
||||
pub fn iter_all() -> impl Iterator<Item = &'static AnyAlgo> {
|
||||
ALL_ALGOS.values()
|
||||
}
|
||||
|
||||
pub fn get_by_name(name: &str) -> Option<&'static AnyAlgo> {
|
||||
ALL_ALGOS.get(name)
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
use std::collections::HashSet;
|
||||
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
|
||||
use super::Algo;
|
||||
|
||||
use crate::services::{database::Post, Database};
|
||||
|
||||
pub struct Nederlandskie;
|
||||
|
||||
/// An algorithm that serves posts written in Russian by people living in Netherlands
|
||||
#[async_trait]
|
||||
impl Algo for Nederlandskie {
|
||||
fn should_index_post(
|
||||
&self,
|
||||
_author_did: &str,
|
||||
languages: &HashSet<String>,
|
||||
_text: &str,
|
||||
) -> bool {
|
||||
// BlueSky gets confused a lot about Russian vs Ukrainian, so skip posts
|
||||
// that may be in Ukrainian regardless of whether Russian is in the list
|
||||
languages.contains("ru") && !languages.contains("uk")
|
||||
}
|
||||
|
||||
async fn fetch_posts(
|
||||
&self,
|
||||
database: &Database,
|
||||
limit: i32,
|
||||
earlier_than: Option<(DateTime<Utc>, &str)>,
|
||||
) -> Result<Vec<Post>> {
|
||||
Ok(database
|
||||
.fetch_posts_by_authors_country("nl", limit as usize, earlier_than)
|
||||
.await?)
|
||||
}
|
||||
}
|
|
@ -1,3 +1,4 @@
|
|||
mod algos;
|
||||
mod config;
|
||||
mod processes;
|
||||
mod services;
|
||||
|
@ -8,9 +9,9 @@ use crate::config::Config;
|
|||
use crate::processes::FeedServer;
|
||||
use crate::processes::PostIndexer;
|
||||
use crate::processes::ProfileClassifier;
|
||||
use crate::services::AI;
|
||||
use crate::services::Bluesky;
|
||||
use crate::services::Database;
|
||||
use crate::services::AI;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
|
|
|
@ -3,6 +3,7 @@ use atrium_api::app::bsky::feed::describe_feed_generator::{
|
|||
};
|
||||
use axum::{extract::State, Json};
|
||||
|
||||
use crate::algos;
|
||||
use crate::processes::feed_server::state::FeedServerState;
|
||||
|
||||
pub async fn describe_feed_generator(
|
||||
|
@ -10,12 +11,14 @@ pub async fn describe_feed_generator(
|
|||
) -> Json<FeedGeneratorDescription> {
|
||||
Json(FeedGeneratorDescription {
|
||||
did: state.config.service_did.clone(),
|
||||
feeds: vec![Feed {
|
||||
uri: format!(
|
||||
"at://{}/app.bsky.feed.generator/{}",
|
||||
state.config.publisher_did, "nederlandskie"
|
||||
),
|
||||
}],
|
||||
feeds: algos::iter_names()
|
||||
.map(|name| Feed {
|
||||
uri: format!(
|
||||
"at://{}/app.bsky.feed.generator/{}",
|
||||
state.config.publisher_did, name
|
||||
),
|
||||
})
|
||||
.collect(),
|
||||
links: None,
|
||||
})
|
||||
}
|
||||
|
|
|
@ -7,13 +7,18 @@ use axum::extract::{Query, State};
|
|||
use axum::Json;
|
||||
use chrono::{DateTime, TimeZone, Utc};
|
||||
|
||||
use crate::algos;
|
||||
use crate::processes::feed_server::state::FeedServerState;
|
||||
|
||||
pub async fn get_feed_skeleton(
|
||||
State(state): State<FeedServerState>,
|
||||
query: Query<FeedSkeletonQuery>,
|
||||
) -> Json<FeedSkeleton> {
|
||||
let limit = query.limit.unwrap_or(20) as usize;
|
||||
let algo = algos::get_by_name(&query.feed)
|
||||
.ok_or_else(|| anyhow!("Feed {} not found", query.feed))
|
||||
.unwrap(); // TODO: handle error
|
||||
|
||||
let limit = query.limit.unwrap_or(20);
|
||||
let earlier_than = query
|
||||
.cursor
|
||||
.as_deref()
|
||||
|
@ -21,11 +26,10 @@ pub async fn get_feed_skeleton(
|
|||
.transpose()
|
||||
.unwrap(); // TODO: handle error
|
||||
|
||||
let posts = state
|
||||
.database
|
||||
.fetch_posts_by_authors_country("ru", limit, earlier_than)
|
||||
let posts = algo
|
||||
.fetch_posts(&state.database, limit, earlier_than)
|
||||
.await
|
||||
.unwrap();
|
||||
.unwrap(); // TODO: handle error
|
||||
|
||||
let feed = posts
|
||||
.iter()
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::algos;
|
||||
use crate::services::bluesky::{Bluesky, Operation, OperationProcessor};
|
||||
use crate::services::Database;
|
||||
|
||||
|
@ -32,24 +33,14 @@ impl<'a> OperationProcessor for PostIndexer<'a> {
|
|||
languages,
|
||||
text,
|
||||
} => {
|
||||
// TODO: Configure this via env vars
|
||||
if !languages.contains("ru") {
|
||||
return Ok(());
|
||||
if algos::iter_all().any(|a| a.should_index_post(author_did, languages, text)) {
|
||||
println!("received insertable post from {author_did}: {text}");
|
||||
|
||||
self.database
|
||||
.insert_profile_if_it_doesnt_exist(&author_did)
|
||||
.await?;
|
||||
self.database.insert_post(&author_did, &cid, &uri).await?;
|
||||
}
|
||||
|
||||
// BlueSky gets confused a lot about Russian vs Ukrainian, so skip posts
|
||||
// that may be in Ukrainian regardless of whether Russian is in the list
|
||||
// TODO: Configure this via env vars
|
||||
if languages.contains("uk") {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!("received insertable post from {author_did}: {text}");
|
||||
|
||||
self.database
|
||||
.insert_profile_if_it_doesnt_exist(&author_did)
|
||||
.await?;
|
||||
self.database.insert_post(&author_did, &cid, &uri).await?;
|
||||
}
|
||||
Operation::DeletePost { uri } => {
|
||||
println!("received a post do delete: {uri}");
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
mod ai;
|
||||
pub mod bluesky;
|
||||
mod database;
|
||||
pub mod database;
|
||||
|
||||
pub use ai::AI;
|
||||
pub use bluesky::Bluesky;
|
||||
|
|
Loading…
Reference in New Issue