Attempt to also index all posts made by posters that previously posted in Russian

I don't know whether this will perform well, though.
It now runs a database query for every post, which may or may
not be a great idea.
This commit is contained in:
Aleksei Voronov 2023-10-16 12:10:12 +02:00
parent 8ad19f6fa5
commit 556f939774
3 changed files with 44 additions and 5 deletions

View File

@ -14,11 +14,25 @@ use crate::services::database::{self, Database};
/// An algorithm that serves posts written in Russian by people living in the Netherlands
pub struct Nederlandskie {
/// Detector used to determine the language of a post's text.
language_detector: Arc<LanguageDetector>,
/// Database handle used to look up the country recorded for an author's profile.
database: Arc<Database>,
}
impl Nederlandskie {
pub fn new(language_detector: Arc<LanguageDetector>) -> Self {
Self { language_detector }
pub fn new(language_detector: Arc<LanguageDetector>, database: Arc<Database>) -> Self {
Self {
language_detector,
database,
}
}
}
impl Nederlandskie {
    /// Returns `true` when the language detector reports the post's text as Russian.
    fn is_post_in_russian(&self, post: &bluesky::PostRecord) -> bool {
        let detected_language = self.language_detector.detect_language_of(&post.text);
        detected_language == Some(Russian)
    }

    /// Returns `true` only when the database positively reports that the profile
    /// identified by `did` is in the country `"nl"`.
    ///
    /// A `None` answer from the database is treated the same as `Some(false)`.
    /// Database errors are propagated to the caller.
    async fn is_profile_residing_in_netherlands(&self, did: &str) -> Result<bool> {
        let lookup = self.database.is_profile_in_this_country(did, "nl").await?;
        Ok(lookup == Some(true))
    }
}
@ -26,10 +40,11 @@ impl Nederlandskie {
impl Algo for Nederlandskie {
async fn should_index_post(
&self,
_author_did: &str,
author_did: &str,
post: &bluesky::PostRecord,
) -> Result<bool> {
Ok(self.language_detector.detect_language_of(&post.text) == Some(Russian))
Ok(self.is_post_in_russian(&post)
|| self.is_profile_residing_in_netherlands(author_did).await?)
}
async fn fetch_posts(

View File

@ -36,7 +36,10 @@ async fn main() -> Result<()> {
let algos = Arc::new(
AlgosBuilder::new()
.add("nederlandskie", Nederlandskie::new(language_detector))
.add(
"nederlandskie",
Nederlandskie::new(language_detector, database.clone()),
)
.build(),
);

View File

@ -192,6 +192,27 @@ impl Database {
Ok(true)
}
/// Checks whether the country stored for a processed profile equals `country`.
///
/// Looks up `likely_country_of_living` in the `Profile` table for the row whose
/// `did` matches and whose `has_been_processed` flag is `TRUE`.
///
/// Returns `Ok(Some(true))` or `Ok(Some(false))` depending on whether the stored
/// value equals `country`, and `Ok(None)` when the query yields no row.
/// Errors from running the query are propagated.
///
/// NOTE(review): the column is read as a non-optional `String` via `get`; if the
/// column can be NULL for a processed profile this presumably panics — confirm
/// against the schema.
pub async fn is_profile_in_this_country(
    &self,
    did: &str,
    country: &str,
) -> Result<Option<bool>> {
    let mut params = Parameters::new();

    // Build the SQL text first so the query pipeline below stays readable.
    let sql = select("likely_country_of_living")
        .from("Profile")
        .where_(format!("did = {}", params.next()))
        .where_("has_been_processed = TRUE")
        .to_string();

    let country_matches = query(&sql)
        .bind(did)
        .map(|row: PgRow| {
            let stored_country: String = row.get("likely_country_of_living");
            stored_country == country
        })
        .fetch_optional(&self.connection_pool)
        .await?;

    Ok(country_matches)
}
pub async fn fetch_subscription_cursor(&self, did: &str) -> Result<Option<i32>> {
let mut params = Parameters::new();