Pass around bluesky objects instead of restructuring them everywhere

Makes things a little simpler in many places, especially once I start adding more fields to posts
This commit is contained in:
Aleksei Voronov 2023-10-15 11:55:25 +02:00
parent 5eeb0e45b1
commit 96915ca986
8 changed files with 33 additions and 34 deletions

View File

@ -1,30 +1,27 @@
mod nederlandskie; mod nederlandskie;
use std::collections::{HashMap, HashSet}; use std::collections::HashMap;
use anyhow::Result; use anyhow::Result;
use async_trait::async_trait; use async_trait::async_trait;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use crate::services::database::{Database, Post}; use crate::services::bluesky;
use crate::services::database::{self, Database};
pub use self::nederlandskie::Nederlandskie; pub use self::nederlandskie::Nederlandskie;
#[async_trait] #[async_trait]
pub trait Algo { pub trait Algo {
async fn should_index_post( async fn should_index_post(&self, author_did: &str, post: &bluesky::PostRecord)
&self, -> Result<bool>;
author_did: &str,
languages: &HashSet<String>,
text: &str,
) -> Result<bool>;
async fn fetch_posts( async fn fetch_posts(
&self, &self,
database: &Database, database: &Database,
limit: i32, limit: i32,
earlier_than: Option<(DateTime<Utc>, &str)>, earlier_than: Option<(DateTime<Utc>, &str)>,
) -> Result<Vec<Post>>; ) -> Result<Vec<database::Post>>;
} }
pub type AnyAlgo = Box<dyn Algo + Sync + Send>; pub type AnyAlgo = Box<dyn Algo + Sync + Send>;

View File

@ -1,4 +1,3 @@
use std::collections::HashSet;
use std::sync::Arc; use std::sync::Arc;
use anyhow::Result; use anyhow::Result;
@ -9,7 +8,8 @@ use lingua::LanguageDetector;
use super::Algo; use super::Algo;
use crate::services::{database::Post, Database}; use crate::services::bluesky;
use crate::services::database::{self, Database};
pub struct Nederlandskie { pub struct Nederlandskie {
language_detector: Arc<LanguageDetector>, language_detector: Arc<LanguageDetector>,
@ -27,10 +27,9 @@ impl Algo for Nederlandskie {
async fn should_index_post( async fn should_index_post(
&self, &self,
_author_did: &str, _author_did: &str,
_languages: &HashSet<String>, post: &bluesky::PostRecord,
text: &str,
) -> Result<bool> { ) -> Result<bool> {
Ok(self.language_detector.detect_language_of(text) == Some(Russian)) Ok(self.language_detector.detect_language_of(&post.text) == Some(Russian))
} }
async fn fetch_posts( async fn fetch_posts(
@ -38,7 +37,7 @@ impl Algo for Nederlandskie {
database: &Database, database: &Database,
limit: i32, limit: i32,
earlier_than: Option<(DateTime<Utc>, &str)>, earlier_than: Option<(DateTime<Utc>, &str)>,
) -> Result<Vec<Post>> { ) -> Result<Vec<database::Post>> {
Ok(database Ok(database
.fetch_posts_by_authors_country("nl", limit as usize, earlier_than) .fetch_posts_by_authors_country("nl", limit as usize, earlier_than)
.await?) .await?)

View File

@ -75,12 +75,11 @@ impl CommitProcessor for PostIndexer {
author_did, author_did,
cid, cid,
uri, uri,
languages, post,
text,
} => { } => {
for algo in self.algos.iter_all() { for algo in self.algos.iter_all() {
if algo.should_index_post(author_did, languages, text).await? { if algo.should_index_post(author_did, post).await? {
info!("Received insertable post from {author_did}: {text}"); info!("Received insertable post from {author_did}: {}", post.text);
self.database self.database
.insert_profile_if_it_doesnt_exist(author_did) .insert_profile_if_it_doesnt_exist(author_did)

View File

@ -4,4 +4,5 @@ mod internals;
mod streaming; mod streaming;
pub use client::Bluesky; pub use client::Bluesky;
pub use entities::{FollowRecord, LikeRecord, PostRecord, Session};
pub use streaming::{CommitDetails, CommitProcessor, Operation}; pub use streaming::{CommitDetails, CommitProcessor, Operation};

View File

@ -4,6 +4,7 @@ use anyhow::{anyhow, Error, Result};
use crate::services::bluesky::internals::cbor::CborValue; use crate::services::bluesky::internals::cbor::CborValue;
#[derive(Debug)]
pub struct FollowRecord { pub struct FollowRecord {
pub subject: String, pub subject: String,
} }

View File

@ -4,6 +4,7 @@ use anyhow::{anyhow, Error, Result};
use crate::services::bluesky::internals::cbor::CborValue; use crate::services::bluesky::internals::cbor::CborValue;
#[derive(Debug)]
pub struct LikeRecord { pub struct LikeRecord {
pub subject: Subject, pub subject: Subject,
} }
@ -23,6 +24,7 @@ impl TryFrom<CborValue> for LikeRecord {
} }
} }
#[derive(Debug)]
pub struct Subject { pub struct Subject {
pub cid: String, pub cid: String,
pub uri: String, pub uri: String,

View File

@ -4,6 +4,7 @@ use anyhow::{anyhow, Error, Result};
use crate::services::bluesky::internals::cbor::CborValue; use crate::services::bluesky::internals::cbor::CborValue;
#[derive(Debug)]
pub struct PostRecord { pub struct PostRecord {
pub langs: Option<Vec<String>>, pub langs: Option<Vec<String>>,
pub text: String, pub text: String,
@ -20,7 +21,10 @@ impl TryFrom<CborValue> for PostRecord {
.remove("text") .remove("text")
.ok_or_else(|| anyhow!("Missing field: text"))? .ok_or_else(|| anyhow!("Missing field: text"))?
.try_into()?, .try_into()?,
langs: map.remove("langs").map(|value| value.try_into()).transpose()?, langs: map
.remove("langs")
.map(|value| value.try_into())
.transpose()?,
}) })
} }
} }

View File

@ -1,4 +1,4 @@
use std::collections::{HashMap, HashSet}; use std::collections::HashMap;
use anyhow::Result; use anyhow::Result;
use async_trait::async_trait; use async_trait::async_trait;
@ -35,21 +35,19 @@ pub enum Operation {
author_did: String, author_did: String,
cid: String, cid: String,
uri: String, uri: String,
languages: HashSet<String>, post: PostRecord,
text: String,
}, },
CreateLike { CreateLike {
author_did: String, author_did: String,
cid: String, cid: String,
uri: String, uri: String,
subject_cid: String, like: LikeRecord,
subject_uri: String,
}, },
CreateFollow { CreateFollow {
author_did: String, author_did: String,
cid: String, cid: String,
uri: String, uri: String,
subject: String, follow: FollowRecord,
}, },
DeletePost { DeletePost {
uri: String, uri: String,
@ -116,35 +114,33 @@ async fn extract_operations(commit: &Commit) -> Result<Vec<Operation>> {
match collection { match collection {
COLLECTION_POST => { COLLECTION_POST => {
let record: PostRecord = read_record(block)?; let post: PostRecord = read_record(block)?;
Operation::CreatePost { Operation::CreatePost {
author_did: commit.repo.clone(), author_did: commit.repo.clone(),
cid: cid.to_string(), cid: cid.to_string(),
uri, uri,
languages: record.langs.unwrap_or_default().iter().cloned().collect(), post,
text: record.text,
} }
} }
COLLECTION_LIKE => { COLLECTION_LIKE => {
let record: LikeRecord = read_record(block)?; let like: LikeRecord = read_record(block)?;
Operation::CreateLike { Operation::CreateLike {
author_did: commit.repo.clone(), author_did: commit.repo.clone(),
cid: cid.to_string(), cid: cid.to_string(),
uri, uri,
subject_cid: record.subject.cid, like,
subject_uri: record.subject.uri,
} }
} }
COLLECTION_FOLLOW => { COLLECTION_FOLLOW => {
let record: FollowRecord = read_record(block)?; let follow: FollowRecord = read_record(block)?;
Operation::CreateFollow { Operation::CreateFollow {
author_did: commit.repo.clone(), author_did: commit.repo.clone(),
cid: cid.to_string(), cid: cid.to_string(),
uri, uri,
subject: record.subject, follow,
} }
} }
_ => continue, _ => continue,