Pass around bluesky objects instead of restructuring them everywhere
Makes things a little simpler in many places, especially once I start adding more fields to posts
This commit is contained in:
parent
5eeb0e45b1
commit
96915ca986
15
src/algos.rs
15
src/algos.rs
|
@ -1,30 +1,27 @@
|
|||
mod nederlandskie;
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
|
||||
use crate::services::database::{Database, Post};
|
||||
use crate::services::bluesky;
|
||||
use crate::services::database::{self, Database};
|
||||
|
||||
pub use self::nederlandskie::Nederlandskie;
|
||||
|
||||
#[async_trait]
|
||||
pub trait Algo {
|
||||
async fn should_index_post(
|
||||
&self,
|
||||
author_did: &str,
|
||||
languages: &HashSet<String>,
|
||||
text: &str,
|
||||
) -> Result<bool>;
|
||||
async fn should_index_post(&self, author_did: &str, post: &bluesky::PostRecord)
|
||||
-> Result<bool>;
|
||||
|
||||
async fn fetch_posts(
|
||||
&self,
|
||||
database: &Database,
|
||||
limit: i32,
|
||||
earlier_than: Option<(DateTime<Utc>, &str)>,
|
||||
) -> Result<Vec<Post>>;
|
||||
) -> Result<Vec<database::Post>>;
|
||||
}
|
||||
|
||||
pub type AnyAlgo = Box<dyn Algo + Sync + Send>;
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
|
@ -9,7 +8,8 @@ use lingua::LanguageDetector;
|
|||
|
||||
use super::Algo;
|
||||
|
||||
use crate::services::{database::Post, Database};
|
||||
use crate::services::bluesky;
|
||||
use crate::services::database::{self, Database};
|
||||
|
||||
pub struct Nederlandskie {
|
||||
language_detector: Arc<LanguageDetector>,
|
||||
|
@ -27,10 +27,9 @@ impl Algo for Nederlandskie {
|
|||
async fn should_index_post(
|
||||
&self,
|
||||
_author_did: &str,
|
||||
_languages: &HashSet<String>,
|
||||
text: &str,
|
||||
post: &bluesky::PostRecord,
|
||||
) -> Result<bool> {
|
||||
Ok(self.language_detector.detect_language_of(text) == Some(Russian))
|
||||
Ok(self.language_detector.detect_language_of(&post.text) == Some(Russian))
|
||||
}
|
||||
|
||||
async fn fetch_posts(
|
||||
|
@ -38,7 +37,7 @@ impl Algo for Nederlandskie {
|
|||
database: &Database,
|
||||
limit: i32,
|
||||
earlier_than: Option<(DateTime<Utc>, &str)>,
|
||||
) -> Result<Vec<Post>> {
|
||||
) -> Result<Vec<database::Post>> {
|
||||
Ok(database
|
||||
.fetch_posts_by_authors_country("nl", limit as usize, earlier_than)
|
||||
.await?)
|
||||
|
|
|
@ -75,12 +75,11 @@ impl CommitProcessor for PostIndexer {
|
|||
author_did,
|
||||
cid,
|
||||
uri,
|
||||
languages,
|
||||
text,
|
||||
post,
|
||||
} => {
|
||||
for algo in self.algos.iter_all() {
|
||||
if algo.should_index_post(author_did, languages, text).await? {
|
||||
info!("Received insertable post from {author_did}: {text}");
|
||||
if algo.should_index_post(author_did, post).await? {
|
||||
info!("Received insertable post from {author_did}: {}", post.text);
|
||||
|
||||
self.database
|
||||
.insert_profile_if_it_doesnt_exist(author_did)
|
||||
|
|
|
@ -4,4 +4,5 @@ mod internals;
|
|||
mod streaming;
|
||||
|
||||
pub use client::Bluesky;
|
||||
pub use entities::{FollowRecord, LikeRecord, PostRecord, Session};
|
||||
pub use streaming::{CommitDetails, CommitProcessor, Operation};
|
||||
|
|
|
@ -4,6 +4,7 @@ use anyhow::{anyhow, Error, Result};
|
|||
|
||||
use crate::services::bluesky::internals::cbor::CborValue;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct FollowRecord {
|
||||
pub subject: String,
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ use anyhow::{anyhow, Error, Result};
|
|||
|
||||
use crate::services::bluesky::internals::cbor::CborValue;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct LikeRecord {
|
||||
pub subject: Subject,
|
||||
}
|
||||
|
@ -23,6 +24,7 @@ impl TryFrom<CborValue> for LikeRecord {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Subject {
|
||||
pub cid: String,
|
||||
pub uri: String,
|
||||
|
|
|
@ -4,6 +4,7 @@ use anyhow::{anyhow, Error, Result};
|
|||
|
||||
use crate::services::bluesky::internals::cbor::CborValue;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PostRecord {
|
||||
pub langs: Option<Vec<String>>,
|
||||
pub text: String,
|
||||
|
@ -20,7 +21,10 @@ impl TryFrom<CborValue> for PostRecord {
|
|||
.remove("text")
|
||||
.ok_or_else(|| anyhow!("Missing field: text"))?
|
||||
.try_into()?,
|
||||
langs: map.remove("langs").map(|value| value.try_into()).transpose()?,
|
||||
langs: map
|
||||
.remove("langs")
|
||||
.map(|value| value.try_into())
|
||||
.transpose()?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
|
@ -35,21 +35,19 @@ pub enum Operation {
|
|||
author_did: String,
|
||||
cid: String,
|
||||
uri: String,
|
||||
languages: HashSet<String>,
|
||||
text: String,
|
||||
post: PostRecord,
|
||||
},
|
||||
CreateLike {
|
||||
author_did: String,
|
||||
cid: String,
|
||||
uri: String,
|
||||
subject_cid: String,
|
||||
subject_uri: String,
|
||||
like: LikeRecord,
|
||||
},
|
||||
CreateFollow {
|
||||
author_did: String,
|
||||
cid: String,
|
||||
uri: String,
|
||||
subject: String,
|
||||
follow: FollowRecord,
|
||||
},
|
||||
DeletePost {
|
||||
uri: String,
|
||||
|
@ -116,35 +114,33 @@ async fn extract_operations(commit: &Commit) -> Result<Vec<Operation>> {
|
|||
|
||||
match collection {
|
||||
COLLECTION_POST => {
|
||||
let record: PostRecord = read_record(block)?;
|
||||
let post: PostRecord = read_record(block)?;
|
||||
|
||||
Operation::CreatePost {
|
||||
author_did: commit.repo.clone(),
|
||||
cid: cid.to_string(),
|
||||
uri,
|
||||
languages: record.langs.unwrap_or_default().iter().cloned().collect(),
|
||||
text: record.text,
|
||||
post,
|
||||
}
|
||||
}
|
||||
COLLECTION_LIKE => {
|
||||
let record: LikeRecord = read_record(block)?;
|
||||
let like: LikeRecord = read_record(block)?;
|
||||
|
||||
Operation::CreateLike {
|
||||
author_did: commit.repo.clone(),
|
||||
cid: cid.to_string(),
|
||||
uri,
|
||||
subject_cid: record.subject.cid,
|
||||
subject_uri: record.subject.uri,
|
||||
like,
|
||||
}
|
||||
}
|
||||
COLLECTION_FOLLOW => {
|
||||
let record: FollowRecord = read_record(block)?;
|
||||
let follow: FollowRecord = read_record(block)?;
|
||||
|
||||
Operation::CreateFollow {
|
||||
author_did: commit.repo.clone(),
|
||||
cid: cid.to_string(),
|
||||
uri,
|
||||
subject: record.subject,
|
||||
follow,
|
||||
}
|
||||
}
|
||||
_ => continue,
|
||||
|
|
Loading…
Reference in New Issue