Pass around bluesky objects instead of restructuring them everywhere
Makes things a little simpler in many places, especially once I start adding more fields to posts.
This commit is contained in:
parent
5eeb0e45b1
commit
96915ca986
15
src/algos.rs
15
src/algos.rs
|
@ -1,30 +1,27 @@
|
||||||
mod nederlandskie;
|
mod nederlandskie;
|
||||||
|
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
|
|
||||||
use crate::services::database::{Database, Post};
|
use crate::services::bluesky;
|
||||||
|
use crate::services::database::{self, Database};
|
||||||
|
|
||||||
pub use self::nederlandskie::Nederlandskie;
|
pub use self::nederlandskie::Nederlandskie;
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait Algo {
|
pub trait Algo {
|
||||||
async fn should_index_post(
|
async fn should_index_post(&self, author_did: &str, post: &bluesky::PostRecord)
|
||||||
&self,
|
-> Result<bool>;
|
||||||
author_did: &str,
|
|
||||||
languages: &HashSet<String>,
|
|
||||||
text: &str,
|
|
||||||
) -> Result<bool>;
|
|
||||||
|
|
||||||
async fn fetch_posts(
|
async fn fetch_posts(
|
||||||
&self,
|
&self,
|
||||||
database: &Database,
|
database: &Database,
|
||||||
limit: i32,
|
limit: i32,
|
||||||
earlier_than: Option<(DateTime<Utc>, &str)>,
|
earlier_than: Option<(DateTime<Utc>, &str)>,
|
||||||
) -> Result<Vec<Post>>;
|
) -> Result<Vec<database::Post>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type AnyAlgo = Box<dyn Algo + Sync + Send>;
|
pub type AnyAlgo = Box<dyn Algo + Sync + Send>;
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
use std::collections::HashSet;
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
@ -9,7 +8,8 @@ use lingua::LanguageDetector;
|
||||||
|
|
||||||
use super::Algo;
|
use super::Algo;
|
||||||
|
|
||||||
use crate::services::{database::Post, Database};
|
use crate::services::bluesky;
|
||||||
|
use crate::services::database::{self, Database};
|
||||||
|
|
||||||
pub struct Nederlandskie {
|
pub struct Nederlandskie {
|
||||||
language_detector: Arc<LanguageDetector>,
|
language_detector: Arc<LanguageDetector>,
|
||||||
|
@ -27,10 +27,9 @@ impl Algo for Nederlandskie {
|
||||||
async fn should_index_post(
|
async fn should_index_post(
|
||||||
&self,
|
&self,
|
||||||
_author_did: &str,
|
_author_did: &str,
|
||||||
_languages: &HashSet<String>,
|
post: &bluesky::PostRecord,
|
||||||
text: &str,
|
|
||||||
) -> Result<bool> {
|
) -> Result<bool> {
|
||||||
Ok(self.language_detector.detect_language_of(text) == Some(Russian))
|
Ok(self.language_detector.detect_language_of(&post.text) == Some(Russian))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fetch_posts(
|
async fn fetch_posts(
|
||||||
|
@ -38,7 +37,7 @@ impl Algo for Nederlandskie {
|
||||||
database: &Database,
|
database: &Database,
|
||||||
limit: i32,
|
limit: i32,
|
||||||
earlier_than: Option<(DateTime<Utc>, &str)>,
|
earlier_than: Option<(DateTime<Utc>, &str)>,
|
||||||
) -> Result<Vec<Post>> {
|
) -> Result<Vec<database::Post>> {
|
||||||
Ok(database
|
Ok(database
|
||||||
.fetch_posts_by_authors_country("nl", limit as usize, earlier_than)
|
.fetch_posts_by_authors_country("nl", limit as usize, earlier_than)
|
||||||
.await?)
|
.await?)
|
||||||
|
|
|
@ -75,12 +75,11 @@ impl CommitProcessor for PostIndexer {
|
||||||
author_did,
|
author_did,
|
||||||
cid,
|
cid,
|
||||||
uri,
|
uri,
|
||||||
languages,
|
post,
|
||||||
text,
|
|
||||||
} => {
|
} => {
|
||||||
for algo in self.algos.iter_all() {
|
for algo in self.algos.iter_all() {
|
||||||
if algo.should_index_post(author_did, languages, text).await? {
|
if algo.should_index_post(author_did, post).await? {
|
||||||
info!("Received insertable post from {author_did}: {text}");
|
info!("Received insertable post from {author_did}: {}", post.text);
|
||||||
|
|
||||||
self.database
|
self.database
|
||||||
.insert_profile_if_it_doesnt_exist(author_did)
|
.insert_profile_if_it_doesnt_exist(author_did)
|
||||||
|
|
|
@ -4,4 +4,5 @@ mod internals;
|
||||||
mod streaming;
|
mod streaming;
|
||||||
|
|
||||||
pub use client::Bluesky;
|
pub use client::Bluesky;
|
||||||
|
pub use entities::{FollowRecord, LikeRecord, PostRecord, Session};
|
||||||
pub use streaming::{CommitDetails, CommitProcessor, Operation};
|
pub use streaming::{CommitDetails, CommitProcessor, Operation};
|
||||||
|
|
|
@ -4,6 +4,7 @@ use anyhow::{anyhow, Error, Result};
|
||||||
|
|
||||||
use crate::services::bluesky::internals::cbor::CborValue;
|
use crate::services::bluesky::internals::cbor::CborValue;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct FollowRecord {
|
pub struct FollowRecord {
|
||||||
pub subject: String,
|
pub subject: String,
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@ use anyhow::{anyhow, Error, Result};
|
||||||
|
|
||||||
use crate::services::bluesky::internals::cbor::CborValue;
|
use crate::services::bluesky::internals::cbor::CborValue;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct LikeRecord {
|
pub struct LikeRecord {
|
||||||
pub subject: Subject,
|
pub subject: Subject,
|
||||||
}
|
}
|
||||||
|
@ -23,6 +24,7 @@ impl TryFrom<CborValue> for LikeRecord {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct Subject {
|
pub struct Subject {
|
||||||
pub cid: String,
|
pub cid: String,
|
||||||
pub uri: String,
|
pub uri: String,
|
||||||
|
|
|
@ -4,6 +4,7 @@ use anyhow::{anyhow, Error, Result};
|
||||||
|
|
||||||
use crate::services::bluesky::internals::cbor::CborValue;
|
use crate::services::bluesky::internals::cbor::CborValue;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct PostRecord {
|
pub struct PostRecord {
|
||||||
pub langs: Option<Vec<String>>,
|
pub langs: Option<Vec<String>>,
|
||||||
pub text: String,
|
pub text: String,
|
||||||
|
@ -20,7 +21,10 @@ impl TryFrom<CborValue> for PostRecord {
|
||||||
.remove("text")
|
.remove("text")
|
||||||
.ok_or_else(|| anyhow!("Missing field: text"))?
|
.ok_or_else(|| anyhow!("Missing field: text"))?
|
||||||
.try_into()?,
|
.try_into()?,
|
||||||
langs: map.remove("langs").map(|value| value.try_into()).transpose()?,
|
langs: map
|
||||||
|
.remove("langs")
|
||||||
|
.map(|value| value.try_into())
|
||||||
|
.transpose()?,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
@ -35,21 +35,19 @@ pub enum Operation {
|
||||||
author_did: String,
|
author_did: String,
|
||||||
cid: String,
|
cid: String,
|
||||||
uri: String,
|
uri: String,
|
||||||
languages: HashSet<String>,
|
post: PostRecord,
|
||||||
text: String,
|
|
||||||
},
|
},
|
||||||
CreateLike {
|
CreateLike {
|
||||||
author_did: String,
|
author_did: String,
|
||||||
cid: String,
|
cid: String,
|
||||||
uri: String,
|
uri: String,
|
||||||
subject_cid: String,
|
like: LikeRecord,
|
||||||
subject_uri: String,
|
|
||||||
},
|
},
|
||||||
CreateFollow {
|
CreateFollow {
|
||||||
author_did: String,
|
author_did: String,
|
||||||
cid: String,
|
cid: String,
|
||||||
uri: String,
|
uri: String,
|
||||||
subject: String,
|
follow: FollowRecord,
|
||||||
},
|
},
|
||||||
DeletePost {
|
DeletePost {
|
||||||
uri: String,
|
uri: String,
|
||||||
|
@ -116,35 +114,33 @@ async fn extract_operations(commit: &Commit) -> Result<Vec<Operation>> {
|
||||||
|
|
||||||
match collection {
|
match collection {
|
||||||
COLLECTION_POST => {
|
COLLECTION_POST => {
|
||||||
let record: PostRecord = read_record(block)?;
|
let post: PostRecord = read_record(block)?;
|
||||||
|
|
||||||
Operation::CreatePost {
|
Operation::CreatePost {
|
||||||
author_did: commit.repo.clone(),
|
author_did: commit.repo.clone(),
|
||||||
cid: cid.to_string(),
|
cid: cid.to_string(),
|
||||||
uri,
|
uri,
|
||||||
languages: record.langs.unwrap_or_default().iter().cloned().collect(),
|
post,
|
||||||
text: record.text,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
COLLECTION_LIKE => {
|
COLLECTION_LIKE => {
|
||||||
let record: LikeRecord = read_record(block)?;
|
let like: LikeRecord = read_record(block)?;
|
||||||
|
|
||||||
Operation::CreateLike {
|
Operation::CreateLike {
|
||||||
author_did: commit.repo.clone(),
|
author_did: commit.repo.clone(),
|
||||||
cid: cid.to_string(),
|
cid: cid.to_string(),
|
||||||
uri,
|
uri,
|
||||||
subject_cid: record.subject.cid,
|
like,
|
||||||
subject_uri: record.subject.uri,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
COLLECTION_FOLLOW => {
|
COLLECTION_FOLLOW => {
|
||||||
let record: FollowRecord = read_record(block)?;
|
let follow: FollowRecord = read_record(block)?;
|
||||||
|
|
||||||
Operation::CreateFollow {
|
Operation::CreateFollow {
|
||||||
author_did: commit.repo.clone(),
|
author_did: commit.repo.clone(),
|
||||||
cid: cid.to_string(),
|
cid: cid.to_string(),
|
||||||
uri,
|
uri,
|
||||||
subject: record.subject,
|
follow,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => continue,
|
_ => continue,
|
||||||
|
|
Loading…
Reference in New Issue