Pass around bluesky objects instead of restructuring them everywhere
Makes thing a little simpler in many places, especially once I start adding more fields to posts
This commit is contained in:
		
							parent
							
								
									5eeb0e45b1
								
							
						
					
					
						commit
						96915ca986
					
				
							
								
								
									
										15
									
								
								src/algos.rs
								
								
								
								
							
							
						
						
									
										15
									
								
								src/algos.rs
								
								
								
								
							| 
						 | 
				
			
			@ -1,30 +1,27 @@
 | 
			
		|||
mod nederlandskie;
 | 
			
		||||
 | 
			
		||||
use std::collections::{HashMap, HashSet};
 | 
			
		||||
use std::collections::HashMap;
 | 
			
		||||
 | 
			
		||||
use anyhow::Result;
 | 
			
		||||
use async_trait::async_trait;
 | 
			
		||||
use chrono::{DateTime, Utc};
 | 
			
		||||
 | 
			
		||||
use crate::services::database::{Database, Post};
 | 
			
		||||
use crate::services::bluesky;
 | 
			
		||||
use crate::services::database::{self, Database};
 | 
			
		||||
 | 
			
		||||
pub use self::nederlandskie::Nederlandskie;
 | 
			
		||||
 | 
			
		||||
#[async_trait]
 | 
			
		||||
pub trait Algo {
 | 
			
		||||
    async fn should_index_post(
 | 
			
		||||
        &self,
 | 
			
		||||
        author_did: &str,
 | 
			
		||||
        languages: &HashSet<String>,
 | 
			
		||||
        text: &str,
 | 
			
		||||
    ) -> Result<bool>;
 | 
			
		||||
    async fn should_index_post(&self, author_did: &str, post: &bluesky::PostRecord)
 | 
			
		||||
        -> Result<bool>;
 | 
			
		||||
 | 
			
		||||
    async fn fetch_posts(
 | 
			
		||||
        &self,
 | 
			
		||||
        database: &Database,
 | 
			
		||||
        limit: i32,
 | 
			
		||||
        earlier_than: Option<(DateTime<Utc>, &str)>,
 | 
			
		||||
    ) -> Result<Vec<Post>>;
 | 
			
		||||
    ) -> Result<Vec<database::Post>>;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub type AnyAlgo = Box<dyn Algo + Sync + Send>;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,4 +1,3 @@
 | 
			
		|||
use std::collections::HashSet;
 | 
			
		||||
use std::sync::Arc;
 | 
			
		||||
 | 
			
		||||
use anyhow::Result;
 | 
			
		||||
| 
						 | 
				
			
			@ -9,7 +8,8 @@ use lingua::LanguageDetector;
 | 
			
		|||
 | 
			
		||||
use super::Algo;
 | 
			
		||||
 | 
			
		||||
use crate::services::{database::Post, Database};
 | 
			
		||||
use crate::services::bluesky;
 | 
			
		||||
use crate::services::database::{self, Database};
 | 
			
		||||
 | 
			
		||||
pub struct Nederlandskie {
 | 
			
		||||
    language_detector: Arc<LanguageDetector>,
 | 
			
		||||
| 
						 | 
				
			
			@ -27,10 +27,9 @@ impl Algo for Nederlandskie {
 | 
			
		|||
    async fn should_index_post(
 | 
			
		||||
        &self,
 | 
			
		||||
        _author_did: &str,
 | 
			
		||||
        _languages: &HashSet<String>,
 | 
			
		||||
        text: &str,
 | 
			
		||||
        post: &bluesky::PostRecord,
 | 
			
		||||
    ) -> Result<bool> {
 | 
			
		||||
        Ok(self.language_detector.detect_language_of(text) == Some(Russian))
 | 
			
		||||
        Ok(self.language_detector.detect_language_of(&post.text) == Some(Russian))
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    async fn fetch_posts(
 | 
			
		||||
| 
						 | 
				
			
			@ -38,7 +37,7 @@ impl Algo for Nederlandskie {
 | 
			
		|||
        database: &Database,
 | 
			
		||||
        limit: i32,
 | 
			
		||||
        earlier_than: Option<(DateTime<Utc>, &str)>,
 | 
			
		||||
    ) -> Result<Vec<Post>> {
 | 
			
		||||
    ) -> Result<Vec<database::Post>> {
 | 
			
		||||
        Ok(database
 | 
			
		||||
            .fetch_posts_by_authors_country("nl", limit as usize, earlier_than)
 | 
			
		||||
            .await?)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -75,12 +75,11 @@ impl CommitProcessor for PostIndexer {
 | 
			
		|||
                    author_did,
 | 
			
		||||
                    cid,
 | 
			
		||||
                    uri,
 | 
			
		||||
                    languages,
 | 
			
		||||
                    text,
 | 
			
		||||
                    post,
 | 
			
		||||
                } => {
 | 
			
		||||
                    for algo in self.algos.iter_all() {
 | 
			
		||||
                        if algo.should_index_post(author_did, languages, text).await? {
 | 
			
		||||
                            info!("Received insertable post from {author_did}: {text}");
 | 
			
		||||
                        if algo.should_index_post(author_did, post).await? {
 | 
			
		||||
                            info!("Received insertable post from {author_did}: {}", post.text);
 | 
			
		||||
 | 
			
		||||
                            self.database
 | 
			
		||||
                                .insert_profile_if_it_doesnt_exist(author_did)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,4 +4,5 @@ mod internals;
 | 
			
		|||
mod streaming;
 | 
			
		||||
 | 
			
		||||
pub use client::Bluesky;
 | 
			
		||||
pub use entities::{FollowRecord, LikeRecord, PostRecord, Session};
 | 
			
		||||
pub use streaming::{CommitDetails, CommitProcessor, Operation};
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,6 +4,7 @@ use anyhow::{anyhow, Error, Result};
 | 
			
		|||
 | 
			
		||||
use crate::services::bluesky::internals::cbor::CborValue;
 | 
			
		||||
 | 
			
		||||
#[derive(Debug)]
 | 
			
		||||
pub struct FollowRecord {
 | 
			
		||||
    pub subject: String,
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,6 +4,7 @@ use anyhow::{anyhow, Error, Result};
 | 
			
		|||
 | 
			
		||||
use crate::services::bluesky::internals::cbor::CborValue;
 | 
			
		||||
 | 
			
		||||
#[derive(Debug)]
 | 
			
		||||
pub struct LikeRecord {
 | 
			
		||||
    pub subject: Subject,
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -23,6 +24,7 @@ impl TryFrom<CborValue> for LikeRecord {
 | 
			
		|||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Debug)]
 | 
			
		||||
pub struct Subject {
 | 
			
		||||
    pub cid: String,
 | 
			
		||||
    pub uri: String,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,6 +4,7 @@ use anyhow::{anyhow, Error, Result};
 | 
			
		|||
 | 
			
		||||
use crate::services::bluesky::internals::cbor::CborValue;
 | 
			
		||||
 | 
			
		||||
#[derive(Debug)]
 | 
			
		||||
pub struct PostRecord {
 | 
			
		||||
    pub langs: Option<Vec<String>>,
 | 
			
		||||
    pub text: String,
 | 
			
		||||
| 
						 | 
				
			
			@ -20,7 +21,10 @@ impl TryFrom<CborValue> for PostRecord {
 | 
			
		|||
                .remove("text")
 | 
			
		||||
                .ok_or_else(|| anyhow!("Missing field: text"))?
 | 
			
		||||
                .try_into()?,
 | 
			
		||||
            langs: map.remove("langs").map(|value| value.try_into()).transpose()?,
 | 
			
		||||
            langs: map
 | 
			
		||||
                .remove("langs")
 | 
			
		||||
                .map(|value| value.try_into())
 | 
			
		||||
                .transpose()?,
 | 
			
		||||
        })
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,4 +1,4 @@
 | 
			
		|||
use std::collections::{HashMap, HashSet};
 | 
			
		||||
use std::collections::HashMap;
 | 
			
		||||
 | 
			
		||||
use anyhow::Result;
 | 
			
		||||
use async_trait::async_trait;
 | 
			
		||||
| 
						 | 
				
			
			@ -35,21 +35,19 @@ pub enum Operation {
 | 
			
		|||
        author_did: String,
 | 
			
		||||
        cid: String,
 | 
			
		||||
        uri: String,
 | 
			
		||||
        languages: HashSet<String>,
 | 
			
		||||
        text: String,
 | 
			
		||||
        post: PostRecord,
 | 
			
		||||
    },
 | 
			
		||||
    CreateLike {
 | 
			
		||||
        author_did: String,
 | 
			
		||||
        cid: String,
 | 
			
		||||
        uri: String,
 | 
			
		||||
        subject_cid: String,
 | 
			
		||||
        subject_uri: String,
 | 
			
		||||
        like: LikeRecord,
 | 
			
		||||
    },
 | 
			
		||||
    CreateFollow {
 | 
			
		||||
        author_did: String,
 | 
			
		||||
        cid: String,
 | 
			
		||||
        uri: String,
 | 
			
		||||
        subject: String,
 | 
			
		||||
        follow: FollowRecord,
 | 
			
		||||
    },
 | 
			
		||||
    DeletePost {
 | 
			
		||||
        uri: String,
 | 
			
		||||
| 
						 | 
				
			
			@ -116,35 +114,33 @@ async fn extract_operations(commit: &Commit) -> Result<Vec<Operation>> {
 | 
			
		|||
 | 
			
		||||
                match collection {
 | 
			
		||||
                    COLLECTION_POST => {
 | 
			
		||||
                        let record: PostRecord = read_record(block)?;
 | 
			
		||||
                        let post: PostRecord = read_record(block)?;
 | 
			
		||||
 | 
			
		||||
                        Operation::CreatePost {
 | 
			
		||||
                            author_did: commit.repo.clone(),
 | 
			
		||||
                            cid: cid.to_string(),
 | 
			
		||||
                            uri,
 | 
			
		||||
                            languages: record.langs.unwrap_or_default().iter().cloned().collect(),
 | 
			
		||||
                            text: record.text,
 | 
			
		||||
                            post,
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                    COLLECTION_LIKE => {
 | 
			
		||||
                        let record: LikeRecord = read_record(block)?;
 | 
			
		||||
                        let like: LikeRecord = read_record(block)?;
 | 
			
		||||
 | 
			
		||||
                        Operation::CreateLike {
 | 
			
		||||
                            author_did: commit.repo.clone(),
 | 
			
		||||
                            cid: cid.to_string(),
 | 
			
		||||
                            uri,
 | 
			
		||||
                            subject_cid: record.subject.cid,
 | 
			
		||||
                            subject_uri: record.subject.uri,
 | 
			
		||||
                            like,
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                    COLLECTION_FOLLOW => {
 | 
			
		||||
                        let record: FollowRecord = read_record(block)?;
 | 
			
		||||
                        let follow: FollowRecord = read_record(block)?;
 | 
			
		||||
 | 
			
		||||
                        Operation::CreateFollow {
 | 
			
		||||
                            author_did: commit.repo.clone(),
 | 
			
		||||
                            cid: cid.to_string(),
 | 
			
		||||
                            uri,
 | 
			
		||||
                            subject: record.subject,
 | 
			
		||||
                            follow,
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                    _ => continue,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue