From f4ee482ce712dd9a47ec490a5038a42b38b71225 Mon Sep 17 00:00:00 2001 From: Aleksei Voronov Date: Thu, 21 Sep 2023 10:36:47 +0200 Subject: [PATCH] Use Arcs to pass stuff around to avoid dealing with lifetimes And also implement proper language detection through lingua-rs, because Bluesky's detection is really bad --- Cargo.lock | 1070 ++++++++++++++++- Cargo.toml | 1 + src/algos.rs | 50 +- src/algos/nederlandskie.rs | 21 +- src/config.rs | 1 - src/main.rs | 32 +- .../endpoints/describe_feed_generator.rs | 5 +- .../endpoints/get_feed_skeleton.rs | 5 +- src/processes/feed_server/server.rs | 24 +- src/processes/feed_server/state.rs | 8 +- src/processes/post_indexer.rs | 31 +- src/processes/profile_classifier.rs | 13 +- src/services/database.rs | 1 - 13 files changed, 1200 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 25af601..196a005 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,6 +24,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" dependencies = [ "cfg-if", + "const-random", "getrandom", "once_cell", "version_check", @@ -38,6 +39,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "allocator-api2" version = "0.2.16" @@ -273,6 +289,27 @@ dependencies = [ "generic-array", ] +[[package]] +name = "brotli" +version = "3.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b6561fd3f895a11e8f72af2cb7d22e08366bebc2b6b57f7744c4bda27034744" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "bumpalo" version = "3.14.0" @@ -291,6 +328,15 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +[[package]] +name = "castaway" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc" +dependencies = [ + "rustversion", +] + [[package]] name = "cbor4ii" version = "0.2.14" @@ -384,12 +430,47 @@ dependencies = [ "unsigned-varint", ] +[[package]] +name = "compact_str" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f86b9c4c00838774a6d902ef931eff7470720c51d90c2e32cfe15dc304737b3f" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "ryu", + "static_assertions", +] + [[package]] name = "const-oid" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28c122c3980598d243d63d9a704629a2d748d101f278052ff068be5a4423ab6f" +[[package]] +name = "const-random" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" +dependencies = [ + "const-random-macro", + "proc-macro-hack", +] + +[[package]] +name = "const-random-macro" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" +dependencies = [ + "getrandom", + "once_cell", + "proc-macro-hack", + "tiny-keccak", +] + [[package]] name = "constant_time_eq" version = "0.3.0" @@ -445,6 +526,40 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9cace84e55f07e7301bae1c519df89cdad8cc3cd868413d3fdbdeca9ff3db484" +[[package]] +name = "crossbeam-channel" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "scopeguard", +] + [[package]] name = "crossbeam-queue" version = "0.3.8" @@ -464,6 +579,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "crypto-common" version = "0.1.6" @@ -664,6 +785,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fraction" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3027ae1df8d41b4bed2241c8fdad4acc1e7af60c8e17743534b545e77182d678" +dependencies = [ + "lazy_static", + "num", +] + [[package]] name = "futures" version = "0.3.28" @@ -999,6 +1130,25 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "include_dir" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18762faeff7122e89e0857b02f7ce6fcc0d101d5e9ad2ad7846cc01d61b7f19e" +dependencies = [ + "include_dir_macros", +] + +[[package]] +name = "include_dir_macros" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b139284b5cf57ecfa712bcc66950bb635b31aff41c188e8a4cfc758eca374a3f" +dependencies = [ + "proc-macro2", + "quote", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -1036,6 +1186,15 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.11.0" @@ -1151,6 +1310,780 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "lingua" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dc858be45965c868f5fa4a3c57d61842ff8aa2f2b1308eea14babbee5ef2478" +dependencies = [ + "ahash", + "brotli", + "compact_str", + "fraction", + "include_dir", + "itertools 0.10.5", + "lingua-afrikaans-language-model", + "lingua-albanian-language-model", + "lingua-arabic-language-model", + "lingua-armenian-language-model", + "lingua-azerbaijani-language-model", + "lingua-basque-language-model", + "lingua-belarusian-language-model", + "lingua-bengali-language-model", + "lingua-bokmal-language-model", + "lingua-bosnian-language-model", + "lingua-bulgarian-language-model", + "lingua-catalan-language-model", + "lingua-chinese-language-model", + "lingua-croatian-language-model", + "lingua-czech-language-model", + "lingua-danish-language-model", + "lingua-dutch-language-model", + "lingua-english-language-model", + "lingua-esperanto-language-model", + "lingua-estonian-language-model", + "lingua-finnish-language-model", + "lingua-french-language-model", + "lingua-ganda-language-model", + "lingua-georgian-language-model", + "lingua-german-language-model", + "lingua-greek-language-model", + "lingua-gujarati-language-model", + "lingua-hebrew-language-model", + "lingua-hindi-language-model", + "lingua-hungarian-language-model", + "lingua-icelandic-language-model", + "lingua-indonesian-language-model", + "lingua-irish-language-model", + "lingua-italian-language-model", + "lingua-japanese-language-model", + "lingua-kazakh-language-model", + "lingua-korean-language-model", + "lingua-latin-language-model", + "lingua-latvian-language-model", + "lingua-lithuanian-language-model", + "lingua-macedonian-language-model", + "lingua-malay-language-model", + "lingua-maori-language-model", + "lingua-marathi-language-model", + "lingua-mongolian-language-model", + "lingua-nynorsk-language-model", + "lingua-persian-language-model", + "lingua-polish-language-model", + "lingua-portuguese-language-model", + "lingua-punjabi-language-model", + "lingua-romanian-language-model", + "lingua-russian-language-model", + "lingua-serbian-language-model", + "lingua-shona-language-model", + "lingua-slovak-language-model", + "lingua-slovene-language-model", + "lingua-somali-language-model", + "lingua-sotho-language-model", + "lingua-spanish-language-model", + "lingua-swahili-language-model", + "lingua-swedish-language-model", + "lingua-tagalog-language-model", + "lingua-tamil-language-model", + "lingua-telugu-language-model", + "lingua-thai-language-model", + "lingua-tsonga-language-model", + "lingua-tswana-language-model", + "lingua-turkish-language-model", + "lingua-ukrainian-language-model", + "lingua-urdu-language-model", + "lingua-vietnamese-language-model", + "lingua-welsh-language-model", + "lingua-xhosa-language-model", + "lingua-yoruba-language-model", + "lingua-zulu-language-model", + "maplit", + "once_cell", + "rayon", + "regex", + "serde", + "serde-wasm-bindgen", + "serde_json", + "strum", + "strum_macros", + "wasm-bindgen", +] + +[[package]] +name = "lingua-afrikaans-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97df3a7cd698a09753cb32c0f403cf9f698fa0ae1b081fc1b14fc4707301392" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-albanian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1f78f0468f3337d53bd1cdc481e0f1c445e33ce43650d654d8b56037daa1996" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-arabic-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ada75100728990b05bacaec7693efebf0c8b911599ac0c082cb4ed576bf494d" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-armenian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d126f4182866345c5df9cef2e0a3ca64891d7bf0325c31f99df360c00c72bc0f" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-azerbaijani-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "962742b8bed5483adb31cfd12b4ee79fc8dccfa602bb4c25a240579236394892" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-basque-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "710aa0c4203d3c019b624601233b0035723087ba2cb9de8572632cb8c5ac97a4" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-belarusian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5b76ec320c7646f0c8849371530ba5332d64f640fc170c14d63102bd27580f5" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-bengali-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecf9f1c7b00c78984eee240f2188f1781a69a08b87bfb1aabd3e3cc50c0d279d" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-bokmal-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a9215ee6bbe6d37222dc5dbd9d1577389810011cb8afbef6155e71a11fa5cb9" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-bosnian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98f836f559334e2d9c0e1876c87e72431b25412b164efd194a840a13b49d8c3b" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-bulgarian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6639397b34419af35835a594178f93dd84edb3a146938f0f1d7013c1a48a5a1" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-catalan-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0d9acd43a1320961fe4cb8571ebc0e87d9809a0e9d6d71e5cbab86fab923201" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-chinese-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45a1e3a38a0920f6ddbd9b6a79a8c04cd927142713b595ba32be6b811603f7c6" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-croatian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66eaa997d1fa477c42d2497beeb546b63576e04cc6057a60718daabe061c2344" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-czech-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed98f50055cb5c9b06c20da82029c8cfe204332ab3360349545cbc3d465cd1d1" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-danish-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0adc4abde17e94cf023a1ae4f3bcda3b13dcfecfc1f6421f59f17f904d5b5ed2" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-dutch-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30f3db5635a5e964d85190b5d32ca391d9f7630137630458a605c7ab33583db9" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-english-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbd066693abb10083bb5b5fc1c494dda357170265021ad873ef582211817b8a8" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-esperanto-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7877160b3346161f2714236c9ab1c667097821c0dbb31e5bce42f23c82a157c2" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-estonian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a25c07d8a67c82a5c41d8cb2893a4032332d18abadd8d06d80a654f6a1b3c47" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-finnish-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07e0f34931973ec268916cb4c183e06df880e8e43ee99124522ea8c767c9940" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-french-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba406de05309b212790036e576aee4b1a71ce62b77fbd084df5bc8d7c624866f" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-ganda-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9890e6f6b48bf7f2240da770020c5045a1885598c6cb6bc25df73e0182f4792" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-georgian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7392be814065ff3ff71bae7dbb463a47039a68f90d45dd34f4ca7221c0c2f01" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-german-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd4041d542b6512d415c0ea6979333e5e88698b966dda7b9bf06029bf690a13" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-greek-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e1f5a17634410f527a6360979209504072ff9e71e29a0578b18a4f57dcb611b" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-gujarati-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee11b10ffdf8f4eb3d2078c9edec73fb51a36d061ddc6d6e847585a3b91909e" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-hebrew-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a13c9fa3f21bfb91ed3a9710add72c332d0e36234ea9b9d5ed3d6e2235051f6" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-hindi-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63659e514fa8c706d2ed0d7a04b98adb05b8c8d4bc37e3d12edb9d35ce24fd0" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-hungarian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "885525276c696b978c4a1aee8a2b3b9a822724e47348c260d05e9092ebd8c96e" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-icelandic-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b616e573df1aa0bc9881fcef4258d3445fc80f5efe3a71ab010c68eb97ab659d" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-indonesian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc06b8937f6adaad76b28fd009cbfaa6f876ebe5fa887d96dccd1dff2d21d9ad" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-irish-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6258beb6627274163aac7da4dda1e6bcbcf7ff404efa603d0e59e8054f77199" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-italian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6e138047ac4d681e419e37ad9e383878652ac19e67056ff16085d4cc82e590" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-japanese-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34b94aa96a1bc0838ddda2c7912d0ebd7256bb21e7a2974e4dd8b186df501a8" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-kazakh-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b64a848cc1a38dc3c4e7a5ad18b9f2ecbd6abf4c453a8898538d96e6a5c0da4" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-korean-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35c47aa5e5dc8d029967375081aabda462faf10b070aefc1a175ea7f42f5267c" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-latin-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0922f27e68ae15397160ab1835aefb5a878bc14f0c0ce79b6d074ee828b4d7" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-latvian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b6cc01bbfda64ccbfb00b5172d7ba9327053da8c38a85c0b71c969533cf8f08" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-lithuanian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9b68e593c8f72e77b1fe823633baae744681bd8def2dce7d2830947b41c4b1e" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-macedonian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e288bc4ec1776131580ed80d842de39bce0355bcc3cc59c519008e60f9276fe" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-malay-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "771c6b7ab2865e2563e1ce19bf06611aad674c3f0a180ec43c03a9883e49c8aa" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-maori-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d653a0c16c5cfaa088bdf75e362ba4f715cbd6dbca391f1637a6c49c1e8aff90" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-marathi-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a141c8d9670ff22e541c064ccab3cfbbc2db2625ec20236d73d76dd98e8ba0" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-mongolian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f53de502920af44b5d8ec7787895c0a81b0f5f43e3f3c2b80a240f2b605122c1" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-nynorsk-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db00b13561df244acb76927677200ff399b02d8c9b160d25a38320ef3361cfa9" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-persian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "043ba9b4ac7df99acaa1cd9f5925d6fc9b0fe850d3891bbeb91b27ed94f266c4" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-polish-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f298b650531a5d85d3101ad7db7a5cbf3b671b71f01a775e97d4165eebc9ff75" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-portuguese-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9df0be307b3f5e24e10e76a7841302c0ff4a6ebf818a11d3a3c5c4cecfcfe58b" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-punjabi-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc529e58e6af7cca366af7fb964b4cad7a357d538d4b6a7eccde31ae4bd7604f" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-romanian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0428c13d8e05e65929ec170e807c8508c2201c63c7802c7786de0c3b78e390b" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-russian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea97d6643e90ee4fc0043c728f0c06cdd9e69d4970fd1983e5c59b3448a676e" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-serbian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7b181be3137969bf5a9e10d8773666d01223886a4d7454048fad99e7b0416db" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-shona-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "509772c502f2a47ec7cd5b50be1bc34b51dcb98cca1f5e30143c96de61a757bb" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-slovak-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44e1e99e6725e99659367ac80731efc24298a7a24190ff3d777209e86be3dc40" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-slovene-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49fc410d0acdf3068cc9c7ae493348d47a26be0d3d8c5c23603c807befd949c1" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-somali-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dde8a66f8901e7e5eac9af985ace066bd5ed3643815beaecb2d7a7b86645d0b3" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-sotho-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58084eb81faf32f62c922e8b3f3dfdcadacf5de934edfb7d3b1acb3759d89f04" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-spanish-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d45be9bede9b9d6d53b6e06047822a18dbd83e5d515736afecd85c1e71cc6070" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-swahili-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f251246905cf56e519af87095fd31264417c19de83fb9e2e0ade5e8c27af4fe3" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-swedish-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c0289dc7e5654aac497c829369b811ae1870eb9d5f76c6b82527bbe5470aa0d" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-tagalog-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc5cb889e86a8a62f56f06d0d9bd92d16f697151a9b3dcdefef0ba39624c5cb0" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-tamil-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630d10fabcd5a289b5cd4b651df0c448692a3e55cc7e28334313a2d76d2916e5" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-telugu-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0501f9b52fc247ef4e1f4020fd44a90cccddf9914724977cb3440eced01cbe07" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-thai-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "194b09209fc63d986d8ef9505689756eff1935fa52fd91031b60e7d546ca3c76" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-tsonga-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa10493e6bb3bfd8591254a5657eb98bad6be33334c4ebd791edb354fc83b07" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-tswana-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5db016128507f4eb48dbe96b05be134c9385f6752da7949c653a24e3c4661e37" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-turkish-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b517e12fe94ce80e7f8f380fdd2c0223bcfe351c0898c9a83f56f78b3afad81" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-ukrainian-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14ed035dd4b7ec5f76fe3b07e5f499d76c4cdb2a6d275459e4cdd3a3d21f131a" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-urdu-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9edb8e15e88004b5b89e80f1bf41237770c5536805494dcf627f7ec550eaf4" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-vietnamese-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07d08afaf263906aa3f2bfaffaf52eecd368992685e2468f1ba7f85f3e41fd49" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-welsh-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07c47a533b66772fdd7ece069359900b18824851464a4dd4fd8e4b29928d19b1" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-xhosa-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55a48851d35933a57c6f87a4fdc25d7eeba8eff98f1852b00d7c8c15b2a818fb" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-yoruba-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf6365215ffb772d22c18fbf55b1503ad292aa9a5e93d1249d9307117f6b0ef" +dependencies = [ + "include_dir", +] + +[[package]] +name = "lingua-zulu-language-model" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63f8b5c7f412d078f37ffd2874297afce76719c4b1b7c4856af0fa4e8f89abe2" +dependencies = [ + "include_dir", +] + [[package]] name = "linux-raw-sys" version = "0.4.7" @@ -1173,6 +2106,12 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + [[package]] name = "matchit" version = "0.7.2" @@ -1194,6 +2133,15 @@ version = "2.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -1303,6 +2251,7 @@ dependencies = [ "dotenv", "futures", "libipld-core", + "lingua", "once_cell", "rs-car", "scooby", @@ -1323,6 +2272,31 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-bigint-dig" version = "0.8.4" @@ -1340,6 +2314,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "num-complex" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +dependencies = [ + "num-traits", +] + [[package]] name = "num-integer" version = "0.1.45" @@ -1361,6 +2344,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.16" @@ -1583,6 +2578,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.20+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" + [[package]] name = "proc-macro2" version = "1.0.67" @@ -1631,6 +2632,28 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rayon" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + [[package]] name = "redox_syscall" version = "0.3.5" @@ -1833,6 +2856,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde-wasm-bindgen" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3b143e2833c57ab9ad3ea280d21fd34e285a42837aeb0ee301f4f41890fa00e" +dependencies = [ + "js-sys", + "serde", + "wasm-bindgen", +] + [[package]] name = "serde_bytes" version = "0.11.12" @@ -2026,7 +3060,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b7b278788e7be4d0d29c0f39497a0eef3fba6bbc8e70d8bf7fde46edeaa9e85" dependencies = [ - "itertools", + "itertools 0.11.0", "nom", "unicode_categories", ] @@ -2231,6 +3265,12 @@ dependencies = [ "url", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "stringprep" version = "0.1.4" @@ -2242,6 +3282,25 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "strum" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" + +[[package]] +name = "strum_macros" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 1.0.109", +] + [[package]] name = "subtle" version = "2.5.0" @@ -2330,6 +3389,15 @@ dependencies = [ "syn 2.0.35", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinyvec" version = "1.6.0" diff --git a/Cargo.toml b/Cargo.toml index 1643fb1..e13cc3c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ ciborium = "0.2.1" dotenv = "0.15.0" futures = "0.3.28" libipld-core = { version = "0.16.0", features = ["serde-codec"] } +lingua = "1.5.0" once_cell = "1.18.0" rs-car = "0.4.1" scooby = "0.5.0" diff --git a/src/algos.rs b/src/algos.rs index b608203..9fb4ebf 100644 --- a/src/algos.rs +++ b/src/algos.rs @@ -5,11 +5,10 @@ use std::collections::{HashMap, HashSet}; use anyhow::Result; use async_trait::async_trait; use chrono::{DateTime, Utc}; -use once_cell::sync::Lazy; use crate::services::database::{Database, Post}; -use self::nederlandskie::Nederlandskie; +pub use self::nederlandskie::Nederlandskie; #[async_trait] pub trait Algo { @@ -24,22 +23,43 @@ pub trait Algo { } pub type AnyAlgo = Box; -pub type AlgosMap = HashMap<&'static str, AnyAlgo>; +type AlgosMap = HashMap; -static ALL_ALGOS: Lazy = Lazy::new(|| { - let mut m = AlgosMap::new(); - m.insert("nederlandskie", Box::new(Nederlandskie)); - m -}); - -pub fn iter_names() -> impl Iterator { - ALL_ALGOS.keys().map(|s| *s) +pub struct Algos { + algos: AlgosMap, } -pub fn iter_all() -> impl Iterator { - ALL_ALGOS.values() +impl Algos { + pub fn iter_names(&self) -> impl Iterator { + self.algos.keys().map(String::as_str) + } + + pub fn iter_all(&self) -> impl Iterator { + self.algos.values() + } + + pub fn get_by_name(&self, name: &str) -> Option<&AnyAlgo> { + self.algos.get(name) + } } -pub fn get_by_name(name: &str) -> Option<&'static AnyAlgo> { - ALL_ALGOS.get(name) +pub struct AlgosBuilder { + algos: AlgosMap, +} + +impl AlgosBuilder { + pub fn new() -> Self { + Self { + algos: AlgosMap::new(), + } + } + + pub fn add(mut self, name: &str, algo: T) -> Self { + self.algos.insert(name.to_owned(), Box::new(algo)); + self + } + + pub fn build(self) -> Algos { + Algos { algos: self.algos } + } } diff --git a/src/algos/nederlandskie.rs b/src/algos/nederlandskie.rs index f235c31..700fa75 100644 --- a/src/algos/nederlandskie.rs +++ b/src/algos/nederlandskie.rs @@ -1,14 +1,25 @@ use std::collections::HashSet; +use std::sync::Arc; use anyhow::Result; use async_trait::async_trait; use chrono::{DateTime, Utc}; +use lingua::Language::Russian; +use lingua::LanguageDetector; use super::Algo; use crate::services::{database::Post, Database}; -pub struct Nederlandskie; +pub struct Nederlandskie { + language_detector: Arc, +} + +impl Nederlandskie { + pub fn new(language_detector: Arc) -> Self { + Self { language_detector } + } +} /// An algorithm that serves posts written in Russian by people living in Netherlands #[async_trait] @@ -16,12 +27,10 @@ impl Algo for Nederlandskie { fn should_index_post( &self, _author_did: &str, - languages: &HashSet, - _text: &str, + _languages: &HashSet, + text: &str, ) -> bool { - // BlueSky gets confused a lot about Russian vs Ukrainian, so skip posts - // that may be in Ukrainian regardless of whether Russian is in the list - languages.contains("ru") && !languages.contains("uk") + self.language_detector.detect_language_of(text) == Some(Russian) } async fn fetch_posts( diff --git a/src/config.rs b/src/config.rs index f66d103..074b4df 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2,7 +2,6 @@ use anyhow::Result; use dotenv::dotenv; use std::env; -#[derive(Clone)] pub struct Config { pub chat_gpt_api_key: String, pub database_url: String, diff --git a/src/main.rs b/src/main.rs index 5bda794..fbda0e3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,8 +3,13 @@ mod config; mod processes; mod services; -use anyhow::Result; +use std::sync::Arc; +use anyhow::Result; +use lingua::LanguageDetectorBuilder; + +use crate::algos::AlgosBuilder; +use crate::algos::Nederlandskie; use crate::config::Config; use crate::processes::FeedServer; use crate::processes::PostIndexer; @@ -15,15 +20,26 @@ use crate::services::AI; #[tokio::main] async fn main() -> Result<()> { - let config = Config::load()?; + let config = Arc::new(Config::load()?); - let ai = AI::new(&config.chat_gpt_api_key, "https://api.openai.com"); - let bluesky = Bluesky::new("https://bsky.social"); - let database = Database::connect(&config.database_url).await?; + let ai = Arc::new(AI::new(&config.chat_gpt_api_key, "https://api.openai.com")); + let bluesky = Arc::new(Bluesky::new("https://bsky.social")); + let database = Arc::new(Database::connect(&config.database_url).await?); + let language_detector = Arc::new( + LanguageDetectorBuilder::from_all_languages() + .with_preloaded_language_models() + .build(), + ); - let post_indexer = PostIndexer::new(&database, &bluesky); - let profile_classifier = ProfileClassifier::new(&database, &ai, &bluesky); - let feed_server = FeedServer::new(&database, &config); + let algos = Arc::new( + AlgosBuilder::new() + .add("nederlandskie", Nederlandskie::new(language_detector)) + .build(), + ); + + let post_indexer = PostIndexer::new(database.clone(), bluesky.clone(), algos.clone()); + let profile_classifier = ProfileClassifier::new(database.clone(), ai.clone(), bluesky.clone()); + let feed_server = FeedServer::new(database.clone(), config.clone(), algos.clone()); tokio::try_join!( post_indexer.start(), diff --git a/src/processes/feed_server/endpoints/describe_feed_generator.rs b/src/processes/feed_server/endpoints/describe_feed_generator.rs index 1648f1f..a52c2e8 100644 --- a/src/processes/feed_server/endpoints/describe_feed_generator.rs +++ b/src/processes/feed_server/endpoints/describe_feed_generator.rs @@ -3,7 +3,6 @@ use atrium_api::app::bsky::feed::describe_feed_generator::{ }; use axum::{extract::State, Json}; -use crate::algos; use crate::processes::feed_server::state::FeedServerState; pub async fn describe_feed_generator( @@ -11,7 +10,9 @@ pub async fn describe_feed_generator( ) -> Json { Json(FeedGeneratorDescription { did: state.config.service_did.clone(), - feeds: algos::iter_names() + feeds: state + .algos + .iter_names() .map(|name| Feed { uri: format!( "at://{}/app.bsky.feed.generator/{}", diff --git a/src/processes/feed_server/endpoints/get_feed_skeleton.rs b/src/processes/feed_server/endpoints/get_feed_skeleton.rs index a7b507d..bf6171c 100644 --- a/src/processes/feed_server/endpoints/get_feed_skeleton.rs +++ b/src/processes/feed_server/endpoints/get_feed_skeleton.rs @@ -7,14 +7,15 @@ use axum::extract::{Query, State}; use axum::Json; use chrono::{DateTime, TimeZone, Utc}; -use crate::algos; use crate::processes::feed_server::state::FeedServerState; pub async fn get_feed_skeleton( State(state): State, query: Query, ) -> Json { - let algo = algos::get_by_name(&query.feed) + let algo = state + .algos + .get_by_name(&query.feed) .ok_or_else(|| anyhow!("Feed {} not found", query.feed)) .unwrap(); // TODO: handle error diff --git a/src/processes/feed_server/server.rs b/src/processes/feed_server/server.rs index aa58f44..f7796b9 100644 --- a/src/processes/feed_server/server.rs +++ b/src/processes/feed_server/server.rs @@ -1,23 +1,30 @@ use std::net::SocketAddr; +use std::sync::Arc; use anyhow::Result; use axum::routing::get; use axum::{Router, Server}; +use crate::algos::Algos; use crate::config::Config; use crate::services::Database; use super::endpoints::{describe_feed_generator, did_json, get_feed_skeleton, root}; use super::state::FeedServerState; -pub struct FeedServer<'a> { - database: &'a Database, - config: &'a Config, +pub struct FeedServer { + database: Arc, + config: Arc, + algos: Arc, } -impl<'a> FeedServer<'a> { - pub fn new(database: &'a Database, config: &'a Config) -> Self { - Self { database, config } +impl FeedServer { + pub fn new(database: Arc, config: Arc, algos: Arc) -> Self { + Self { + database, + config, + algos, + } } pub async fn serve(self) -> Result<()> { @@ -33,8 +40,9 @@ impl<'a> FeedServer<'a> { get(get_feed_skeleton), ) .with_state(FeedServerState { - database: self.database.clone(), - config: self.config.clone(), + database: self.database, + config: self.config, + algos: self.algos, }); let addr = SocketAddr::from(([127, 0, 0, 1], 3000)); diff --git a/src/processes/feed_server/state.rs b/src/processes/feed_server/state.rs index 5188854..2cc8a44 100644 --- a/src/processes/feed_server/state.rs +++ b/src/processes/feed_server/state.rs @@ -1,8 +1,12 @@ +use std::sync::Arc; + +use crate::algos::Algos; use crate::config::Config; use crate::services::Database; #[derive(Clone)] pub struct FeedServerState { - pub database: Database, - pub config: Config, + pub database: Arc, + pub config: Arc, + pub algos: Arc, } diff --git a/src/processes/post_indexer.rs b/src/processes/post_indexer.rs index 7554210..91caa6e 100644 --- a/src/processes/post_indexer.rs +++ b/src/processes/post_indexer.rs @@ -1,29 +1,36 @@ +use std::sync::Arc; + use anyhow::Result; use async_trait::async_trait; -use crate::algos; +use crate::algos::Algos; use crate::services::bluesky::{Bluesky, Operation, OperationProcessor}; use crate::services::Database; -pub struct PostIndexer<'a> { - database: &'a Database, - bluesky: &'a Bluesky, +pub struct PostIndexer { + database: Arc, + bluesky: Arc, + algos: Arc, } -impl<'a> PostIndexer<'a> { - pub fn new(database: &'a Database, bluesky: &'a Bluesky) -> Self { - Self { database, bluesky } +impl PostIndexer { + pub fn new(database: Arc, bluesky: Arc, algos: Arc) -> Self { + Self { + database, + bluesky, + algos, + } } } -impl<'a> PostIndexer<'a> { +impl PostIndexer { pub async fn start(&self) -> Result<()> { Ok(self.bluesky.subscribe_to_operations(self).await?) } } #[async_trait] -impl<'a> OperationProcessor for PostIndexer<'a> { +impl OperationProcessor for PostIndexer { async fn process_operation(&self, operation: &Operation) -> Result<()> { match operation { Operation::CreatePost { @@ -33,7 +40,11 @@ impl<'a> OperationProcessor for PostIndexer<'a> { languages, text, } => { - if algos::iter_all().any(|a| a.should_index_post(author_did, languages, text)) { + if self + .algos + .iter_all() + .any(|a| a.should_index_post(author_did, languages, text)) + { println!("received insertable post from {author_did}: {text}"); self.database diff --git a/src/processes/profile_classifier.rs b/src/processes/profile_classifier.rs index b74c509..ace45c2 100644 --- a/src/processes/profile_classifier.rs +++ b/src/processes/profile_classifier.rs @@ -1,3 +1,4 @@ +use std::sync::Arc; use std::time::Duration; use anyhow::Result; @@ -6,14 +7,14 @@ use crate::services::Bluesky; use crate::services::Database; use crate::services::AI; -pub struct ProfileClassifier<'a> { - database: &'a Database, - ai: &'a AI, - bluesky: &'a Bluesky, +pub struct ProfileClassifier { + database: Arc, + ai: Arc, + bluesky: Arc, } -impl<'a> ProfileClassifier<'a> { - pub fn new(database: &'a Database, ai: &'a AI, bluesky: &'a Bluesky) -> Self { +impl ProfileClassifier { + pub fn new(database: Arc, ai: Arc, bluesky: Arc) -> Self { Self { database, ai, diff --git a/src/services/database.rs b/src/services/database.rs index 8e387f6..8a66e81 100644 --- a/src/services/database.rs +++ b/src/services/database.rs @@ -24,7 +24,6 @@ pub struct SubscriptionState { cursor: i64, } -#[derive(Clone)] pub struct Database { connection_pool: PgPool, }