diff --git a/.env b/.env
index 292f2d3e3..8ec9495c5 100644
--- a/.env
+++ b/.env
@@ -1,4 +1,3 @@
-INDEX_CURSEFORGE=false
 DEBUG=true
 CDN_URL=cdn.modrinth.com
 
@@ -14,3 +13,9 @@ BACKBLAZE_ENABLED=false
 BACKBLAZE_KEY_ID=none
 BACKBLAZE_KEY=none
 BACKBLAZE_BUCKET_ID=none
+
+INDEX_CURSEFORGE=false
+# 1 hour
+LOCAL_INDEX_INTERVAL=3600
+# 4 hours
+EXTERNAL_INDEX_INTERVAL=14400
diff --git a/Cargo.lock b/Cargo.lock
index be8499351..34e87a246 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1145,6 +1145,7 @@ dependencies = [
  "serde_json",
  "sha1",
  "sqlx",
+ "sqlx-macros",
  "thiserror",
 ]
 
@@ -1257,11 +1258,10 @@ dependencies = [
 [[package]]
 name = "meilisearch-sdk"
 version = "0.1.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e99e00b31a0e33add02a01c969a108144882ef27915d81292deda71baa6b6eea"
+source = "git+https://github.com/Aeledfyr/meilisearch-rust#ba1f1e530cb383f421273f6863378bc9bc222f7b"
 dependencies = [
  "log",
- "minreq",
+ "reqwest",
  "serde",
  "serde_json",
  "urlencoding",
@@ -1301,12 +1301,6 @@ dependencies = [
  "adler32",
 ]
 
-[[package]]
-name = "minreq"
-version = "2.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab229c252995e9d56cc66857f3ab2c41e3138b1a6c92089f013698388e64d6bd"
-
 [[package]]
 name = "mio"
 version = "0.6.22"
@@ -1349,33 +1343,6 @@ dependencies = [
  "ws2_32-sys",
 ]
 
-[[package]]
-name = "modrinth"
-version = "0.1.0"
-dependencies = [
- "actix-files",
- "actix-multipart",
- "actix-rt",
- "actix-web",
- "async-trait",
- "base64 0.12.3",
- "chrono",
- "dotenv",
- "env_logger",
- "futures",
- "futures-timer",
- "log",
- "meilisearch-sdk",
- "rand",
- "reqwest",
- "serde",
- "serde_json",
- "sha1",
- "sqlx",
- "sqlx-macros",
- "thiserror",
-]
-
 [[package]]
 name = "native-tls"
 version = "0.2.4"
diff --git a/Cargo.toml b/Cargo.toml
index 145f7e88f..89d51604f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,8 +19,6 @@ actix-multipart = "0.2.0"
 
 reqwest = {version="0.10.6", features=["json"]}
 
-meilisearch-sdk = "0.1.4"
-
 serde_json = "1.0"
 serde = { version = "1.0", features = ["derive"] }
 chrono = { version = "0.4", features = ["serde"] }
@@ -50,3 +48,8 @@ git = "https://github.com/launchbadge/sqlx/"
 branch = "master"
 default-features = false
 features = ["runtime-actix", "postgres", "chrono", "offline"]
+
+[dependencies.meilisearch-sdk]
+# Temp fork with some patches
+git = "https://github.com/Aeledfyr/meilisearch-rust"
+branch = "master"
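Note on the dependency swap above: the crates.io release of meilisearch-sdk drove its HTTP requests through the blocking minreq client, while the pinned fork goes through reqwest, so every SDK call becomes a future. That is what forces the indexing and search code later in this diff to turn async. A minimal sketch of the new call style, assuming the fork exposes the same `Client` methods used below (the address and index name are placeholders):

```rust
// Sketch only: with the reqwest-based fork, SDK methods return futures
// and must be awaited.
let client = meilisearch_sdk::client::Client::new("http://localhost:7700", "");
let index = client.get_index("relevance_mods").await?;
```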
diff --git a/src/main.rs b/src/main.rs
index 3dd648419..cab95886d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,16 +1,16 @@
-use crate::search::indexing::index_mods;
 use actix_web::middleware::Logger;
 use actix_web::{web, App, HttpServer};
 use env_logger::Env;
-use log::info;
-use std::env;
-use std::fs::File;
+use log::{info, warn};
+use search::indexing::index_mods;
+use search::indexing::IndexingSettings;
 use std::sync::Arc;
 
 mod database;
 mod file_hosting;
 mod models;
 mod routes;
+mod scheduler;
 mod search;
 
 #[actix_rt::main]
@@ -24,7 +24,6 @@ async fn main() -> std::io::Result<()> {
     let pool = database::connect()
         .await
         .expect("Database connection failed");
-    let client_ref = pool.clone();
 
     let backblaze_enabled = dotenv::var("BACKBLAZE_ENABLED")
         .ok()
@@ -44,22 +43,97 @@ async fn main() -> std::io::Result<()> {
         Arc::new(file_hosting::MockHost::new())
     };
 
-    // Get executable path
-    let mut exe_path = env::current_exe()?.parent().unwrap().to_path_buf();
-    // Create the path to the index lock file
-    exe_path.push("index.v1.lock");
+    // TODO: use a real arg parsing library
+    let skip_initial = std::env::args().any(|x| x == "skip");
+    // Allow manually skipping the initial indexing for quicker iteration
+    // and startup times.
+    if skip_initial {
+        info!("Skipping initial indexing");
+    }
 
-    // Indexing mods if not already done
-    if env::args().any(|x| x == "regen") {
-        // User forced regen of indexing
-        info!("Forced regeneration of indexes!");
-        index_mods(pool).await.expect("Mod indexing failed");
-    } else if !exe_path.exists() {
-        // The indexes were not created, or the version was upgraded
-        info!("Indexing of mods for first time...");
-        index_mods(pool).await.expect("Mod indexing failed");
-        // Create the lock file
-        File::create(exe_path)?;
+    let mut scheduler = scheduler::Scheduler::new();
+
+    // The interval in seconds at which the local database is indexed
+    // for searching. Defaults to 1 hour if unset.
+    let local_index_interval = std::time::Duration::from_secs(
+        dotenv::var("LOCAL_INDEX_INTERVAL")
+            .ok()
+            .map(|i| i.parse().unwrap())
+            .unwrap_or(3600),
+    );
+
+    let pool_ref = pool.clone();
+    let mut skip = skip_initial;
+    scheduler.run(local_index_interval, move || {
+        let pool_ref = pool_ref.clone();
+        async move {
+            if skip {
+                skip = false;
+                return;
+            }
+            info!("Indexing local database");
+            let settings = IndexingSettings {
+                index_local: true,
+                index_external: false,
+            };
+            let result = index_mods(pool_ref, settings).await;
+            if let Err(e) = result {
+                warn!("Local mod indexing failed: {:?}", e);
+            }
+            info!("Done indexing local database");
+        }
+    });
+
+    let indexing_queue = Arc::new(search::indexing::queue::CreationQueue::new());
+
+    let queue_ref = indexing_queue.clone();
+    let mut skip = skip_initial;
+    scheduler.run(std::time::Duration::from_secs(15 * 60), move || {
+        let queue = queue_ref.clone();
+        async move {
+            if skip {
+                skip = false;
+                return;
+            }
+            info!("Indexing created mod queue");
+            let result = search::indexing::queue::index_queue(&*queue).await;
+            if let Err(e) = result {
+                warn!("Indexing created mods failed: {:?}", e);
+            }
+            info!("Done indexing created mod queue");
+        }
+    });
+
+    if dotenv::var("INDEX_CURSEFORGE")
+        .ok()
+        .and_then(|b| b.parse::<bool>().ok())
+        .unwrap_or(false)
+    {
+        // The interval in seconds at which curseforge is indexed for
+        // searching. Defaults to 4 hours if unset.
+        let external_index_interval = std::time::Duration::from_secs(
+            dotenv::var("EXTERNAL_INDEX_INTERVAL")
+                .ok()
+                .map(|i| i.parse().unwrap())
+                .unwrap_or(3600 * 4),
+        );
+
+        let pool_ref = pool.clone();
+        scheduler.run(external_index_interval, move || {
+            info!("Indexing curseforge");
+            let pool_ref = pool_ref.clone();
+            async move {
+                let settings = IndexingSettings {
+                    index_local: false,
+                    index_external: true,
+                };
+                let result = index_mods(pool_ref, settings).await;
+                if let Err(e) = result {
+                    warn!("External mod indexing failed: {:?}", e);
+                }
+                info!("Done indexing curseforge");
+            }
+        });
     }
 
     info!("Starting Actix HTTP server!");
@@ -69,7 +143,7 @@ async fn main() -> std::io::Result<()> {
         App::new()
             .wrap(Logger::default())
             .wrap(Logger::new("%a %{User-Agent}i"))
-            .data(client_ref.clone())
+            .data(pool.clone())
            .data(file_host.clone())
            .service(routes::index_get)
            .service(routes::mod_search)
@@ -89,14 +163,14 @@ fn check_env_vars() {
            .and_then(|s| s.parse::<T>().ok())
            .is_none()
        {
-            log::warn!(
+            warn!(
                "Variable `{}` missing in dotenv or not of type `{}`",
                var,
                std::any::type_name::<T>()
            )
        }
    }
-    check_var::<bool>("INDEX_CURSEFORGE");
+    check_var::<String>("CDN_URL");
     check_var::<String>("DATABASE_URL");
     check_var::<String>("MEILISEARCH_ADDR");
     check_var::<String>("BIND_ADDR");
@@ -109,5 +183,18 @@ fn check_env_vars() {
         check_var::<String>("BACKBLAZE_KEY_ID");
         check_var::<String>("BACKBLAZE_KEY");
         check_var::<String>("BACKBLAZE_BUCKET_ID");
+    } else {
+        check_var::<String>("MOCK_FILE_PATH");
     }
+
+    check_var::<bool>("INDEX_CURSEFORGE");
+    if dotenv::var("INDEX_CURSEFORGE")
+        .ok()
+        .and_then(|s| s.parse::<bool>().ok())
+        .unwrap_or(false)
+    {
+        check_var::<u64>("EXTERNAL_INDEX_INTERVAL");
+    }
+
+    check_var::<u64>("LOCAL_INDEX_INTERVAL");
 }
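Each task registered above follows the same ownership pattern: one clone of the shared handle is moved into the `FnMut` closure, and the closure clones it again so that every tick's future owns its own copy. A distilled sketch (the task body is hypothetical):

```rust
let pool_ref = pool.clone(); // moved into the closure below
scheduler.run(std::time::Duration::from_secs(60), move || {
    // Cloned once per tick, because the closure runs repeatedly and
    // each returned future must own its data.
    let pool_ref = pool_ref.clone();
    async move {
        // ... one indexing pass using pool_ref ...
    }
});
```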
diff --git a/src/models/mods.rs b/src/models/mods.rs
index 7df7bbdb3..bf9222889 100644
--- a/src/models/mods.rs
+++ b/src/models/mods.rs
@@ -131,6 +131,10 @@ pub struct ModLoader(pub String);
 #[derive(Serialize, Deserialize)]
 pub struct SearchRequest {
     pub query: Option<String>,
+    /// Must match a 2-deep JSON array of strings, e.g. `[["categories:misc"]]`
+    // TODO: We may want to have a better representation of this, so that
+    // we are less likely to break backwards compatibility
+    pub facets: Option<String>,
     pub filters: Option<String>,
     pub version: Option<String>,
     pub offset: Option<String>,
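Since `facets` travels as a JSON string inside the query string, a client would build it by serializing a nested array. A sketch of the round trip, assuming the format in the doc comment above:

```rust
// Client side: a 2-deep array of "attribute:value" strings.
let facets = serde_json::to_string(&vec![vec!["categories:misc"]]).unwrap();
assert_eq!(facets, r#"[["categories:misc"]]"#);
// Server side (src/search/mod.rs below) parses it back with
// serde_json::from_str::<Vec<Vec<String>>>(&facets).
```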
diff --git a/src/routes/mod_creation.rs b/src/routes/mod_creation.rs
index e53de194f..76f34181c 100644
--- a/src/routes/mod_creation.rs
+++ b/src/routes/mod_creation.rs
@@ -3,6 +3,7 @@ use crate::file_hosting::{FileHost, FileHostingError};
 use crate::models::error::ApiError;
 use crate::models::mods::{GameVersion, ModId, VersionId, VersionType};
 use crate::models::teams::TeamMember;
+use crate::search::indexing::queue::CreationQueue;
 use actix_multipart::{Field, Multipart};
 use actix_web::http::StatusCode;
 use actix_web::web::Data;
@@ -10,6 +11,7 @@ use actix_web::{post, HttpResponse};
 use futures::stream::StreamExt;
 use serde::{Deserialize, Serialize};
 use sqlx::postgres::PgPool;
+use std::sync::Arc;
 use thiserror::Error;
 
 #[derive(Error, Debug)]
@@ -124,7 +126,8 @@ async fn undo_uploads(
 pub async fn mod_create(
     payload: Multipart,
     client: Data<PgPool>,
-    file_host: Data<Arc<dyn FileHost>>,
+    file_host: Data<Arc<dyn FileHost + Send + Sync>>,
+    indexing_queue: Data<Arc<CreationQueue>>,
 ) -> Result<HttpResponse, CreateError> {
     let mut transaction = client.begin().await?;
     let mut uploaded_files = Vec::new();
@@ -134,6 +137,7 @@ pub async fn mod_create(
         &mut transaction,
         &***file_host,
         &mut uploaded_files,
+        &***indexing_queue,
     )
     .await;
 
@@ -159,6 +163,7 @@ async fn mod_create_inner(
     transaction: &mut sqlx::Transaction<'_, sqlx::Postgres>,
     file_host: &dyn FileHost,
     uploaded_files: &mut Vec<UploadedFile>,
+    indexing_queue: &CreationQueue,
 ) -> Result<HttpResponse, CreateError> {
     let cdn_url = dotenv::var("CDN_URL")?;
 
@@ -377,6 +382,45 @@ async fn mod_create_inner(
         initial_versions: created_versions,
     };
 
+    let versions_list = mod_builder
+        .initial_versions
+        .iter()
+        .flat_map(|v| {
+            v.game_versions.iter().map(|id| id.0.to_string())
+            // TODO: proper version identifiers, once game versions
+            // have been implemented
+        })
+        .collect::<std::collections::HashSet<_>>()
+        .into_iter()
+        .collect::<Vec<_>>();
+
+    let now = chrono::Utc::now();
+    let timestamp = now.timestamp();
+    let formatted = now.to_string();
+
+    let index_mod = crate::search::UploadSearchMod {
+        mod_id: format!("local-{}", mod_id),
+        title: mod_builder.title.clone(),
+        description: mod_builder.description.clone(),
+        categories: create_data.categories.clone(),
+        versions: versions_list,
+        page_url: mod_builder.body_url.clone(),
+        icon_url: mod_builder.icon_url.clone().unwrap(),
+        // TODO: Author/team info, latest version info
+        author: String::new(),
+        author_url: String::new(),
+        latest_version: String::new(),
+        downloads: 0,
+        date_created: formatted.clone(),
+        created_timestamp: timestamp,
+        // TODO: store and return modified time
+        date_modified: formatted,
+        modified_timestamp: timestamp,
+        empty: std::borrow::Cow::Borrowed("{}{}{}"),
+    };
+
+    indexing_queue.add(index_mod);
+
     let _mod_id = mod_builder.insert(&mut *transaction).await?;
 
     // TODO: respond with the new mod info, or with just the new mod id.
diff --git a/src/routes/mods.rs b/src/routes/mods.rs
index 3620f45c0..ad50a9f49 100644
--- a/src/routes/mods.rs
+++ b/src/routes/mods.rs
@@ -6,5 +6,6 @@ use actix_web::{get, web, HttpResponse};
 pub async fn mod_search(
     web::Query(info): web::Query<SearchRequest>,
 ) -> Result<HttpResponse, SearchError> {
-    Ok(HttpResponse::Ok().json(search_for_mod(&info)?))
+    let results = search_for_mod(&info).await?;
+    Ok(HttpResponse::Ok().json(results))
 }
diff --git a/src/scheduler.rs b/src/scheduler.rs
new file mode 100644
index 000000000..8f6746d4a
--- /dev/null
+++ b/src/scheduler.rs
@@ -0,0 +1,30 @@
+use actix_rt::time;
+use actix_rt::Arbiter;
+use futures::StreamExt;
+
+pub struct Scheduler {
+    arbiter: Arbiter,
+}
+
+impl Scheduler {
+    pub fn new() -> Self {
+        Scheduler {
+            arbiter: Arbiter::new(),
+        }
+    }
+
+    pub fn run<F, R>(&mut self, interval: std::time::Duration, mut task: F)
+    where
+        F: FnMut() -> R + Send + 'static,
+        R: std::future::Future<Output = ()> + Send + 'static,
+    {
+        let future = time::interval(interval).for_each_concurrent(2, move |_| task());
+        self.arbiter.send(future);
+    }
+}
+
+impl Drop for Scheduler {
+    fn drop(&mut self) {
+        self.arbiter.stop();
+    }
+}
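Two semantics of this scheduler are easy to miss: `for_each_concurrent(2, ...)` lets a new tick start while the previous run is still in flight (but never more than two overlapping runs of the same task), and dropping the `Scheduler` stops the arbiter along with every task on it. Minimal usage sketch with a hypothetical task:

```rust
let mut scheduler = scheduler::Scheduler::new();
// Runs on the scheduler's dedicated Arbiter thread every 30 seconds.
scheduler.run(std::time::Duration::from_secs(30), || async {
    log::info!("tick");
});
// Keep `scheduler` alive; dropping it cancels the scheduled tasks.
```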
diff --git a/src/search/indexing/curseforge_import.rs b/src/search/indexing/curseforge_import.rs
index afc666809..3ae5f73f0 100644
--- a/src/search/indexing/curseforge_import.rs
+++ b/src/search/indexing/curseforge_import.rs
@@ -1,5 +1,5 @@
 use super::IndexingError;
-use crate::search::SearchMod;
+use crate::search::UploadSearchMod;
 use log::info;
 use serde::{Deserialize, Serialize};
 
@@ -48,10 +48,10 @@ pub struct CurseForgeMod {
 pub async fn index_curseforge(
     start_index: i32,
     end_index: i32,
-) -> Result<Vec<SearchMod>, IndexingError> {
+) -> Result<Vec<UploadSearchMod>, IndexingError> {
     info!("Indexing curseforge mods!");
 
-    let mut docs_to_add: Vec<SearchMod> = vec![];
+    let mut docs_to_add: Vec<UploadSearchMod> = vec![];
 
     let res = reqwest::Client::new()
         .post("https://addons-ecs.forgesvc.net/api/v2/addon")
@@ -177,32 +177,32 @@ pub async fn index_curseforge(
             .thumbnail_url
             .replace("/256/256/", "/64/64/");
 
-        docs_to_add.push(SearchMod {
-            mod_id: -curseforge_mod.id as i64,
+        let created = curseforge_mod
+            .date_created
+            .parse::<chrono::DateTime<chrono::Utc>>()?;
+        let modified = curseforge_mod
+            .date_modified
+            .parse::<chrono::DateTime<chrono::Utc>>()?;
+
+        docs_to_add.push(UploadSearchMod {
+            mod_id: format!("curse-{}", curseforge_mod.id),
             author: (&curseforge_mod.authors[0].name).to_string(),
             title: curseforge_mod.name,
             description: curseforge_mod.summary.chars().take(150).collect(),
-            keywords: mod_categories,
+            categories: mod_categories,
             versions: mod_game_versions.clone(),
             downloads: curseforge_mod.download_count as i32,
             page_url: curseforge_mod.website_url,
             icon_url,
             author_url: (&curseforge_mod.authors[0].url).to_string(),
-            date_created: curseforge_mod.date_created.chars().take(10).collect(),
-            created: curseforge_mod
-                .date_created
-                .parse::<chrono::DateTime<chrono::Utc>>()?
-                .timestamp(),
-            date_modified: curseforge_mod.date_modified.chars().take(10).collect(),
-            updated: curseforge_mod
-                .date_modified
-                .parse::<chrono::DateTime<chrono::Utc>>()?
-                .timestamp(),
+            date_created: created.to_string(),
+            created_timestamp: created.timestamp(),
+            date_modified: modified.to_string(),
+            modified_timestamp: modified.timestamp(),
             latest_version,
-            empty: String::from("{}{}{}"),
+            empty: std::borrow::Cow::Borrowed("{}{}{}"),
         })
     }
 
-    //TODO Reindex every hour for new mods.
     Ok(docs_to_add)
 }
diff --git a/src/search/indexing/local_import.rs b/src/search/indexing/local_import.rs
index 0229ac5d0..7a36d6186 100644
--- a/src/search/indexing/local_import.rs
+++ b/src/search/indexing/local_import.rs
@@ -2,13 +2,13 @@ use futures::{StreamExt, TryStreamExt};
 use log::info;
 
 use super::IndexingError;
-use crate::search::SearchMod;
+use crate::search::UploadSearchMod;
 use sqlx::postgres::PgPool;
 
-pub async fn index_local(pool: PgPool) -> Result<Vec<SearchMod>, IndexingError> {
+pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingError> {
     info!("Indexing local mods!");
 
-    let mut docs_to_add: Vec<SearchMod> = vec![];
+    let mut docs_to_add: Vec<UploadSearchMod> = vec![];
 
     let mut results = sqlx::query!(
         "
@@ -53,23 +53,25 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<SearchMod>, IndexingError> {
             icon_url = url;
         }
 
-        docs_to_add.push(SearchMod {
-            mod_id: result.id,
-            author: "".to_string(),
+        let formatted = result.published.to_string();
+        let timestamp = result.published.timestamp();
+        docs_to_add.push(UploadSearchMod {
+            mod_id: format!("local-{}", crate::models::ids::ModId(result.id as u64)),
             title: result.title,
             description: result.description,
-            keywords: categories,
+            categories,
             versions,
             downloads: result.downloads,
             page_url: result.body_url,
             icon_url,
+            author: "".to_string(), // TODO: author/team info
             author_url: "".to_string(),
-            date_created: result.published.to_string(),
-            created: 0,
-            date_modified: "".to_string(),
-            updated: 0,
-            latest_version: "".to_string(),
-            empty: String::from("{}{}{}"),
+            date_created: formatted.clone(),
+            created_timestamp: timestamp,
+            date_modified: formatted,
+            modified_timestamp: timestamp,
+            latest_version: "".to_string(), // TODO: Info about latest version
+            empty: std::borrow::Cow::Borrowed("{}{}{}"),
         });
     }
 }
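Both importers now emit string document IDs namespaced by source ("local-" and "curse-"), replacing the old trick of negating curseforge's numeric IDs, so documents from the two sources cannot collide inside the shared indices. Illustration (the concrete ID values are made up, and the local form relies on whatever `ModId`'s `Display` impl produces):

```rust
let local = format!("local-{}", crate::models::ids::ModId(1234)); // "local-<encoded id>"
let external = format!("curse-{}", 238222);                       // "curse-238222"
```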
diff --git a/src/search/indexing/mod.rs b/src/search/indexing/mod.rs
index 3317679b9..b4cf46366 100644
--- a/src/search/indexing/mod.rs
+++ b/src/search/indexing/mod.rs
@@ -1,11 +1,13 @@
 /// This module is used for the indexing from any source.
 pub mod curseforge_import;
 pub mod local_import;
+pub mod queue;
 
-use crate::search::indexing::curseforge_import::index_curseforge;
-use crate::search::indexing::local_import::index_local;
-use crate::search::SearchMod;
+use crate::search::UploadSearchMod;
+use curseforge_import::index_curseforge;
+use local_import::index_local;
 use meilisearch_sdk::client::Client;
+use meilisearch_sdk::indexes::Index;
 use meilisearch_sdk::settings::Settings;
 use sqlx::postgres::PgPool;
 use std::collections::{HashMap, VecDeque};
@@ -14,7 +16,7 @@ use thiserror::Error;
 #[derive(Error, Debug)]
 pub enum IndexingError {
     #[error("Error while connecting to the MeiliSearch database")]
-    IndexDBError(meilisearch_sdk::errors::Error),
+    IndexDBError(#[from] meilisearch_sdk::errors::Error),
     #[error("Error while importing mods from CurseForge")]
     CurseforgeImportError(reqwest::Error),
     #[error("Error while serializing or deserializing JSON: {0}")]
@@ -32,95 +34,115 @@ pub enum IndexingError {
 // assumes a max average size of 1KiB per mod to avoid this cap.
 const MEILISEARCH_CHUNK_SIZE: usize = 10000;
 
-pub async fn index_mods(pool: PgPool) -> Result<(), IndexingError> {
-    // Check if the index exists
+#[derive(Debug)]
+pub struct IndexingSettings {
+    pub index_external: bool,
+    pub index_local: bool,
+}
+
+impl IndexingSettings {
+    pub fn from_env() -> Self {
+        let index_local = true;
+        let index_external = dotenv::var("INDEX_CURSEFORGE")
+            .ok()
+            .and_then(|b| b.parse::<bool>().ok())
+            .unwrap_or(false);
+
+        Self {
+            index_external,
+            index_local,
+        }
+    }
+}
+
+pub async fn index_mods(pool: PgPool, settings: IndexingSettings) -> Result<(), IndexingError> {
+    let mut docs_to_add: Vec<UploadSearchMod> = vec![];
+
+    if settings.index_local {
+        docs_to_add.append(&mut index_local(pool.clone()).await?);
+    }
+    if settings.index_external {
+        docs_to_add.append(&mut index_curseforge(1, 400_000).await?);
+    }
+
+    // Write Indices
+
+    add_mods(docs_to_add).await?;
+
+    Ok(())
+}
+
+async fn create_index<'a>(
+    client: &'a Client<'a>,
+    name: &'a str,
+    rules: impl FnOnce() -> Vec<String>,
+) -> Result<Index<'a>, IndexingError> {
+    match client.get_index(name).await {
+        // TODO: update index settings on startup (or delete old indices on startup)
+        Ok(index) => Ok(index),
+        Err(meilisearch_sdk::errors::Error::IndexNotFound) => {
+            // Only create index and set settings if the index doesn't already exist
+            let index = client.create_index(name, Some("mod_id")).await?;
+
+            index
+                .set_settings(&default_settings().with_ranking_rules(rules()))
+                .await?;
+
+            Ok(index)
+        }
+        Err(e) => {
+            log::warn!("Unhandled error while creating index: {}", e);
+            Err(IndexingError::IndexDBError(e))
+        }
+    }
+}
+
+async fn add_to_index(index: Index<'_>, mods: &[UploadSearchMod]) -> Result<(), IndexingError> {
+    for chunk in mods.chunks(MEILISEARCH_CHUNK_SIZE) {
+        index.add_documents(chunk, Some("mod_id")).await?;
+    }
+    Ok(())
+}
+
+pub async fn add_mods(mods: Vec<UploadSearchMod>) -> Result<(), IndexingError> {
     let address = &*dotenv::var("MEILISEARCH_ADDR")?;
     let client = Client::new(address, "");
 
-    let mut docs_to_add: Vec<SearchMod> = vec![];
+    // Relevance Index
+    let relevance_index = create_index(&client, "relevance_mods", || {
+        let mut relevance_rules = default_rules();
+        relevance_rules.push_back("desc(downloads)".to_string());
+        relevance_rules.into()
+    })
+    .await?;
+    add_to_index(relevance_index, &mods).await?;
 
-    docs_to_add.append(&mut index_local(pool.clone()).await?);
-    if dotenv::var("INDEX_CURSEFORGE")?
-        .parse()
-        .expect("`INDEX_CURSEFORGE` is not a boolean.")
-    {
-        docs_to_add.append(&mut index_curseforge(1, 400_000).await?);
-    }
-    //Write Indexes
-    //Relevance Index
+    // Downloads Index
+    let downloads_index = create_index(&client, "downloads_mods", || {
+        let mut downloads_rules = default_rules();
+        downloads_rules.push_front("desc(downloads)".to_string());
+        downloads_rules.into()
+    })
+    .await?;
+    add_to_index(downloads_index, &mods).await?;
 
-    let mut relevance_index = client
-        .get_or_create("relevance_mods")
-        .map_err(IndexingError::IndexDBError)?;
+    // Updated Index
+    let updated_index = create_index(&client, "updated_mods", || {
+        let mut updated_rules = default_rules();
+        updated_rules.push_front("desc(updated)".to_string());
+        updated_rules.into()
+    })
+    .await?;
+    add_to_index(updated_index, &mods).await?;
 
-    let mut relevance_rules = default_rules();
-    relevance_rules.push_back("desc(downloads)".to_string());
-
-    relevance_index
-        .set_settings(&default_settings().with_ranking_rules(relevance_rules.into()))
-        .map_err(IndexingError::IndexDBError)?;
-
-    for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
-        // TODO: get meilisearch sdk to not require cloning (ie take a reference to docs_to_add)
-        // This may require making our own fork of it.
-        relevance_index
-            .add_documents(Vec::from(chunk), Some("mod_id"))
-            .map_err(IndexingError::IndexDBError)?;
-    }
-
-    //Downloads Index
-    let mut downloads_index = client
-        .get_or_create("downloads_mods")
-        .map_err(IndexingError::IndexDBError)?;
-
-    let mut downloads_rules = default_rules();
-    downloads_rules.push_front("desc(downloads)".to_string());
-
-    downloads_index
-        .set_settings(&default_settings().with_ranking_rules(downloads_rules.into()))
-        .map_err(IndexingError::IndexDBError)?;
-
-    for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
-        downloads_index
-            .add_documents(Vec::from(chunk), Some("mod_id"))
-            .map_err(IndexingError::IndexDBError)?;
-    }
-
-    //Updated Index
-    let mut updated_index = client
-        .get_or_create("updated_mods")
-        .map_err(IndexingError::IndexDBError)?;
-
-    let mut updated_rules = default_rules();
-    updated_rules.push_front("desc(updated)".to_string());
-
-    updated_index
-        .set_settings(&default_settings().with_ranking_rules(updated_rules.into()))
-        .map_err(IndexingError::IndexDBError)?;
-
-    for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
-        updated_index
-            .add_documents(Vec::from(chunk), Some("mod_id"))
-            .map_err(IndexingError::IndexDBError)?;
-    }
-
-    //Created Index
-    let mut newest_index = client
-        .get_or_create("newest_mods")
-        .map_err(IndexingError::IndexDBError)?;
-
-    let mut newest_rules = default_rules();
-    newest_rules.push_back("desc(created)".to_string());
-
-    newest_index
-        .set_settings(&default_settings().with_ranking_rules(newest_rules.into()))
-        .map_err(IndexingError::IndexDBError)?;
-
-    for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
-        newest_index
-            .add_documents(Vec::from(chunk), Some("mod_id"))
-            .map_err(IndexingError::IndexDBError)?;
-    }
+    // Created Index
+    let newest_index = create_index(&client, "newest_mods", || {
+        let mut newest_rules = default_rules();
+        newest_rules.push_front("desc(created)".to_string());
+        newest_rules.into()
+    })
+    .await?;
+    add_to_index(newest_index, &mods).await?;
 
     Ok(())
 }
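The reason the hunk above keeps four indices for one data set is that this meilisearch version fixes ranking rules per index, so each sort order needs its own copy. Whether a rule is pushed to the front or the back of `default_rules()` decides whether it dominates the sort or merely breaks ties. A sketch, assuming `default_rules()` yields meilisearch 0.x's standard relevance rules:

```rust
let mut rules = default_rules();                  // assumed: [typo, words, proximity, ...]
rules.push_front("desc(downloads)".to_string());  // downloads dominate (downloads_mods)
// vs. push_back: downloads only break ties after relevance (relevance_mods)
let ranking: Vec<String> = rules.into();          // VecDeque<String> -> Vec<String>
```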
@@ -144,7 +166,7 @@ fn default_settings() -> Settings {
         "author".to_string(),
         "title".to_string(),
         "description".to_string(),
-        "keywords".to_string(),
+        "categories".to_string(),
         "versions".to_string(),
         "downloads".to_string(),
         "page_url".to_string(),
@@ -155,13 +177,12 @@ fn default_settings() -> Settings {
         "date_modified".to_string(),
         "updated".to_string(),
         "latest_version".to_string(),
-        "empty".to_string(),
     ];
 
     let searchable_attributes = vec![
         "title".to_string(),
         "description".to_string(),
-        "keywords".to_string(),
+        "categories".to_string(),
         "versions".to_string(),
         "author".to_string(),
         "empty".to_string(),
@@ -173,6 +194,7 @@ fn default_settings() -> Settings {
         .with_accept_new_fields(true)
         .with_stop_words(vec![])
         .with_synonyms(HashMap::new())
+        .with_attributes_for_faceting(vec![String::from("categories")])
 }
 
 //endregion
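Registering `categories` for faceting is what makes the new `facets` search parameter effective. In meilisearch 0.x facet filters, the inner arrays are OR'ed and the outer level is AND'ed; a sketch of the server-side use, mirroring `search_for_mod` below (the category values are made up):

```rust
// (misc OR library) AND magic
let facets: Vec<Vec<String>> = serde_json::from_str(
    r#"[["categories:misc", "categories:library"], ["categories:magic"]]"#,
)?;
let query = query.with_facet_filters(facets); // `query` as built in search_for_mod
```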
"page_url".to_string(), @@ -155,13 +177,12 @@ fn default_settings() -> Settings { "date_modified".to_string(), "updated".to_string(), "latest_version".to_string(), - "empty".to_string(), ]; let searchable_attributes = vec![ "title".to_string(), "description".to_string(), - "keywords".to_string(), + "categories".to_string(), "versions".to_string(), "author".to_string(), "empty".to_string(), @@ -173,6 +194,7 @@ fn default_settings() -> Settings { .with_accept_new_fields(true) .with_stop_words(vec![]) .with_synonyms(HashMap::new()) + .with_attributes_for_faceting(vec![String::from("categories")]) } //endregion diff --git a/src/search/indexing/queue.rs b/src/search/indexing/queue.rs new file mode 100644 index 000000000..f72d90deb --- /dev/null +++ b/src/search/indexing/queue.rs @@ -0,0 +1,31 @@ +use super::{add_mods, IndexingError, UploadSearchMod}; +use std::sync::Mutex; + +pub struct CreationQueue { + // There's probably a better structure for this, but a mutex works + // and I don't think this can deadlock. This queue requires fast + // writes and then a single potentially slower read/write that + // empties the queue. + queue: Mutex>, +} + +impl CreationQueue { + pub fn new() -> Self { + CreationQueue { + queue: Mutex::new(Vec::with_capacity(10)), + } + } + + pub fn add(&self, search_mod: UploadSearchMod) { + // Can only panic if mutex is poisoned + self.queue.lock().unwrap().push(search_mod); + } + pub fn take(&self) -> Vec { + std::mem::replace(&mut *self.queue.lock().unwrap(), Vec::with_capacity(10)) + } +} + +pub async fn index_queue(queue: &CreationQueue) -> Result<(), IndexingError> { + let queue = queue.take(); + add_mods(queue).await +} diff --git a/src/search/mod.rs b/src/search/mod.rs index c5b37d393..e2be3cfdb 100644 --- a/src/search/mod.rs +++ b/src/search/mod.rs @@ -6,6 +6,7 @@ use meilisearch_sdk::client::Client; use meilisearch_sdk::document::Document; use meilisearch_sdk::search::Query; use serde::{Deserialize, Serialize}; +use std::borrow::Cow; use thiserror::Error; pub mod indexing; @@ -13,7 +14,7 @@ pub mod indexing; #[derive(Error, Debug)] pub enum SearchError { #[error("Error while connecting to the MeiliSearch database")] - IndexDBError(meilisearch_sdk::errors::Error), + IndexDBError(#[from] meilisearch_sdk::errors::Error), #[error("Error while serializing or deserializing JSON: {0}")] SerDeError(#[from] serde_json::Error), #[error("Error while parsing an integer: {0}")] @@ -45,36 +46,75 @@ impl actix_web::ResponseError for SearchError { } } +/// A mod document used for uploading mods to meilisearch's indices. +/// This contains some extra data that is not returned by search results. 
diff --git a/src/search/mod.rs b/src/search/mod.rs
index c5b37d393..e2be3cfdb 100644
--- a/src/search/mod.rs
+++ b/src/search/mod.rs
@@ -6,6 +6,7 @@ use meilisearch_sdk::client::Client;
 use meilisearch_sdk::document::Document;
 use meilisearch_sdk::search::Query;
 use serde::{Deserialize, Serialize};
+use std::borrow::Cow;
 use thiserror::Error;
 
 pub mod indexing;
@@ -13,7 +14,7 @@ pub mod indexing;
 #[derive(Error, Debug)]
 pub enum SearchError {
     #[error("Error while connecting to the MeiliSearch database")]
-    IndexDBError(meilisearch_sdk::errors::Error),
+    IndexDBError(#[from] meilisearch_sdk::errors::Error),
     #[error("Error while serializing or deserializing JSON: {0}")]
     SerDeError(#[from] serde_json::Error),
     #[error("Error while parsing an integer: {0}")]
@@ -45,36 +46,75 @@ impl actix_web::ResponseError for SearchError {
     }
 }
 
+/// A mod document used for uploading mods to meilisearch's indices.
+/// This contains some extra data that is not returned by search results.
 #[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct SearchMod {
-    pub mod_id: i64,
+pub struct UploadSearchMod {
+    pub mod_id: String,
     pub author: String,
     pub title: String,
     pub description: String,
-    pub keywords: Vec<String>,
+    pub categories: Vec<String>,
     pub versions: Vec<String>,
     pub downloads: i32,
     pub page_url: String,
     pub icon_url: String,
     pub author_url: String,
-    pub date_created: String,
-    pub created: i64,
-    pub date_modified: String,
-    pub updated: i64,
     pub latest_version: String,
-    pub empty: String,
+
+    /// RFC 3339 formatted creation date of the mod
+    pub date_created: String,
+    /// Unix timestamp of the creation date of the mod
+    pub created_timestamp: i64,
+    /// RFC 3339 formatted date/time of last major modification (update)
+    pub date_modified: String,
+    /// Unix timestamp of the last major modification
+    pub modified_timestamp: i64,
+
+    /// Must be "{}{}{}", a hack until meilisearch supports searches
+    /// with empty queries (https://github.com/meilisearch/MeiliSearch/issues/729)
+    // This is a Cow to prevent unnecessary allocations for a static
+    // string
+    pub empty: Cow<'static, str>,
 }
 
-impl Document for SearchMod {
-    type UIDType = i64;
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct ResultSearchMod {
+    pub mod_id: String,
+    pub author: String,
+    pub title: String,
+    pub description: String,
+    pub categories: Vec<String>,
+    // TODO: more efficient format for listing versions, without many repetitions
+    pub versions: Vec<String>,
+    pub downloads: i32,
+    pub page_url: String,
+    pub icon_url: String,
+    pub author_url: String,
+    /// RFC 3339 formatted creation date of the mod
+    pub date_created: String,
+    /// RFC 3339 formatted modification date of the mod
+    pub date_modified: String,
+    pub latest_version: String,
+}
+
+impl Document for UploadSearchMod {
+    type UIDType = String;
 
     fn get_uid(&self) -> &Self::UIDType {
         &self.mod_id
     }
 }
 
-pub fn search_for_mod(info: &SearchRequest) -> Result<Vec<SearchMod>, SearchError> {
-    use std::borrow::Cow;
+impl Document for ResultSearchMod {
+    type UIDType = String;
+
+    fn get_uid(&self) -> &Self::UIDType {
+        &self.mod_id
+    }
+}
+
+pub async fn search_for_mod(info: &SearchRequest) -> Result<Vec<ResultSearchMod>, SearchError> {
     let address = &*dotenv::var("MEILISEARCH_ADDR")?;
     let client = Client::new(address, "");
 
@@ -98,11 +138,15 @@ pub fn search_for_mod(info: &SearchRequest) -> Result<Vec<SearchMod>, SearchError> {
     if !filters.is_empty() {
         query = query.with_filters(&filters);
     }
+    if let Some(facets) = &info.facets {
+        let facets = serde_json::from_str::<Vec<Vec<String>>>(facets)?;
+        query = query.with_facet_filters(facets);
+    }
 
     Ok(client
         .get_index(format!("{}_mods", index).as_ref())
-        .map_err(SearchError::IndexDBError)?
-        .search::<SearchMod>(&query)
-        .map_err(SearchError::IndexDBError)?
+        .await?
+        .search::<ResultSearchMod>(&query)
+        .await?
         .hits)
 }
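The `empty` field gives every document one token that is guaranteed to match a known, non-empty query: until the linked meilisearch issue lands, a "return everything" search can be expressed by querying the sentinel itself. A hypothetical fallback along those lines (not part of this diff):

```rust
// Every document's `empty` field contains "{}{}{}", so searching for the
// sentinel matches all documents and lets the index's ranking rules order them.
let search_text = info.query.as_deref().unwrap_or("{}{}{}");
```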