diff --git a/Cargo.lock b/Cargo.lock
index 33a0db8b5..3dcb55819 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1726,15 +1726,14 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
 
 [[package]]
 name = "meilisearch-sdk"
-version = "0.3.0"
+version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a9e61da1ebd3d15e0aaa978d3f1f080e3793494ddea0bc6703da4b330ea1ffc"
+checksum = "cb2081610089deb10290747b8782049f9cb64a70a4d305a28970db8b780d1448"
 dependencies = [
  "log",
  "reqwest",
  "serde",
  "serde_json",
- "urlencoding",
  "wasm-bindgen",
  "wasm-bindgen-futures",
  "web-sys",
@@ -3009,12 +3008,6 @@ dependencies = [
  "percent-encoding",
 ]
 
-[[package]]
-name = "urlencoding"
-version = "1.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9232eb53352b4442e40d7900465dfc534e8cb2dc8f18656fcb2ac16112b5593"
-
 [[package]]
 name = "v_escape"
 version = "0.13.2"
diff --git a/Cargo.toml b/Cargo.toml
index bb86d1b7d..60368f95e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,7 +18,7 @@ actix-files = "0.4.0"
 actix-multipart = "0.3.0"
 actix-cors = "0.4.1"
 
-meilisearch-sdk = "0.3.0"
+meilisearch-sdk = "0.4.0"
 reqwest = { version = "0.10.8", features = ["json"] }
 serde_json = "1.0"
diff --git a/src/main.rs b/src/main.rs
index 69b42be3e..499e652f6 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,11 +1,10 @@
 use crate::file_hosting::S3Host;
 use actix_cors::Cors;
 use actix_ratelimit::{MemoryStore, MemoryStoreActor, RateLimiter};
-use actix_web::middleware::Logger;
 use actix_web::{http, web, App, HttpServer};
 use env_logger::Env;
 use gumdrop::Options;
-use log::{info, warn};
+use log::{error, info, warn};
 use search::indexing::index_mods;
 use search::indexing::IndexingSettings;
 use std::sync::Arc;
@@ -29,6 +28,12 @@ struct Config {
     reconfigure_indices: bool,
     #[options(no_short, help = "Reset the documents in the indices")]
     reset_indices: bool,
+
+    #[options(
+        no_short,
+        help = "Allow missing environment variables on startup. This is a bad idea, but it may work in some cases."
+    )]
+    allow_missing_vars: bool,
 }
 
 #[actix_rt::main]
@@ -38,7 +43,15 @@ async fn main() -> std::io::Result<()> {
 
     let config = Config::parse_args_default_or_exit();
 
-    check_env_vars();
+    if check_env_vars() {
+        error!("Some environment variables are missing!");
+        if !config.allow_missing_vars {
+            return Err(std::io::Error::new(
+                std::io::ErrorKind::Other,
+                "Missing required environment variables",
+            ));
+        }
+    }
 
     let search_config = search::SearchConfig {
         address: dotenv::var("MEILISEARCH_ADDR").unwrap(),
@@ -225,8 +238,6 @@ async fn main() -> std::io::Result<()> {
         App::new()
             .wrap(cors.finish())
-            .wrap(Logger::default())
-            .wrap(Logger::new("%a %{User-Agent}i"))
             .wrap(
                 RateLimiter::new(MemoryStoreActor::from(store.clone()).start())
                     .with_interval(std::time::Duration::from_secs(60))
@@ -254,8 +265,10 @@ async fn main() -> std::io::Result<()> {
 }
 
 // This is so that env vars not used immediately don't panic at runtime
-fn check_env_vars() {
-    fn check_var<T: std::str::FromStr>(var: &str) {
+fn check_env_vars() -> bool {
+    let mut failed = false;
+
+    fn check_var<T: std::str::FromStr>(var: &str) -> bool {
         if dotenv::var(var)
             .ok()
             .and_then(|s| s.parse::<T>().ok())
@@ -265,7 +278,10 @@ fn check_env_vars() {
                 "Variable `{}` missing in dotenv or not of type `{}`",
                 var,
                 std::any::type_name::<T>()
-            )
+            );
+            true
+        } else {
+            false
         }
     }
 
@@ -275,51 +291,55 @@ fn check_env_vars() {
         .is_none()
     {
         warn!("Variable `CORS_ORIGINS` missing in dotenv or not a json array of strings");
+        failed |= true;
     }
 
-    check_var::<String>("CDN_URL");
-    check_var::<String>("DATABASE_URL");
-    check_var::<String>("MEILISEARCH_ADDR");
-    check_var::<String>("MEILISEARCH_KEY");
-    check_var::<String>("BIND_ADDR");
+    failed |= check_var::<String>("CDN_URL");
+    failed |= check_var::<String>("DATABASE_URL");
+    failed |= check_var::<String>("MEILISEARCH_ADDR");
+    failed |= check_var::<String>("MEILISEARCH_KEY");
+    failed |= check_var::<String>("BIND_ADDR");
 
-    check_var::<String>("STORAGE_BACKEND");
+    failed |= check_var::<String>("STORAGE_BACKEND");
 
     let storage_backend = dotenv::var("STORAGE_BACKEND").ok();
 
     if storage_backend.as_deref() == Some("backblaze") {
-        check_var::<String>("BACKBLAZE_KEY_ID");
-        check_var::<String>("BACKBLAZE_KEY");
-        check_var::<String>("BACKBLAZE_BUCKET_ID");
+        failed |= check_var::<String>("BACKBLAZE_KEY_ID");
+        failed |= check_var::<String>("BACKBLAZE_KEY");
+        failed |= check_var::<String>("BACKBLAZE_BUCKET_ID");
     } else if storage_backend.as_deref() == Some("s3") {
-        check_var::<String>("S3_ACCESS_TOKEN");
-        check_var::<String>("S3_SECRET");
-        check_var::<String>("S3_URL");
-        check_var::<String>("S3_REGION");
-        check_var::<String>("S3_BUCKET_NAME");
+        failed |= check_var::<String>("S3_ACCESS_TOKEN");
+        failed |= check_var::<String>("S3_SECRET");
+        failed |= check_var::<String>("S3_URL");
+        failed |= check_var::<String>("S3_REGION");
+        failed |= check_var::<String>("S3_BUCKET_NAME");
     } else if storage_backend.as_deref() == Some("local") {
-        check_var::<String>("MOCK_FILE_PATH");
+        failed |= check_var::<String>("MOCK_FILE_PATH");
     } else if let Some(backend) = storage_backend {
         warn!("Variable `STORAGE_BACKEND` contains an invalid value: {}. Expected \"backblaze\", \"s3\", or \"local\".", backend);
+        failed |= true;
     }
 
-    check_var::<bool>("INDEX_CURSEFORGE");
+    failed |= check_var::<bool>("INDEX_CURSEFORGE");
     if dotenv::var("INDEX_CURSEFORGE")
         .ok()
        .and_then(|s| s.parse::<bool>().ok())
        .unwrap_or(false)
    {
-        check_var::<u64>("EXTERNAL_INDEX_INTERVAL");
-        check_var::<u32>("MAX_CURSEFORGE_ID");
+        failed |= check_var::<u64>("EXTERNAL_INDEX_INTERVAL");
+        failed |= check_var::<u32>("MAX_CURSEFORGE_ID");
    }
 
-    check_var::<u64>("LOCAL_INDEX_INTERVAL");
+    failed |= check_var::<u64>("LOCAL_INDEX_INTERVAL");
 
     // In theory this should be an OsString since it's a path, but
     // dotenv doesn't support that. The usage of this does treat
     // it as an OsString, though.
-    check_var::<String>("INDEX_CACHE_PATH");
+    failed |= check_var::<String>("INDEX_CACHE_PATH");
 
-    check_var::<String>("GITHUB_CLIENT_ID");
-    check_var::<String>("GITHUB_CLIENT_SECRET");
+    failed |= check_var::<String>("GITHUB_CLIENT_ID");
+    failed |= check_var::<String>("GITHUB_CLIENT_SECRET");
+
+    failed
 }
diff --git a/src/search/indexing/curseforge_import.rs b/src/search/indexing/curseforge_import.rs
index be65f653c..1cd4bc1d9 100644
--- a/src/search/indexing/curseforge_import.rs
+++ b/src/search/indexing/curseforge_import.rs
@@ -119,21 +119,23 @@ lazy_static::lazy_static! {
 pub async fn index_curseforge(
     start_index: u32,
     end_index: u32,
-    cache_path: &std::path::Path,
+    cache_path: Option<&std::path::Path>,
 ) -> Result<Vec<UploadSearchMod>, IndexingError> {
     info!("Indexing curseforge mods!");
 
     let start = std::time::Instant::now();
 
     let mut docs_to_add: Vec<UploadSearchMod> = vec![];
 
-    let cache = std::fs::File::open(cache_path)
+    let cache = cache_path
+        .map(std::fs::File::open)
+        .and_then(Result::ok)
         .map(std::io::BufReader::new)
         .map(serde_json::from_reader::<_, Vec<u32>>);
 
     let requested_ids;
 
     // This caching system can't handle segmented indexing
-    if let Ok(Ok(mut cache)) = cache {
+    if let Some(Ok(mut cache)) = cache {
         let end = cache.last().copied().unwrap_or(start_index);
         cache.extend(end..end_index);
         requested_ids = serde_json::to_string(&cache)?;
@@ -167,11 +169,13 @@ pub async fn index_curseforge(
     // Only write to the cache if this doesn't skip mods at the start
     // The caching system iterates through all ids normally past the last
     // id in the cache, so the end_index shouldn't matter.
-    if start_index <= 1 {
-        let mut ids = curseforge_mods.iter().map(|m| m.id).collect::<Vec<_>>();
-        ids.sort_unstable();
-        if let Err(e) = std::fs::write(cache_path, serde_json::to_string(&ids)?) {
-            log::warn!("Error writing to index id cache: {}", e);
+    if let Some(path) = cache_path {
+        if start_index <= 1 {
+            let mut ids = curseforge_mods.iter().map(|m| m.id).collect::<Vec<_>>();
+            ids.sort_unstable();
+            if let Err(e) = std::fs::write(path, serde_json::to_string(&ids)?) {
+                log::warn!("Error writing to index id cache: {}", e);
+            }
         }
     }
 
@@ -192,8 +196,8 @@ pub async fn index_curseforge(
     for file in curseforge_mod.latest_files {
         for version in file.game_version {
             match &*version {
-                "Fabric" => loaders.forge = true,
-                "Forge" => loaders.fabric = true,
+                "Fabric" => loaders.fabric = true,
+                "Forge" => loaders.forge = true,
                 "Rift" => loaders.rift = true,
                 _ => (),
             }
@@ -309,7 +313,6 @@ pub async fn index_curseforge(
             modified_timestamp: curseforge_mod.date_modified.timestamp(),
             latest_version,
             host: Cow::Borrowed("curseforge"),
-            empty: Cow::Borrowed("{}{}{}"),
         })
     }
diff --git a/src/search/indexing/local_import.rs b/src/search/indexing/local_import.rs
index 801eb7e6b..c1d816a4f 100644
--- a/src/search/indexing/local_import.rs
+++ b/src/search/indexing/local_import.rs
@@ -112,7 +112,6 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingE
             modified_timestamp: mod_data.updated.timestamp(),
             latest_version,
             host: Cow::Borrowed("modrinth"),
-            empty: Cow::Borrowed("{}{}{}"),
         });
     }
 }
@@ -225,6 +224,5 @@ pub async fn query_one(
         modified_timestamp: mod_data.updated.timestamp(),
         latest_version,
         host: Cow::Borrowed("modrinth"),
-        empty: Cow::Borrowed("{}{}{}"),
     })
 }
diff --git a/src/search/indexing/mod.rs b/src/search/indexing/mod.rs
index 4c23a1f75..ed2689136 100644
--- a/src/search/indexing/mod.rs
+++ b/src/search/indexing/mod.rs
@@ -63,7 +63,7 @@ pub async fn index_mods(
 ) -> Result<(), IndexingError> {
     let mut docs_to_add: Vec<UploadSearchMod> = vec![];
 
-    let cache_path = std::path::PathBuf::from(std::env::var_os("INDEX_CACHE_PATH").unwrap());
+    let cache_path = std::env::var_os("INDEX_CACHE_PATH").map(std::path::PathBuf::from);
 
     if settings.index_local {
         docs_to_add.append(&mut index_local(pool.clone()).await?);
@@ -74,7 +74,7 @@ pub async fn index_mods(
             .map(|i| i.parse().unwrap())
             .unwrap_or(450_000);
 
-        docs_to_add.append(&mut index_curseforge(1, end_index, &cache_path).await?);
+        docs_to_add.append(&mut index_curseforge(1, end_index, cache_path.as_deref()).await?);
     }
 
     // Write Indices
@@ -270,7 +270,6 @@ fn default_settings() -> Settings {
         "categories".to_string(),
         "versions".to_string(),
         "author".to_string(),
-        "empty".to_string(),
     ];
 
     Settings::new()
diff --git a/src/search/mod.rs b/src/search/mod.rs
index 74cefa776..c31e6402e 100644
--- a/src/search/mod.rs
+++ b/src/search/mod.rs
@@ -5,7 +5,6 @@ use actix_web::web::HttpResponse;
 use chrono::{DateTime, Utc};
 use meilisearch_sdk::client::Client;
 use meilisearch_sdk::document::Document;
-use meilisearch_sdk::search::Query;
 use serde::{Deserialize, Serialize};
 use std::borrow::Cow;
 use std::cmp::min;
@@ -84,12 +83,6 @@ pub struct UploadSearchMod {
     pub modified_timestamp: i64,
 
     pub host: Cow<'static, str>,
-
-    /// Must be "{}{}{}", a hack until meilisearch supports searches
-    /// with empty queries (https://github.com/meilisearch/MeiliSearch/issues/729)
-    // This is a Cow to prevent unnecessary allocations for a static
-    // string
-    pub empty: Cow<'static, str>,
 }
 
 #[derive(Serialize, Deserialize, Debug)]
@@ -155,23 +148,6 @@ pub async fn search_for_mod(
     let offset = info.offset.as_deref().unwrap_or("0").parse()?;
     let index = info.index.as_deref().unwrap_or("relevance");
     let limit = info.limit.as_deref().unwrap_or("10").parse()?;
-    let search_query: &str = info
-        .query
-        .as_deref()
-        .filter(|s| !s.is_empty())
-        .unwrap_or("{}{}{}");
-
-    let mut query = Query::new(search_query)
-        .with_limit(min(100, limit))
-        .with_offset(offset);
-
-    if !filters.is_empty() {
-        query = query.with_filters(&filters);
-    }
-    if let Some(facets) = &info.facets {
-        let facets = serde_json::from_str::<Vec<Vec<&str>>>(facets)?;
-        query = query.with_facet_filters(facets);
-    }
 
     let index = match index {
         "relevance" => "relevance_mods",
@@ -181,14 +157,44 @@
         "downloads" => "downloads_mods",
         "updated" => "updated_mods",
         "newest" => "newest_mods",
         i => return Err(SearchError::InvalidIndex(i.to_string())),
     };
 
-    let results = client
-        .get_index(index)
-        .await?
-        .search::<UploadSearchMod>(&query)
-        .await?;
+    let meilisearch_index = client.get_index(index).await?;
+    let mut query = meilisearch_index.search();
+
+    query.with_limit(min(100, limit)).with_offset(offset);
+
+    if let Some(search) = info.query.as_deref() {
+        if !search.is_empty() {
+            query.with_query(search);
+        }
+    }
+
+    if !filters.is_empty() {
+        query.with_filters(&filters);
+    }
+
+    // So the meilisearch sdk's lifetimes are... broken, to say the least
+    // They are overspecified and almost always wrong, and would generally
+    // just be better if they didn't specify them at all.
+
+    // They also decided to have this take a &[&[&str]], which is impossible
+    // to construct efficiently. Instead it should take impl Iterator,
+    // &[impl AsRef<[&str]>], or one of many other proper solutions to that issue.
+
+    let why_meilisearch;
+    let why_must_you_do_this;
+    if let Some(facets) = &info.facets {
+        why_meilisearch = serde_json::from_str::<Vec<Vec<&str>>>(facets)?;
+        why_must_you_do_this = why_meilisearch
+            .iter()
+            .map(|v| v as &[_])
+            .collect::<Vec<_>>();
+        query.with_facet_filters(&why_must_you_do_this);
+    }
+
+    let results = query.execute::<UploadSearchMod>().await?;
 
     Ok(SearchResults {
-        hits: results.hits,
+        hits: results.hits.into_iter().map(|r| r.result).collect(),
         offset: results.offset,
         limit: results.limit,
         total_hits: results.nb_hits,
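
Aside on the `&[&[&str]]` workaround in src/search/mod.rs above: the `why_must_you_do_this` buffer exists because a `Vec<Vec<&str>>` cannot coerce to `&[&[&str]]` in one step. The sketch below illustrates this with a hypothetical `takes_nested_slices` standing in for the sdk's facet-filter parameter shape (it is not the real meilisearch-sdk API, just a minimal stand-alone example of the same constraint):

    // Stand-in with the same parameter shape the patch complains about.
    fn takes_nested_slices(facets: &[&[&str]]) {
        for group in facets {
            // In meilisearch's facetFilters semantics, each inner array is an
            // OR group; here we just print it for illustration.
            println!("{}", group.join(" OR "));
        }
    }

    fn main() {
        // Roughly what serde_json yields from the `facets` query parameter:
        let parsed: Vec<Vec<&str>> = vec![
            vec!["categories:forge"],
            vec!["versions:1.16.3", "versions:1.16.4"],
        ];

        // `&parsed` is a `&Vec<Vec<&str>>`, not a `&[&[&str]]`: the inner
        // `Vec<&str>` (ptr/len/capacity) and `&[&str]` (ptr/len) have
        // different layouts, so the outer slice's elements must be rebuilt
        // by hand. This second allocation is exactly the intermediate
        // buffer the diff builds before calling `with_facet_filters`.
        let borrowed: Vec<&[&str]> = parsed.iter().map(|v| v.as_slice()).collect();

        takes_nested_slices(&borrowed);
    }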