From b2be4a7d6722b6ab93b1a43af4d97049c467d42d Mon Sep 17 00:00:00 2001 From: Wyatt Verchere Date: Sun, 3 Dec 2023 06:27:12 -0800 Subject: [PATCH] Search overhaul (#771) * started work; switching context * working! * fmt clippy prepare * fixes * fixes * revs * merge fixes * changed comments * merge issues --- ...e614066bd7f7b3f653f186f1262d448ef89a1.json | 35 ++ ...a9b1cfb2e9a1c094bc5e0d529a314a77fb4d7.json | 173 --------- src/lib.rs | 4 +- src/models/v2/search.rs | 74 ++-- src/models/v3/projects.rs | 205 +++++++++-- src/routes/internal/admin.rs | 4 +- src/routes/v2/admin.rs | 4 +- src/routes/v2_reroute.rs | 21 +- src/routes/v3/projects.rs | 20 ++ src/search/indexing/local_import.rs | 330 ++++++++++-------- src/search/indexing/mod.rs | 30 +- src/search/mod.rs | 47 ++- tests/common/api_v3/project.rs | 7 +- tests/common/mod.rs | 1 + tests/common/search.rs | 212 +++++++++++ tests/project.rs | 46 ++- tests/search.rs | 209 +---------- tests/v2/search.rs | 45 +++ 18 files changed, 882 insertions(+), 585 deletions(-) create mode 100644 .sqlx/query-794b781594db938d7e0e53f957ee614066bd7f7b3f653f186f1262d448ef89a1.json delete mode 100644 .sqlx/query-94de8109ff9f95be5e9f70c629fa9b1cfb2e9a1c094bc5e0d529a314a77fb4d7.json create mode 100644 tests/common/search.rs diff --git a/.sqlx/query-794b781594db938d7e0e53f957ee614066bd7f7b3f653f186f1262d448ef89a1.json b/.sqlx/query-794b781594db938d7e0e53f957ee614066bd7f7b3f653f186f1262d448ef89a1.json new file mode 100644 index 000000000..6e8e6d3a2 --- /dev/null +++ b/.sqlx/query-794b781594db938d7e0e53f957ee614066bd7f7b3f653f186f1262d448ef89a1.json @@ -0,0 +1,35 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT v.id id, m.id mod_id, u.username owner_username\n \n FROM versions v\n INNER JOIN mods m ON v.mod_id = m.id AND m.status = ANY($2)\n INNER JOIN team_members tm ON tm.team_id = m.team_id AND tm.is_owner = TRUE AND tm.accepted = TRUE\n INNER JOIN users u ON tm.user_id = u.id\n WHERE v.status != ANY($1)\n GROUP BY v.id, m.id, u.id\n 
ORDER BY m.id DESC;\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Int8" + }, + { + "ordinal": 1, + "name": "mod_id", + "type_info": "Int8" + }, + { + "ordinal": 2, + "name": "owner_username", + "type_info": "Varchar" + } + ], + "parameters": { + "Left": [ + "TextArray", + "TextArray" + ] + }, + "nullable": [ + false, + false, + false + ] + }, + "hash": "794b781594db938d7e0e53f957ee614066bd7f7b3f653f186f1262d448ef89a1" +} diff --git a/.sqlx/query-94de8109ff9f95be5e9f70c629fa9b1cfb2e9a1c094bc5e0d529a314a77fb4d7.json b/.sqlx/query-94de8109ff9f95be5e9f70c629fa9b1cfb2e9a1c094bc5e0d529a314a77fb4d7.json deleted file mode 100644 index e3f37c5f0..000000000 --- a/.sqlx/query-94de8109ff9f95be5e9f70c629fa9b1cfb2e9a1c094bc5e0d529a314a77fb4d7.json +++ /dev/null @@ -1,173 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n WITH version_fields_cte AS (\n SELECT version_id, field_id, int_value, enum_value, string_value\n FROM version_fields\n ),\n version_fields_json AS (\n SELECT DISTINCT version_id,\n JSONB_AGG( \n DISTINCT jsonb_build_object('field_id', field_id, 'int_value', int_value, 'enum_value', enum_value, 'string_value', string_value)\n ) version_fields_json\n FROM version_fields_cte\n GROUP BY version_id\n ),\n loader_fields_cte AS (\n SELECT DISTINCT vf.version_id, lf.*, l.loader\n FROM loader_fields lf\n INNER JOIN version_fields_cte vf ON lf.id = vf.field_id\n LEFT JOIN loaders_versions lv ON vf.version_id = lv.version_id\n LEFT JOIN loaders l ON lv.loader_id = l.id\n GROUP BY vf.version_id, lf.enum_type, lf.id, l.loader\n ),\n loader_fields_json AS (\n SELECT DISTINCT version_id,\n JSONB_AGG(\n DISTINCT jsonb_build_object(\n 'version_id', lf.version_id,\n 'lf_id', id, 'loader_name', loader, 'field', field, 'field_type', field_type, 'enum_type', enum_type, 'min_val', min_val, 'max_val', max_val, 'optional', optional\n )\n ) filter (where lf.id is not null) loader_fields_json\n FROM loader_fields_cte lf\n GROUP BY version_id\n 
),\n loader_field_enum_values_json AS (\n SELECT DISTINCT version_id,\n JSONB_AGG(\n DISTINCT jsonb_build_object(\n 'id', lfev.id, 'enum_id', lfev.enum_id, 'value', lfev.value, 'ordering', lfev.ordering, 'created', lfev.created, 'metadata', lfev.metadata\n ) \n ) filter (where lfev.id is not null) loader_field_enum_values_json\n FROM loader_field_enum_values lfev\n INNER JOIN loader_fields_cte lf on lf.enum_type = lfev.enum_id\n GROUP BY version_id\n )\n\n SELECT m.id id, v.id version_id, m.name name, m.description description, m.downloads downloads, m.follows follows,\n m.icon_url icon_url, m.published published, m.approved approved, m.updated updated,\n m.team_id team_id, m.license license, m.slug slug, m.status status_name, m.color color,\n u.username username,\n ARRAY_AGG(DISTINCT c.category) filter (where c.category is not null and mc.is_additional is false) categories,\n ARRAY_AGG(DISTINCT c.category) filter (where c.category is not null and mc.is_additional is true) additional_categories,\n ARRAY_AGG(DISTINCT lo.loader) filter (where lo.loader is not null) loaders,\n ARRAY_AGG(DISTINCT pt.name) filter (where pt.name is not null) project_types,\n ARRAY_AGG(DISTINCT g.slug) filter (where g.slug is not null) games,\n ARRAY_AGG(DISTINCT mg.image_url) filter (where mg.image_url is not null and mg.featured is false) gallery,\n ARRAY_AGG(DISTINCT mg.image_url) filter (where mg.image_url is not null and mg.featured is true) featured_gallery,\n vf.version_fields_json version_fields,\n lf.loader_fields_json loader_fields,\n lfev.loader_field_enum_values_json loader_field_enum_values\n FROM versions v\n INNER JOIN mods m ON v.mod_id = m.id AND m.status = ANY($2)\n LEFT OUTER JOIN mods_categories mc ON joining_mod_id = m.id\n LEFT OUTER JOIN categories c ON mc.joining_category_id = c.id\n LEFT OUTER JOIN loaders_versions lv ON lv.version_id = v.id\n LEFT OUTER JOIN loaders lo ON lo.id = lv.loader_id\n LEFT JOIN loaders_project_types lpt ON lpt.joining_loader_id = 
lo.id\n LEFT JOIN project_types pt ON pt.id = lpt.joining_project_type_id\n LEFT JOIN loaders_project_types_games lptg ON lptg.loader_id = lo.id AND lptg.project_type_id = pt.id\n LEFT JOIN games g ON lptg.game_id = g.id\n LEFT OUTER JOIN mods_gallery mg ON mg.mod_id = m.id\n INNER JOIN team_members tm ON tm.team_id = m.team_id AND tm.is_owner = TRUE AND tm.accepted = TRUE\n INNER JOIN users u ON tm.user_id = u.id\n LEFT OUTER JOIN version_fields_json vf ON v.id = vf.version_id\n LEFT OUTER JOIN loader_fields_json lf ON v.id = lf.version_id\n LEFT OUTER JOIN loader_field_enum_values_json lfev ON v.id = lfev.version_id\n WHERE v.status != ANY($1)\n GROUP BY v.id, vf.version_fields_json, lf.loader_fields_json, lfev.loader_field_enum_values_json, m.id, u.id;\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "id", - "type_info": "Int8" - }, - { - "ordinal": 1, - "name": "version_id", - "type_info": "Int8" - }, - { - "ordinal": 2, - "name": "name", - "type_info": "Varchar" - }, - { - "ordinal": 3, - "name": "description", - "type_info": "Varchar" - }, - { - "ordinal": 4, - "name": "downloads", - "type_info": "Int4" - }, - { - "ordinal": 5, - "name": "follows", - "type_info": "Int4" - }, - { - "ordinal": 6, - "name": "icon_url", - "type_info": "Varchar" - }, - { - "ordinal": 7, - "name": "published", - "type_info": "Timestamptz" - }, - { - "ordinal": 8, - "name": "approved", - "type_info": "Timestamptz" - }, - { - "ordinal": 9, - "name": "updated", - "type_info": "Timestamptz" - }, - { - "ordinal": 10, - "name": "team_id", - "type_info": "Int8" - }, - { - "ordinal": 11, - "name": "license", - "type_info": "Varchar" - }, - { - "ordinal": 12, - "name": "slug", - "type_info": "Varchar" - }, - { - "ordinal": 13, - "name": "status_name", - "type_info": "Varchar" - }, - { - "ordinal": 14, - "name": "color", - "type_info": "Int4" - }, - { - "ordinal": 15, - "name": "username", - "type_info": "Varchar" - }, - { - "ordinal": 16, - "name": "categories", - 
"type_info": "VarcharArray" - }, - { - "ordinal": 17, - "name": "additional_categories", - "type_info": "VarcharArray" - }, - { - "ordinal": 18, - "name": "loaders", - "type_info": "VarcharArray" - }, - { - "ordinal": 19, - "name": "project_types", - "type_info": "VarcharArray" - }, - { - "ordinal": 20, - "name": "games", - "type_info": "VarcharArray" - }, - { - "ordinal": 21, - "name": "gallery", - "type_info": "VarcharArray" - }, - { - "ordinal": 22, - "name": "featured_gallery", - "type_info": "VarcharArray" - }, - { - "ordinal": 23, - "name": "version_fields", - "type_info": "Jsonb" - }, - { - "ordinal": 24, - "name": "loader_fields", - "type_info": "Jsonb" - }, - { - "ordinal": 25, - "name": "loader_field_enum_values", - "type_info": "Jsonb" - } - ], - "parameters": { - "Left": [ - "TextArray", - "TextArray" - ] - }, - "nullable": [ - false, - false, - false, - false, - false, - false, - true, - false, - true, - false, - false, - false, - true, - false, - true, - false, - null, - null, - null, - null, - null, - null, - null, - null, - null, - null - ] - }, - "hash": "94de8109ff9f95be5e9f70c629fa9b1cfb2e9a1c094bc5e0d529a314a77fb4d7" -} diff --git a/src/lib.rs b/src/lib.rs index 0f41dca23..a01112605 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -80,12 +80,14 @@ pub fn app_setup( let pool_ref = pool.clone(); let search_config_ref = search_config.clone(); + let redis_pool_ref = redis_pool.clone(); scheduler.run(local_index_interval, move || { let pool_ref = pool_ref.clone(); + let redis_pool_ref = redis_pool_ref.clone(); let search_config_ref = search_config_ref.clone(); async move { info!("Indexing local database"); - let result = index_projects(pool_ref, &search_config_ref).await; + let result = index_projects(pool_ref, redis_pool_ref.clone(), &search_config_ref).await; if let Err(e) = result { warn!("Local project indexing failed: {:?}", e); } diff --git a/src/models/v2/search.rs b/src/models/v2/search.rs index 5f11ec438..746710fce 100644 --- 
a/src/models/v2/search.rs +++ b/src/models/v2/search.rs @@ -1,6 +1,7 @@ +use itertools::Itertools; use serde::{Deserialize, Serialize}; -use crate::search::ResultSearchProject; +use crate::{routes::v2_reroute, search::ResultSearchProject}; #[derive(Serialize, Deserialize, Debug)] pub struct LegacySearchResults { @@ -44,7 +45,12 @@ impl LegacyResultSearchProject { if categories.contains(&"mrpack".to_string()) { if let Some(mrpack_loaders) = result_search_project.loader_fields.get("mrpack_loaders") { - categories.extend(mrpack_loaders.clone()); + categories.extend( + mrpack_loaders + .iter() + .filter_map(|c| c.as_str()) + .map(String::from), + ); categories.retain(|c| c != "mrpack"); } } @@ -52,7 +58,12 @@ impl LegacyResultSearchProject { if display_categories.contains(&"mrpack".to_string()) { if let Some(mrpack_loaders) = result_search_project.loader_fields.get("mrpack_loaders") { - display_categories.extend(mrpack_loaders.clone()); + categories.extend( + mrpack_loaders + .iter() + .filter_map(|c| c.as_str()) + .map(String::from), + ); display_categories.retain(|c| c != "mrpack"); } } @@ -84,25 +95,44 @@ impl LegacyResultSearchProject { project_type }; + let loader_fields = result_search_project.loader_fields.clone(); + let get_one_bool_loader_field = |key: &str| { + loader_fields + .get(key) + .cloned() + .unwrap_or_default() + .first() + .and_then(|s| s.as_bool()) + }; + + let singleplayer = get_one_bool_loader_field("singleplayer"); + let client_only = get_one_bool_loader_field("client_only").unwrap_or(false); + let server_only = get_one_bool_loader_field("server_only").unwrap_or(false); + let client_and_server = get_one_bool_loader_field("client_and_server"); + + let (client_side, server_side) = v2_reroute::convert_side_types_v2_bools( + singleplayer, + client_only, + server_only, + client_and_server, + ); + let client_side = client_side.to_string(); + let server_side = server_side.to_string(); + + let versions = result_search_project + .loader_fields + 
.get("game_versions") + .cloned() + .unwrap_or_default() + .into_iter() + .filter_map(|s| s.as_str().map(String::from)) + .collect_vec(); + Self { project_type, - client_side: result_search_project - .loader_fields - .get("client_side") - .cloned() - .unwrap_or_default() - .join(","), - server_side: result_search_project - .loader_fields - .get("server_side") - .cloned() - .unwrap_or_default() - .join(","), - versions: result_search_project - .loader_fields - .get("game_versions") - .cloned() - .unwrap_or_default(), + client_side, + server_side, + versions, latest_version: result_search_project.version_id, categories, @@ -110,11 +140,11 @@ impl LegacyResultSearchProject { slug: result_search_project.slug, author: result_search_project.author, title: result_search_project.name, - description: result_search_project.description, + description: result_search_project.summary, display_categories, downloads: result_search_project.downloads, follows: result_search_project.follows, - icon_url: result_search_project.icon_url, + icon_url: result_search_project.icon_url.unwrap_or_default(), license: result_search_project.license, date_created: result_search_project.date_created, date_modified: result_search_project.date_modified, diff --git a/src/models/v3/projects.rs b/src/models/v3/projects.rs index b9bfe50f0..621037084 100644 --- a/src/models/v3/projects.rs +++ b/src/models/v3/projects.rs @@ -1,12 +1,16 @@ use std::collections::{HashMap, HashSet}; +use super::ids::base62_impl::parse_base62; use super::ids::{Base62Id, OrganizationId}; use super::teams::TeamId; use super::users::UserId; +use crate::database::models::loader_fields::VersionField; use crate::database::models::project_item::{LinkUrl, QueryProject}; use crate::database::models::version_item::QueryVersion; use crate::models::threads::ThreadId; +use crate::search::ResultSearchProject; use chrono::{DateTime, Utc}; +use itertools::Itertools; use serde::{Deserialize, Serialize}; use validator::Validate; @@ -119,30 
+123,38 @@ fn remove_duplicates(values: Vec) -> Vec { .collect() } +// This is a helper function to convert a list of VersionFields into a HashMap of field name to vecs of values +// This allows for removal of duplicates +pub fn from_duplicate_version_fields( + version_fields: Vec, +) -> HashMap> { + let mut fields: HashMap> = HashMap::new(); + for vf in version_fields { + // We use a string directly, so we can remove duplicates + let serialized = if let Some(inner_array) = vf.value.serialize_internal().as_array() { + inner_array.clone() + } else { + vec![vf.value.serialize_internal()] + }; + + // Create array if doesnt exist, otherwise push, or if json is an array, extend + if let Some(arr) = fields.get_mut(&vf.field_name) { + arr.extend(serialized); + } else { + fields.insert(vf.field_name, serialized); + } + } + + // Remove duplicates by converting to string and back + for (_, v) in fields.iter_mut() { + *v = remove_duplicates(v.clone()); + } + fields +} + impl From for Project { fn from(data: QueryProject) -> Self { - let mut fields: HashMap> = HashMap::new(); - for vf in data.aggregate_version_fields { - // We use a string directly, so we can remove duplicates - let serialized = if let Some(inner_array) = vf.value.serialize_internal().as_array() { - inner_array.clone() - } else { - vec![vf.value.serialize_internal()] - }; - - // Create array if doesnt exist, otherwise push, or if json is an array, extend - if let Some(arr) = fields.get_mut(&vf.field_name) { - arr.extend(serialized); - } else { - fields.insert(vf.field_name, serialized); - } - } - - // Remove duplicates by converting to string and back - for (_, v) in fields.iter_mut() { - *v = remove_duplicates(v.clone()); - } - + let fields = from_duplicate_version_fields(data.aggregate_version_fields); let m = data.inner; Self { id: m.id.into(), @@ -221,6 +233,155 @@ impl From for Project { } } +impl Project { + // Matches the from QueryProject, but with a ResultSearchProject + pub fn from_search(m: 
ResultSearchProject) -> Option { + let project_id = ProjectId(parse_base62(&m.project_id).ok()?); + let team_id = TeamId(parse_base62(&m.team_id).ok()?); + let organization_id = m + .organization_id + .and_then(|id| Some(OrganizationId(parse_base62(&id).ok()?))); + let thread_id = ThreadId(parse_base62(&m.thread_id).ok()?); + let versions = m + .versions + .iter() + .filter_map(|id| Some(VersionId(parse_base62(id).ok()?))) + .collect(); + + let approved = DateTime::parse_from_rfc3339(&m.date_created).ok()?; + let published = DateTime::parse_from_rfc3339(&m.date_published).ok()?.into(); + let approved = if approved == published { + None + } else { + Some(approved.into()) + }; + + let updated = DateTime::parse_from_rfc3339(&m.date_modified).ok()?.into(); + let queued = m + .date_queued + .and_then(|dq| DateTime::parse_from_rfc3339(&dq).ok()) + .map(|d| d.into()); + + let status = ProjectStatus::from_string(&m.status); + let requested_status = m + .requested_status + .map(|mrs| ProjectStatus::from_string(&mrs)); + + let license_url = m.license_url; + let icon_url = m.icon_url; + + // Loaders + let mut loaders = m.loaders; + let mrpack_loaders_strings = m.loader_fields.get("mrpack_loaders").cloned().map(|v| { + v.into_iter() + .filter_map(|v| v.as_str().map(String::from)) + .collect_vec() + }); + // If the project has a mrpack loader, keep only 'loaders' that are not in the mrpack_loaders + if let Some(ref mrpack_loaders) = mrpack_loaders_strings { + loaders.retain(|l| !mrpack_loaders.contains(l)); + } + + // Categories + let mut categories = m.display_categories.clone(); + categories.retain(|c| !loaders.contains(c)); + if let Some(ref mrpack_loaders) = mrpack_loaders_strings { + categories.retain(|l| !mrpack_loaders.contains(l)); + } + + // Additional categories + let mut additional_categories = m.categories.clone(); + additional_categories.retain(|c| !categories.contains(c)); + additional_categories.retain(|c| !loaders.contains(c)); + if let Some(ref mrpack_loaders) 
= mrpack_loaders_strings { + additional_categories.retain(|l| !mrpack_loaders.contains(l)); + } + + let games = m.games; + + let monetization_status = m + .monetization_status + .as_deref() + .map(MonetizationStatus::from_string) + .unwrap_or(MonetizationStatus::Monetized); + + let link_urls = m + .links + .into_iter() + .map(|d| (d.platform_name.clone(), Link::from(d))) + .collect(); + + let gallery = m + .gallery_items + .into_iter() + .map(|x| GalleryItem { + url: x.image_url, + featured: x.featured, + name: x.name, + description: x.description, + created: x.created, + ordering: x.ordering, + }) + .collect(); + + Some(Self { + id: project_id, + slug: m.slug, + project_types: m.project_types, + games, + team_id, + organization: organization_id, + name: m.name, + summary: m.summary, + description: "".to_string(), // Body is potentially huge, do not store in search + published, + updated, + approved, + queued, + status, + requested_status, + moderator_message: None, // Deprecated + license: License { + id: m.license.clone(), + name: match spdx::Expression::parse(&m.license) { + Ok(spdx_expr) => { + let mut vec: Vec<&str> = Vec::new(); + for node in spdx_expr.iter() { + if let spdx::expression::ExprNode::Req(req) = node { + if let Some(id) = req.req.license.id() { + vec.push(id.full_name); + } + } + } + // spdx crate returns AND/OR operations in postfix order + // and it would be a lot more effort to make it actually in order + // so let's just ignore that and make them comma-separated + vec.join(", ") + } + Err(_) => "".to_string(), + }, + url: license_url, + }, + downloads: m.downloads as u32, + followers: m.follows as u32, + categories, + additional_categories, + loaders, + versions, + icon_url, + link_urls, + gallery, + color: m.color, + thread_id, + monetization_status, + fields: m + .loader_fields + .into_iter() + .map(|(k, v)| (k, v.into_iter().collect())) + .collect(), + }) + } +} #[derive(Serialize, Deserialize, Clone, Debug)] pub struct GalleryItem { pub 
url: String, diff --git a/src/routes/internal/admin.rs b/src/routes/internal/admin.rs index c3eadbcdf..7e9b9d8d9 100644 --- a/src/routes/internal/admin.rs +++ b/src/routes/internal/admin.rs @@ -139,9 +139,11 @@ pub async fn count_download( #[post("/_force_reindex", guard = "admin_key_guard")] pub async fn force_reindex( pool: web::Data, + redis: web::Data, config: web::Data, ) -> Result { use crate::search::indexing::index_projects; - index_projects(pool.as_ref().clone(), &config).await?; + let redis = redis.get_ref(); + index_projects(pool.as_ref().clone(), redis.clone(), &config).await?; Ok(HttpResponse::NoContent().finish()) } diff --git a/src/routes/v2/admin.rs b/src/routes/v2/admin.rs index c3eadbcdf..7e9b9d8d9 100644 --- a/src/routes/v2/admin.rs +++ b/src/routes/v2/admin.rs @@ -139,9 +139,11 @@ pub async fn count_download( #[post("/_force_reindex", guard = "admin_key_guard")] pub async fn force_reindex( pool: web::Data, + redis: web::Data, config: web::Data, ) -> Result { use crate::search::indexing::index_projects; - index_projects(pool.as_ref().clone(), &config).await?; + let redis = redis.get_ref(); + index_projects(pool.as_ref().clone(), redis.clone(), &config).await?; Ok(HttpResponse::NoContent().finish()) } diff --git a/src/routes/v2_reroute.rs b/src/routes/v2_reroute.rs index e5cfdbcd0..cd83b0c54 100644 --- a/src/routes/v2_reroute.rs +++ b/src/routes/v2_reroute.rs @@ -243,8 +243,6 @@ pub fn convert_side_type_facets_v3(facets: Vec>>) -> Vec, ) -> (LegacySideType, LegacySideType) { - use LegacySideType::{Optional, Required, Unsupported}; - let client_and_server = side_types .get("client_and_server") .and_then(|x| x.as_bool()) @@ -262,6 +260,25 @@ pub fn convert_side_types_v2( .and_then(|x| x.as_bool()) .unwrap_or(false); + convert_side_types_v2_bools( + Some(singleplayer), + client_only, + server_only, + Some(client_and_server), + ) +} + +// Client side, server side +pub fn convert_side_types_v2_bools( + singleplayer: Option, + client_only: bool, + 
server_only: bool, + client_and_server: Option, +) -> (LegacySideType, LegacySideType) { + use LegacySideType::{Optional, Required, Unsupported}; + + let singleplayer = singleplayer.or(client_and_server).unwrap_or(false); + match (singleplayer, client_only, server_only) { // Only singleplayer (true, false, false) => (Required, Required), diff --git a/src/routes/v3/projects.rs b/src/routes/v3/projects.rs index 54a0127d6..3b35d163d 100644 --- a/src/routes/v3/projects.rs +++ b/src/routes/v3/projects.rs @@ -882,11 +882,31 @@ pub async fn edit_project_categories( Ok(()) } +#[derive(Serialize, Deserialize)] +pub struct ReturnSearchResults { + pub hits: Vec, + pub offset: usize, + pub limit: usize, + pub total_hits: usize, +} + pub async fn project_search( web::Query(info): web::Query, config: web::Data, ) -> Result { let results = search_for_project(&info, &config).await?; + + let results = ReturnSearchResults { + hits: results + .hits + .into_iter() + .filter_map(Project::from_search) + .collect::>(), + offset: results.offset, + limit: results.limit, + total_hits: results.total_hits, + }; + Ok(HttpResponse::Ok().json(results)) } diff --git a/src/search/indexing/local_import.rs b/src/search/indexing/local_import.rs index d2a915035..c5485eb55 100644 --- a/src/search/indexing/local_import.rs +++ b/src/search/indexing/local_import.rs @@ -6,171 +6,209 @@ use futures::TryStreamExt; use log::info; use super::IndexingError; -use crate::database::models::loader_fields::VersionField; -use crate::database::models::ProjectId; +use crate::database::models::{project_item, version_item, ProjectId, VersionId}; +use crate::database::redis::RedisPool; +use crate::models; use crate::search::UploadSearchProject; use sqlx::postgres::PgPool; pub async fn index_local( pool: PgPool, + redis: &RedisPool, ) -> Result<(Vec, Vec), IndexingError> { info!("Indexing local projects!"); let loader_field_keys: Arc> = Arc::new(DashSet::new()); - let uploads = - sqlx::query!( - " - WITH version_fields_cte 
AS ( - SELECT version_id, field_id, int_value, enum_value, string_value - FROM version_fields - ), - version_fields_json AS ( - SELECT DISTINCT version_id, - JSONB_AGG( - DISTINCT jsonb_build_object('field_id', field_id, 'int_value', int_value, 'enum_value', enum_value, 'string_value', string_value) - ) version_fields_json - FROM version_fields_cte - GROUP BY version_id - ), - loader_fields_cte AS ( - SELECT DISTINCT vf.version_id, lf.*, l.loader - FROM loader_fields lf - INNER JOIN version_fields_cte vf ON lf.id = vf.field_id - LEFT JOIN loaders_versions lv ON vf.version_id = lv.version_id - LEFT JOIN loaders l ON lv.loader_id = l.id - GROUP BY vf.version_id, lf.enum_type, lf.id, l.loader - ), - loader_fields_json AS ( - SELECT DISTINCT version_id, - JSONB_AGG( - DISTINCT jsonb_build_object( - 'version_id', lf.version_id, - 'lf_id', id, 'loader_name', loader, 'field', field, 'field_type', field_type, 'enum_type', enum_type, 'min_val', min_val, 'max_val', max_val, 'optional', optional - ) - ) filter (where lf.id is not null) loader_fields_json - FROM loader_fields_cte lf - GROUP BY version_id - ), - loader_field_enum_values_json AS ( - SELECT DISTINCT version_id, - JSONB_AGG( - DISTINCT jsonb_build_object( - 'id', lfev.id, 'enum_id', lfev.enum_id, 'value', lfev.value, 'ordering', lfev.ordering, 'created', lfev.created, 'metadata', lfev.metadata - ) - ) filter (where lfev.id is not null) loader_field_enum_values_json - FROM loader_field_enum_values lfev - INNER JOIN loader_fields_cte lf on lf.enum_type = lfev.enum_id - GROUP BY version_id - ) - SELECT m.id id, v.id version_id, m.name name, m.description description, m.downloads downloads, m.follows follows, - m.icon_url icon_url, m.published published, m.approved approved, m.updated updated, - m.team_id team_id, m.license license, m.slug slug, m.status status_name, m.color color, - u.username username, - ARRAY_AGG(DISTINCT c.category) filter (where c.category is not null and mc.is_additional is false) categories, - 
ARRAY_AGG(DISTINCT c.category) filter (where c.category is not null and mc.is_additional is true) additional_categories, - ARRAY_AGG(DISTINCT lo.loader) filter (where lo.loader is not null) loaders, - ARRAY_AGG(DISTINCT pt.name) filter (where pt.name is not null) project_types, - ARRAY_AGG(DISTINCT g.slug) filter (where g.slug is not null) games, - ARRAY_AGG(DISTINCT mg.image_url) filter (where mg.image_url is not null and mg.featured is false) gallery, - ARRAY_AGG(DISTINCT mg.image_url) filter (where mg.image_url is not null and mg.featured is true) featured_gallery, - vf.version_fields_json version_fields, - lf.loader_fields_json loader_fields, - lfev.loader_field_enum_values_json loader_field_enum_values - FROM versions v - INNER JOIN mods m ON v.mod_id = m.id AND m.status = ANY($2) - LEFT OUTER JOIN mods_categories mc ON joining_mod_id = m.id - LEFT OUTER JOIN categories c ON mc.joining_category_id = c.id - LEFT OUTER JOIN loaders_versions lv ON lv.version_id = v.id - LEFT OUTER JOIN loaders lo ON lo.id = lv.loader_id - LEFT JOIN loaders_project_types lpt ON lpt.joining_loader_id = lo.id - LEFT JOIN project_types pt ON pt.id = lpt.joining_project_type_id - LEFT JOIN loaders_project_types_games lptg ON lptg.loader_id = lo.id AND lptg.project_type_id = pt.id - LEFT JOIN games g ON lptg.game_id = g.id - LEFT OUTER JOIN mods_gallery mg ON mg.mod_id = m.id - INNER JOIN team_members tm ON tm.team_id = m.team_id AND tm.is_owner = TRUE AND tm.accepted = TRUE - INNER JOIN users u ON tm.user_id = u.id - LEFT OUTER JOIN version_fields_json vf ON v.id = vf.version_id - LEFT OUTER JOIN loader_fields_json lf ON v.id = lf.version_id - LEFT OUTER JOIN loader_field_enum_values_json lfev ON v.id = lfev.version_id - WHERE v.status != ANY($1) - GROUP BY v.id, vf.version_fields_json, lf.loader_fields_json, lfev.loader_field_enum_values_json, m.id, u.id; - ", - &*crate::models::projects::VersionStatus::iterator().filter(|x| x.is_hidden()).map(|x| x.to_string()).collect::>(), - 
&*crate::models::projects::ProjectStatus::iterator().filter(|x| x.is_searchable()).map(|x| x.to_string()).collect::>(), - ) - .fetch_many(&pool) - .try_filter_map(|e| { - let loader_field_keys = loader_field_keys.clone(); - async move { - Ok(e.right().map(|m| { - let mut additional_categories = m.additional_categories.unwrap_or_default(); - let mut categories = m.categories.unwrap_or_default(); + let all_visible_ids: HashMap = sqlx::query!( + " + SELECT v.id id, m.id mod_id, u.username owner_username + + FROM versions v + INNER JOIN mods m ON v.mod_id = m.id AND m.status = ANY($2) + INNER JOIN team_members tm ON tm.team_id = m.team_id AND tm.is_owner = TRUE AND tm.accepted = TRUE + INNER JOIN users u ON tm.user_id = u.id + WHERE v.status != ANY($1) + GROUP BY v.id, m.id, u.id + ORDER BY m.id DESC; + ", + &*crate::models::projects::VersionStatus::iterator() + .filter(|x| x.is_hidden()) + .map(|x| x.to_string()) + .collect::>(), + &*crate::models::projects::ProjectStatus::iterator() + .filter(|x| x.is_searchable()) + .map(|x| x.to_string()) + .collect::>(), + ) + .fetch_many(&pool) + .try_filter_map(|e| async move { + Ok(e.right().map(|m| { + let project_id: ProjectId = ProjectId(m.mod_id); + let version_id: VersionId = VersionId(m.id); + (version_id, (project_id, m.owner_username)) + })) + }) + .try_collect::>() + .await?; - categories.append(&mut m.loaders.unwrap_or_default()); + let project_ids = all_visible_ids + .values() + .map(|(project_id, _)| project_id) + .cloned() + .collect::>(); + let projects: HashMap<_, _> = project_item::Project::get_many_ids(&project_ids, &pool, redis) + .await? + .into_iter() + .map(|p| (p.inner.id, p)) + .collect(); - let display_categories = categories.clone(); - categories.append(&mut additional_categories); + let version_ids = all_visible_ids.keys().cloned().collect::>(); + let versions: HashMap<_, _> = version_item::Version::get_many(&version_ids, &pool, redis) + .await? 
+ .into_iter() + .map(|v| (v.inner.id, v)) + .collect(); - let version_fields = VersionField::from_query_json(m.loader_fields, m.version_fields, m.loader_field_enum_values, false); + let mut uploads = Vec::new(); + // TODO: could possibly clone less here? + for (version_id, (project_id, owner_username)) in all_visible_ids { + let m = projects.get(&project_id); + let v = versions.get(&version_id); - let loader_fields : HashMap> = version_fields.into_iter().map(|vf| { - (vf.field_name, vf.value.as_strings()) - }).collect(); + let m = match m { + Some(m) => m, + None => continue, + }; - for v in loader_fields.keys().cloned() { - loader_field_keys.insert(v); - } + let v = match v { + Some(v) => v, + None => continue, + }; - let project_id: crate::models::projects::ProjectId = ProjectId(m.id).into(); - let version_id: crate::models::projects::ProjectId = ProjectId(m.version_id).into(); + let version_id: crate::models::projects::VersionId = v.inner.id.into(); + let project_id: crate::models::projects::ProjectId = m.inner.id.into(); + let team_id: crate::models::teams::TeamId = m.inner.team_id.into(); + let organization_id: Option = + m.inner.organization_id.map(|x| x.into()); + let thread_id: crate::models::threads::ThreadId = m.thread_id.into(); - let license = match m.license.split(' ').next() { - Some(license) => license.to_string(), - None => m.license, - }; + let all_version_ids = m + .versions + .iter() + .map(|v| (*v).into()) + .collect::>(); - let open_source = match spdx::license_id(&license) { - Some(id) => id.is_osi_approved(), - _ => false, - }; + let mut additional_categories = m.additional_categories.clone(); + let mut categories = m.categories.clone(); - // SPECIAL BEHAVIOUR - // Todo: revisit. - // For consistency with v2 searching, we consider the loader field 'mrpack_loaders' to be a category. - // These were previously considered the loader, and in v2, the loader is a category for searching. 
- // So to avoid breakage or awkward conversions, we just consider those loader_fields to be categories. - // The loaders are kept in loader_fields as well, so that no information is lost on retrieval. - let mrpack_loaders = loader_fields.get("mrpack_loaders").cloned().unwrap_or_default(); - categories.extend(mrpack_loaders); + // Uses version loaders, not project loaders. + categories.append(&mut v.loaders.clone()); + + let display_categories = categories.clone(); + categories.append(&mut additional_categories); + + let version_fields = v.version_fields.clone(); + let loader_fields = models::projects::from_duplicate_version_fields(version_fields); + for v in loader_fields.keys().cloned() { + loader_field_keys.insert(v); + } + + let license = match m.inner.license.split(' ').next() { + Some(license) => license.to_string(), + None => m.inner.license.clone(), + }; + + let open_source = match spdx::license_id(&license) { + Some(id) => id.is_osi_approved(), + _ => false, + }; + + // For loaders, get ALL loaders across ALL versions + let mut loaders = all_version_ids + .iter() + .fold(vec![], |mut loaders, version_id| { + let version = versions.get(&(*version_id).into()); + if let Some(version) = version { + loaders.extend(version.loaders.clone()); + } + loaders + }); + loaders.sort(); + loaders.dedup(); + + // SPECIAL BEHAVIOUR + // Todo: revisit. + // For consistency with v2 searching, we consider the loader field 'mrpack_loaders' to be a category. + // These were previously considered the loader, and in v2, the loader is a category for searching. + // So to avoid breakage or awkward conversions, we just consider those loader_fields to be categories. + // The loaders are kept in loader_fields as well, so that no information is lost on retrieval. 
+ let mrpack_loaders = loader_fields + .get("mrpack_loaders") + .cloned() + .map(|x| { + x.into_iter() + .filter_map(|x| x.as_str().map(String::from)) + .collect::>() + }) + .unwrap_or_default(); + categories.extend(mrpack_loaders); + + let gallery = m + .gallery_items + .iter() + .filter(|gi| !gi.featured) + .map(|gi| gi.image_url.clone()) + .collect::>(); + let featured_gallery = m + .gallery_items + .iter() + .filter(|gi| gi.featured) + .map(|gi| gi.image_url.clone()) + .collect::>(); + let featured_gallery = featured_gallery.first().cloned(); + + let usp = UploadSearchProject { + version_id: version_id.to_string(), + project_id: project_id.to_string(), + name: m.inner.name.clone(), + summary: m.inner.summary.clone(), + categories, + follows: m.inner.follows, + downloads: m.inner.downloads, + icon_url: m.inner.icon_url.clone(), + author: owner_username, + date_created: m.inner.approved.unwrap_or(m.inner.published), + created_timestamp: m.inner.approved.unwrap_or(m.inner.published).timestamp(), + date_modified: m.inner.updated, + modified_timestamp: m.inner.updated.timestamp(), + license, + slug: m.inner.slug.clone(), + project_types: m.project_types.clone(), + gallery, + featured_gallery, + display_categories, + open_source, + color: m.inner.color, + loader_fields, + license_url: m.inner.license_url.clone(), + monetization_status: Some(m.inner.monetization_status), + team_id: team_id.to_string(), + organization_id: organization_id.map(|x| x.to_string()), + thread_id: thread_id.to_string(), + versions: all_version_ids.iter().map(|x| x.to_string()).collect(), + date_published: m.inner.published, + date_queued: m.inner.queued, + status: m.inner.status, + requested_status: m.inner.requested_status, + games: m.games.clone(), + links: m.urls.clone(), + gallery_items: m.gallery_items.clone(), + loaders, + }; + + uploads.push(usp); + } - UploadSearchProject { - version_id: version_id.to_string(), - project_id: project_id.to_string(), - name: m.name, - description: 
m.description, - categories, - follows: m.follows, - downloads: m.downloads, - icon_url: m.icon_url.unwrap_or_default(), - author: m.username, - date_created: m.approved.unwrap_or(m.published), - created_timestamp: m.approved.unwrap_or(m.published).timestamp(), - date_modified: m.updated, - modified_timestamp: m.updated.timestamp(), - license, - slug: m.slug, - project_types: m.project_types.unwrap_or_default(), - gallery: m.gallery.unwrap_or_default(), - display_categories, - open_source, - color: m.color.map(|x| x as u32), - featured_gallery: m.featured_gallery.unwrap_or_default().first().cloned(), - loader_fields - } - })) -}}) - .try_collect::>() - .await?; Ok(( uploads, Arc::try_unwrap(loader_field_keys) diff --git a/src/search/indexing/mod.rs b/src/search/indexing/mod.rs index 2f367fdc8..7fbef6e44 100644 --- a/src/search/indexing/mod.rs +++ b/src/search/indexing/mod.rs @@ -1,6 +1,7 @@ /// This module is used for the indexing from any source. pub mod local_import; +use crate::database::redis::RedisPool; use crate::search::{SearchConfig, UploadSearchProject}; use local_import::index_local; use meilisearch_sdk::client::Client; @@ -30,11 +31,15 @@ pub enum IndexingError { // assumes a max average size of 1KiB per project to avoid this cap. 
const MEILISEARCH_CHUNK_SIZE: usize = 10000; -pub async fn index_projects(pool: PgPool, config: &SearchConfig) -> Result<(), IndexingError> { +pub async fn index_projects( + pool: PgPool, + redis: RedisPool, + config: &SearchConfig, +) -> Result<(), IndexingError> { let mut docs_to_add: Vec = vec![]; let mut additional_fields: Vec = vec![]; - let (mut uploads, mut loader_fields) = index_local(pool.clone()).await?; + let (mut uploads, mut loader_fields) = index_local(pool.clone(), &redis).await?; docs_to_add.append(&mut uploads); additional_fields.append(&mut loader_fields); @@ -186,7 +191,7 @@ const DEFAULT_DISPLAYED_ATTRIBUTES: &[&str] = &[ "slug", "author", "name", - "description", + "summary", "categories", "display_categories", "downloads", @@ -199,9 +204,26 @@ const DEFAULT_DISPLAYED_ATTRIBUTES: &[&str] = &[ "gallery", "featured_gallery", "color", + // Note: loader fields are not here, but are added on as they are needed (so they can be dynamically added depending on which exist). + + // Non-searchable fields for filling out the Project model. + "license_url", + "monetization_status", + "team_id", + "thread_id", + "versions", + "date_published", + "date_queued", + "status", + "requested_status", + "games", + "organization_id", + "links", + "gallery_items", + "loaders", // search uses loaders as categories- this is purely for the Project model. 
]; -const DEFAULT_SEARCHABLE_ATTRIBUTES: &[&str] = &["name", "description", "author", "slug"]; +const DEFAULT_SEARCHABLE_ATTRIBUTES: &[&str] = &["name", "summary", "author", "slug"]; const DEFAULT_ATTRIBUTES_FOR_FACETING: &[&str] = &[ "categories", diff --git a/src/search/mod.rs b/src/search/mod.rs index 09c6767cc..83160325a 100644 --- a/src/search/mod.rs +++ b/src/search/mod.rs @@ -1,5 +1,6 @@ +use crate::database::models::project_item::{GalleryItem, LinkUrl}; use crate::models::error::ApiError; -use crate::models::projects::SearchRequest; +use crate::models::projects::{MonetizationStatus, ProjectStatus, SearchRequest}; use actix_web::http::StatusCode; use actix_web::HttpResponse; use chrono::{DateTime, Utc}; @@ -80,12 +81,12 @@ pub struct UploadSearchProject { pub slug: Option, pub author: String, pub name: String, - pub description: String, + pub summary: String, pub categories: Vec, pub display_categories: Vec, pub follows: i32, pub downloads: i32, - pub icon_url: String, + pub icon_url: Option, pub license: String, pub gallery: Vec, pub featured_gallery: Option, @@ -100,8 +101,24 @@ pub struct UploadSearchProject { pub open_source: bool, pub color: Option, + // Hidden fields to get the Project model out of the search results. + pub license_url: Option, + pub monetization_status: Option, + pub team_id: String, + pub thread_id: String, + pub versions: Vec, + pub date_published: DateTime, + pub date_queued: Option>, + pub status: ProjectStatus, + pub requested_status: Option, + pub loaders: Vec, // Search uses loaders as categories- this is purely for the Project model. + pub links: Vec, + pub gallery_items: Vec, // Gallery *only* urls are stored in gallery, but the gallery items are stored here- required for the Project model. + pub games: Vec, // Todo: in future, could be a searchable field. + pub organization_id: Option, // Todo: in future, could be a searchable field. 
+ #[serde(flatten)] - pub loader_fields: HashMap>, + pub loader_fields: HashMap>, } #[derive(Serialize, Deserialize, Debug)] @@ -120,12 +137,12 @@ pub struct ResultSearchProject { pub slug: Option, pub author: String, pub name: String, - pub description: String, + pub summary: String, pub categories: Vec, pub display_categories: Vec, pub downloads: i32, pub follows: i32, - pub icon_url: String, + pub icon_url: Option, /// RFC 3339 formatted creation date of the project pub date_created: String, /// RFC 3339 formatted modification date of the project @@ -135,8 +152,24 @@ pub struct ResultSearchProject { pub featured_gallery: Option, pub color: Option, + // Hidden fields to get the Project model out of the search results. + pub license_url: Option, + pub monetization_status: Option, + pub team_id: String, + pub thread_id: String, + pub versions: Vec, + pub date_published: String, + pub date_queued: Option, + pub status: String, + pub requested_status: Option, + pub loaders: Vec, // Search uses loaders as categories- this is purely for the Project model. + pub links: Vec, + pub games: Vec, // Todo: in future, could be a searchable field. + pub gallery_items: Vec, // Gallery *only* urls are stored in gallery, but the gallery items are stored here- required for the Project model. + pub organization_id: Option, // Todo: in future, could be a searchable field. 
+ #[serde(flatten)] - pub loader_fields: HashMap>, + pub loader_fields: HashMap>, } pub fn get_sort_index(index: &str) -> Result<(&str, [&str; 1]), SearchError> { diff --git a/tests/common/api_v3/project.rs b/tests/common/api_v3/project.rs index 048b95a1a..41734d7f9 100644 --- a/tests/common/api_v3/project.rs +++ b/tests/common/api_v3/project.rs @@ -8,7 +8,10 @@ use actix_web::{ use async_trait::async_trait; use bytes::Bytes; use chrono::{DateTime, Utc}; -use labrinth::{models::projects::Project, search::SearchResults, util::actix::AppendsMultipart}; +use labrinth::{ + models::projects::Project, routes::v3::projects::ReturnSearchResults, + util::actix::AppendsMultipart, +}; use rust_decimal::Decimal; use serde_json::json; @@ -222,7 +225,7 @@ impl ApiV3 { query: Option<&str>, facets: Option, pat: &str, - ) -> SearchResults { + ) -> ReturnSearchResults { let query_field = if let Some(query) = query { format!("&query={}", urlencoding::encode(query)) } else { diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 85eae894f..27c2f475c 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -12,6 +12,7 @@ pub mod environment; pub mod pats; pub mod permissions; pub mod scopes; +pub mod search; // Testing equivalent to 'setup' function, producing a LabrinthConfig // If making a test, you should probably use environment::TestEnvironment::build() (which calls this) diff --git a/tests/common/search.rs b/tests/common/search.rs new file mode 100644 index 000000000..58678f450 --- /dev/null +++ b/tests/common/search.rs @@ -0,0 +1,212 @@ +#![allow(dead_code)] + +use std::{collections::HashMap, sync::Arc}; + +use serde_json::json; + +use crate::common::{ + api_common::{Api, ApiProject, ApiVersion}, + database::{FRIEND_USER_PAT, MOD_USER_PAT, USER_USER_PAT}, + dummy_data::{TestFile, DUMMY_CATEGORIES}, +}; + +use super::{api_v3::ApiV3, environment::TestEnvironment}; + +pub async fn setup_search_projects(test_env: &TestEnvironment) -> Arc> { + // Test setup and dummy data 
+ let api = &test_env.api; + let test_name = test_env.db.database_name.clone(); + + // Add dummy projects of various categories for searchability + let mut project_creation_futures = vec![]; + + let create_async_future = + |id: u64, pat: &'static str, is_modpack: bool, modify_json: Option| { + let slug = format!("{test_name}-searchable-project-{id}"); + + let jar = if is_modpack { + TestFile::build_random_mrpack() + } else { + TestFile::build_random_jar() + }; + async move { + // Add a project- simple, should work. + let req = api.add_public_project(&slug, Some(jar), modify_json, pat); + let (project, _) = req.await; + + // Approve, so that the project is searchable + let resp = api + .edit_project( + &project.id.to_string(), + json!({ + "status": "approved" + }), + MOD_USER_PAT, + ) + .await; + assert_eq!(resp.status(), 204); + (project.id.0, id) + } + }; + + // Test project 0 + let id = 0; + let modify_json = serde_json::from_value(json!([ + { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[4..6] }, + { "op": "add", "path": "/initial_versions/0/server_only", "value": true }, + { "op": "add", "path": "/license_id", "value": "LGPL-3.0-or-later" }, + ])) + .unwrap(); + project_creation_futures.push(create_async_future( + id, + USER_USER_PAT, + false, + Some(modify_json), + )); + + // Test project 1 + let id = 1; + let modify_json = serde_json::from_value(json!([ + { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[0..2] }, + { "op": "add", "path": "/initial_versions/0/client_only", "value": false }, + ])) + .unwrap(); + project_creation_futures.push(create_async_future( + id, + USER_USER_PAT, + false, + Some(modify_json), + )); + + // Test project 2 + let id = 2; + let modify_json = serde_json::from_value(json!([ + { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[0..2] }, + { "op": "add", "path": "/initial_versions/0/server_only", "value": true }, + { "op": "add", "path": "/name", "value": "Mysterious Project" }, + ])) + 
.unwrap(); + project_creation_futures.push(create_async_future( + id, + USER_USER_PAT, + false, + Some(modify_json), + )); + + // Test project 3 + let id = 3; + let modify_json = serde_json::from_value(json!([ + { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[0..3] }, + { "op": "add", "path": "/initial_versions/0/server_only", "value": true }, + { "op": "add", "path": "/initial_versions/0/game_versions", "value": ["1.20.4"] }, + { "op": "add", "path": "/name", "value": "Mysterious Project" }, + { "op": "add", "path": "/license_id", "value": "LicenseRef-All-Rights-Reserved" }, + ])) + .unwrap(); + project_creation_futures.push(create_async_future( + id, + FRIEND_USER_PAT, + false, + Some(modify_json), + )); + + // Test project 4 + let id = 4; + let modify_json = serde_json::from_value(json!([ + { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[0..3] }, + { "op": "add", "path": "/initial_versions/0/client_only", "value": false }, + { "op": "add", "path": "/initial_versions/0/game_versions", "value": ["1.20.5"] }, + ])) + .unwrap(); + project_creation_futures.push(create_async_future( + id, + USER_USER_PAT, + true, + Some(modify_json), + )); + + // Test project 5 + let id = 5; + let modify_json = serde_json::from_value(json!([ + { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[5..6] }, + { "op": "add", "path": "/initial_versions/0/client_only", "value": false }, + { "op": "add", "path": "/initial_versions/0/game_versions", "value": ["1.20.5"] }, + { "op": "add", "path": "/license_id", "value": "LGPL-3.0-or-later" }, + ])) + .unwrap(); + project_creation_futures.push(create_async_future( + id, + USER_USER_PAT, + false, + Some(modify_json), + )); + + // Test project 6 + let id = 6; + let modify_json = serde_json::from_value(json!([ + { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[5..6] }, + { "op": "add", "path": "/initial_versions/0/client_only", "value": false }, + { "op": "add", "path": 
"/initial_versions/0/server_only", "value": true }, + { "op": "add", "path": "/license_id", "value": "LGPL-3.0-or-later" }, + ])) + .unwrap(); + project_creation_futures.push(create_async_future( + id, + FRIEND_USER_PAT, + false, + Some(modify_json), + )); + + // Test project 7 (testing the search bug) + // This project has an initial private forge version that is 1.20.3, and a fabric 1.20.5 version. + // This means that a search for fabric + 1.20.3 or forge + 1.20.5 should not return this project. + let id = 7; + let modify_json = serde_json::from_value(json!([ + { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[5..6] }, + { "op": "add", "path": "/initial_versions/0/client_only", "value": false }, + { "op": "add", "path": "/initial_versions/0/server_only", "value": true }, + { "op": "add", "path": "/license_id", "value": "LGPL-3.0-or-later" }, + { "op": "add", "path": "/initial_versions/0/loaders", "value": ["forge"] }, + { "op": "add", "path": "/initial_versions/0/game_versions", "value": ["1.20.2"] }, + ])) + .unwrap(); + project_creation_futures.push(create_async_future( + id, + USER_USER_PAT, + false, + Some(modify_json), + )); + + // Await all project creation + // Returns a mapping of: + // project id -> test id + let id_conversion: Arc> = Arc::new( + futures::future::join_all(project_creation_futures) + .await + .into_iter() + .collect(), + ); + + // Create a second version for project 7 + let project_7 = api + .get_project_deserialized_common( + &format!("{test_name}-searchable-project-7"), + USER_USER_PAT, + ) + .await; + api.add_public_version( + project_7.id, + "1.0.0", + TestFile::build_random_jar(), + None, + None, + USER_USER_PAT, + ) + .await; + + // Forcibly reset the search index + let resp = api.reset_search_index().await; + assert_eq!(resp.status(), 204); + + id_conversion +} diff --git a/tests/project.rs b/tests/project.rs index bb35761d8..32ba86161 100644 --- a/tests/project.rs +++ b/tests/project.rs @@ -1,3 +1,5 @@ +use 
std::collections::HashMap; + use actix_http::StatusCode; use actix_web::test; use bytes::Bytes; @@ -9,10 +11,11 @@ use common::dummy_data::DUMMY_CATEGORIES; use common::environment::{with_test_environment, with_test_environment_all, TestEnvironment}; use common::permissions::{PermissionsTest, PermissionsTestContext}; +use common::search::setup_search_projects; use futures::StreamExt; use labrinth::database::models::project_item::{PROJECTS_NAMESPACE, PROJECTS_SLUGS_NAMESPACE}; use labrinth::models::ids::base62_impl::parse_base62; -use labrinth::models::projects::ProjectId; +use labrinth::models::projects::{Project, ProjectId}; use labrinth::models::teams::ProjectPermissions; use labrinth::util::actix::{AppendsMultipart, MultipartSegment, MultipartSegmentData}; use serde_json::json; @@ -1093,6 +1096,47 @@ async fn project_permissions_consistency_test() { .await; } +#[actix_rt::test] +async fn align_search_projects() { + // Test setup and dummy data + with_test_environment(Some(10), |test_env: TestEnvironment| async move { + setup_search_projects(&test_env).await; + + let api = &test_env.api; + let test_name = test_env.db.database_name.clone(); + + let projects = api + .search_deserialized( + Some(&test_name), + Some(json!([["categories:fabric"]])), + USER_USER_PAT, + ) + .await; + + for mut project in projects.hits { + let project_model = api + .get_project(&project.id.to_string(), USER_USER_PAT) + .await; + let mut project_model: Project = test::read_body_json(project_model).await; + + // Body/description is huge- don't store it in search, so it's OK if they differ here + // (Search should return "") + project_model.description = "".into(); + + // Aggregate project loader fields will not match exactly, + // because the search will only return the matching version, whereas the project returns the aggregate. + // So, we remove them from both. 
+ project_model.fields = HashMap::new(); + project.fields = HashMap::new(); + + let project_model = serde_json::to_value(project_model).unwrap(); + let searched_project_serialized = serde_json::to_value(project).unwrap(); + assert_eq!(project_model, searched_project_serialized); + } + }) + .await +} + // Route tests: // TODO: Missing routes on projects // TODO: using permissions/scopes, can we SEE projects existence that we are not allowed to? (ie 401 instead of 404) diff --git a/tests/search.rs b/tests/search.rs index 949918b95..aabacd5b5 100644 --- a/tests/search.rs +++ b/tests/search.rs @@ -1,19 +1,13 @@ use common::api_v3::ApiV3; use common::database::*; -use common::dummy_data::TestFile; + use common::dummy_data::DUMMY_CATEGORIES; use common::environment::with_test_environment; use common::environment::TestEnvironment; +use common::search::setup_search_projects; use futures::stream::StreamExt; -use labrinth::models::ids::base62_impl::parse_base62; use serde_json::json; -use std::collections::HashMap; -use std::sync::Arc; - -use crate::common::api_common::Api; -use crate::common::api_common::ApiProject; -use crate::common::api_common::ApiVersion; mod common; @@ -24,199 +18,11 @@ mod common; async fn search_projects() { // Test setup and dummy data with_test_environment(Some(10), |test_env: TestEnvironment| async move { + let id_conversion = setup_search_projects(&test_env).await; + let api = &test_env.api; let test_name = test_env.db.database_name.clone(); - // Add dummy projects of various categories for searchability - let mut project_creation_futures = vec![]; - - let create_async_future = - |id: u64, - pat: &'static str, - is_modpack: bool, - modify_json: Option| { - let slug = format!("{test_name}-searchable-project-{id}"); - - let jar = if is_modpack { - TestFile::build_random_mrpack() - } else { - TestFile::build_random_jar() - }; - async move { - // Add a project- simple, should work. 
- let req = api.add_public_project(&slug, Some(jar), modify_json, pat); - let (project, _) = req.await; - - // Approve, so that the project is searchable - let resp = api - .edit_project( - &project.id.to_string(), - json!({ - "status": "approved" - }), - MOD_USER_PAT, - ) - .await; - assert_eq!(resp.status(), 204); - (project.id.0, id) - } - }; - - // Test project 0 - let id = 0; - let modify_json = serde_json::from_value(json!([ - { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[4..6] }, - { "op": "add", "path": "/initial_versions/0/server_only", "value": true }, - { "op": "add", "path": "/license_id", "value": "LGPL-3.0-or-later" }, - ])) - .unwrap(); - project_creation_futures.push(create_async_future( - id, - USER_USER_PAT, - false, - Some(modify_json), - )); - - // Test project 1 - let id = 1; - let modify_json = serde_json::from_value(json!([ - { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[0..2] }, - { "op": "add", "path": "/initial_versions/0/client_only", "value": false }, - ])) - .unwrap(); - project_creation_futures.push(create_async_future( - id, - USER_USER_PAT, - false, - Some(modify_json), - )); - - // Test project 2 - let id = 2; - let modify_json = serde_json::from_value(json!([ - { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[0..2] }, - { "op": "add", "path": "/initial_versions/0/server_only", "value": true }, - { "op": "add", "path": "/name", "value": "Mysterious Project" }, - ])) - .unwrap(); - project_creation_futures.push(create_async_future( - id, - USER_USER_PAT, - false, - Some(modify_json), - )); - - // Test project 3 - let id = 3; - let modify_json = serde_json::from_value(json!([ - { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[0..3] }, - { "op": "add", "path": "/initial_versions/0/server_only", "value": true }, - { "op": "add", "path": "/initial_versions/0/game_versions", "value": ["1.20.4"] }, - { "op": "add", "path": "/name", "value": "Mysterious Project" }, - { "op": 
"add", "path": "/license_id", "value": "LicenseRef-All-Rights-Reserved" }, - ])) - .unwrap(); - project_creation_futures.push(create_async_future( - id, - FRIEND_USER_PAT, - false, - Some(modify_json), - )); - - // Test project 4 - let id = 4; - let modify_json = serde_json::from_value(json!([ - { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[0..3] }, - { "op": "add", "path": "/initial_versions/0/client_only", "value": false }, - { "op": "add", "path": "/initial_versions/0/game_versions", "value": ["1.20.5"] }, - ])) - .unwrap(); - project_creation_futures.push(create_async_future( - id, - USER_USER_PAT, - true, - Some(modify_json), - )); - - // Test project 5 - let id = 5; - let modify_json = serde_json::from_value(json!([ - { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[5..6] }, - { "op": "add", "path": "/initial_versions/0/client_only", "value": false }, - { "op": "add", "path": "/initial_versions/0/game_versions", "value": ["1.20.5"] }, - { "op": "add", "path": "/license_id", "value": "LGPL-3.0-or-later" }, - ])) - .unwrap(); - project_creation_futures.push(create_async_future( - id, - USER_USER_PAT, - false, - Some(modify_json), - )); - - // Test project 6 - let id = 6; - let modify_json = serde_json::from_value(json!([ - { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[5..6] }, - { "op": "add", "path": "/initial_versions/0/client_only", "value": false }, - { "op": "add", "path": "/initial_versions/0/server_only", "value": true }, - { "op": "add", "path": "/license_id", "value": "LGPL-3.0-or-later" }, - ])) - .unwrap(); - project_creation_futures.push(create_async_future( - id, - FRIEND_USER_PAT, - false, - Some(modify_json), - )); - - // Test project 7 (testing the search bug) - // This project has an initial private forge version that is 1.20.3, and a fabric 1.20.5 version. - // This means that a search for fabric + 1.20.3 or forge + 1.20.5 should not return this project. 
- let id = 7; - let modify_json = serde_json::from_value(json!([ - { "op": "add", "path": "/categories", "value": DUMMY_CATEGORIES[5..6] }, - { "op": "add", "path": "/initial_versions/0/client_only", "value": false }, - { "op": "add", "path": "/initial_versions/0/server_only", "value": true }, - { "op": "add", "path": "/license_id", "value": "LGPL-3.0-or-later" }, - { "op": "add", "path": "/initial_versions/0/loaders", "value": ["forge"] }, - { "op": "add", "path": "/initial_versions/0/game_versions", "value": ["1.20.2"] }, - ])) - .unwrap(); - project_creation_futures.push(create_async_future( - id, - USER_USER_PAT, - false, - Some(modify_json), - )); - - // Await all project creation - // Returns a mapping of: - // project id -> test id - let id_conversion: Arc> = Arc::new( - futures::future::join_all(project_creation_futures) - .await - .into_iter() - .collect(), - ); - - // Create a second version for project 7 - let project_7 = api - .get_project_deserialized_common( - &format!("{test_name}-searchable-project-7"), - USER_USER_PAT, - ) - .await; - api.add_public_version( - project_7.id, - "1.0.0", - TestFile::build_random_jar(), - None, - None, - USER_USER_PAT, - ) - .await; - // Pairs of: // 1. vec of search facets // 2. 
expected project ids to be returned by this search @@ -277,10 +83,6 @@ async fn search_projects() { // - modified_timestamp (not varied) // TODO: multiple different project types test - // Forcibly reset the search index - let resp = api.reset_search_index().await; - assert_eq!(resp.status(), 204); - // Test searches let stream = futures::stream::iter(pairs); stream @@ -294,10 +96,11 @@ async fn search_projects() { let mut found_project_ids: Vec = projects .hits .into_iter() - .map(|p| id_conversion[&parse_base62(&p.project_id).unwrap()]) + .map(|p| id_conversion[&p.id.0]) .collect(); expected_project_ids.sort(); found_project_ids.sort(); + println!("Facets: {:?}", facets); assert_eq!(found_project_ids, expected_project_ids); } }) diff --git a/tests/v2/search.rs b/tests/v2/search.rs index 22e7ffa4f..1598ad3ea 100644 --- a/tests/v2/search.rs +++ b/tests/v2/search.rs @@ -305,6 +305,51 @@ async fn search_projects() { } }) .await; + + // A couple additional tests for the search type returned, making sure it is properly translated back + let client_side_required = api + .search_deserialized( + Some(&test_name), + Some(json!([["client_side:required"]])), + USER_USER_PAT, + ) + .await; + for hit in client_side_required.hits { + assert_eq!(hit.client_side, "required".to_string()); + } + + let server_side_required = api + .search_deserialized( + Some(&test_name), + Some(json!([["server_side:required"]])), + USER_USER_PAT, + ) + .await; + for hit in server_side_required.hits { + assert_eq!(hit.server_side, "required".to_string()); + } + + let client_side_unsupported = api + .search_deserialized( + Some(&test_name), + Some(json!([["client_side:unsupported"]])), + USER_USER_PAT, + ) + .await; + for hit in client_side_unsupported.hits { + assert_eq!(hit.client_side, "unsupported".to_string()); + } + + let game_versions = api + .search_deserialized( + Some(&test_name), + Some(json!([["versions:1.20.5"]])), + USER_USER_PAT, + ) + .await; + for hit in game_versions.hits { +
assert_eq!(hit.versions, vec!["1.20.5".to_string()]); + } }) .await; }