Refactor Meilisearch, update to latest SDK, and implement faceted search (#44)

* feat(indexing): Reindex curseforge & local database at an interval

* fix(indexing): Use strings for meilisearch primary key

Fixes #17 by prefixing curseforge ids with "curse-" and local ids
with "local-".

* feat(indexing): Add newly created mods to the index more quickly

* feat(indexing): Implement faceted search, update to meilisearch master

Fixes #9, but only uses faceted search for categories. It should
be reasonably simple to add support for versions as well, though it
may be less useful given the large number of game versions and how
many of them each mod supports.
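
For illustration, a sketch of the facet parsing this adds (mirroring the
serde_json call in src/search/mod.rs; the category values are examples
only):

/// Parse the `facets` request parameter the way the search route does:
/// a JSON array of arrays of "field:value" strings. In the Meilisearch
/// facetFilters format of this era, inner arrays are ORed and the outer
/// array is ANDed, so this example matches mods in either category.
fn parse_facets(raw: &str) -> Result<Vec<Vec<&str>>, serde_json::Error> {
    serde_json::from_str(raw)
}

fn main() {
    let facets = parse_facets(r#"[["categories:misc", "categories:utility"]]"#).unwrap();
    assert_eq!(facets, vec![vec!["categories:misc", "categories:utility"]]);
    // The search route then applies: query = query.with_facet_filters(facets);
}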

* feat(indexing): Allow skipping the initial indexing (by passing "skip" on the command line) for quicker iteration and startup

Co-authored-by: Geometrically <18202329+Geometrically@users.noreply.github.com>
Author: Aeledfyr, 2020-07-27 18:54:10 -05:00 (committed by GitHub)
Parent: 7914e89212
Commit: ff28ea8fa8
13 changed files with 441 additions and 201 deletions

.env

@@ -1,4 +1,3 @@
INDEX_CURSEFORGE=false
DEBUG=true
CDN_URL=cdn.modrinth.com
@@ -14,3 +13,9 @@ BACKBLAZE_ENABLED=false
BACKBLAZE_KEY_ID=none
BACKBLAZE_KEY=none
BACKBLAZE_BUCKET_ID=none
INDEX_CURSEFORGE=false
# 1 hour
LOCAL_INDEX_INTERVAL=3600
# 4 hours
EXTERNAL_INDEX_INTERVAL=14400

Cargo.lock (generated)

@@ -1145,6 +1145,7 @@ dependencies = [
"serde_json",
"sha1",
"sqlx",
"sqlx-macros",
"thiserror",
]
@@ -1257,11 +1258,10 @@ dependencies = [
[[package]]
name = "meilisearch-sdk"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e99e00b31a0e33add02a01c969a108144882ef27915d81292deda71baa6b6eea"
source = "git+https://github.com/Aeledfyr/meilisearch-rust#ba1f1e530cb383f421273f6863378bc9bc222f7b"
dependencies = [
"log",
"minreq",
"reqwest",
"serde",
"serde_json",
"urlencoding",
@@ -1301,12 +1301,6 @@ dependencies = [
"adler32",
]
[[package]]
name = "minreq"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab229c252995e9d56cc66857f3ab2c41e3138b1a6c92089f013698388e64d6bd"
[[package]]
name = "mio"
version = "0.6.22"
@@ -1349,33 +1343,6 @@ dependencies = [
"ws2_32-sys",
]
[[package]]
name = "modrinth"
version = "0.1.0"
dependencies = [
"actix-files",
"actix-multipart",
"actix-rt",
"actix-web",
"async-trait",
"base64 0.12.3",
"chrono",
"dotenv",
"env_logger",
"futures",
"futures-timer",
"log",
"meilisearch-sdk",
"rand",
"reqwest",
"serde",
"serde_json",
"sha1",
"sqlx",
"sqlx-macros",
"thiserror",
]
[[package]]
name = "native-tls"
version = "0.2.4"

Cargo.toml

@@ -19,8 +19,6 @@ actix-multipart = "0.2.0"
reqwest = {version="0.10.6", features=["json"]}
meilisearch-sdk = "0.1.4"
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
chrono = { version = "0.4", features = ["serde"] }
@@ -50,3 +48,8 @@ git = "https://github.com/launchbadge/sqlx/"
branch = "master"
default-features = false
features = ["runtime-actix", "postgres", "chrono", "offline"]
[dependencies.meilisearch-sdk]
# Temp fork with some patches
git = "https://github.com/Aeledfyr/meilisearch-rust"
branch = "master"

src/main.rs

@@ -1,16 +1,16 @@
use crate::search::indexing::index_mods;
use actix_web::middleware::Logger;
use actix_web::{web, App, HttpServer};
use env_logger::Env;
use log::info;
use std::env;
use std::fs::File;
use log::{info, warn};
use search::indexing::index_mods;
use search::indexing::IndexingSettings;
use std::sync::Arc;
mod database;
mod file_hosting;
mod models;
mod routes;
mod scheduler;
mod search;
#[actix_rt::main]
@@ -24,7 +24,6 @@ async fn main() -> std::io::Result<()> {
let pool = database::connect()
.await
.expect("Database connection failed");
let client_ref = pool.clone();
let backblaze_enabled = dotenv::var("BACKBLAZE_ENABLED")
.ok()
@@ -44,22 +43,97 @@ async fn main() -> std::io::Result<()> {
Arc::new(file_hosting::MockHost::new())
};
// Get executable path
let mut exe_path = env::current_exe()?.parent().unwrap().to_path_buf();
// Create the path to the index lock file
exe_path.push("index.v1.lock");
// TODO: use a real arg parsing library
let skip_initial = std::env::args().any(|x| x == "skip");
// Allow manually skipping the initial indexing for quicker iteration
// and startup times.
if skip_initial {
info!("Skipping initial indexing");
}
// Indexing mods if not already done
if env::args().any(|x| x == "regen") {
// User forced regen of indexing
info!("Forced regeneration of indexes!");
index_mods(pool).await.expect("Mod indexing failed");
} else if !exe_path.exists() {
// The indexes were not created, or the version was upgraded
info!("Indexing of mods for first time...");
index_mods(pool).await.expect("Mod indexing failed");
// Create the lock file
File::create(exe_path)?;
let mut scheduler = scheduler::Scheduler::new();
// The interval in seconds at which the local database is indexed
// for searching. Defaults to 1 hour if unset.
let local_index_interval = std::time::Duration::from_secs(
dotenv::var("LOCAL_INDEX_INTERVAL")
.ok()
.map(|i| i.parse().unwrap())
.unwrap_or(3600),
);
let pool_ref = pool.clone();
let mut skip = skip_initial;
scheduler.run(local_index_interval, move || {
let pool_ref = pool_ref.clone();
async move {
if skip {
skip = false;
return;
}
info!("Indexing local database");
let settings = IndexingSettings {
index_local: true,
index_external: false,
};
let result = index_mods(pool_ref, settings).await;
if let Err(e) = result {
warn!("Local mod indexing failed: {:?}", e);
}
info!("Done indexing local database");
}
});
let indexing_queue = Arc::new(search::indexing::queue::CreationQueue::new());
let queue_ref = indexing_queue.clone();
let mut skip = skip_initial;
scheduler.run(std::time::Duration::from_secs(15 * 60), move || {
let queue = queue_ref.clone();
async move {
if skip {
skip = false;
return;
}
info!("Indexing created mod queue");
let result = search::indexing::queue::index_queue(&*queue).await;
if let Err(e) = result {
warn!("Indexing created mods failed: {:?}", e);
}
info!("Done indexing created mod queue");
}
});
if dotenv::var("INDEX_CURSEFORGE")
.ok()
.and_then(|b| b.parse::<bool>().ok())
.unwrap_or(false)
{
// The interval in seconds at which curseforge is indexed for
// searching. Defaults to 4 hours if unset.
let external_index_interval = std::time::Duration::from_secs(
dotenv::var("EXTERNAL_INDEX_INTERVAL")
.ok()
.map(|i| i.parse().unwrap())
.unwrap_or(3600 * 4),
);
let pool_ref = pool.clone();
scheduler.run(external_index_interval, move || {
info!("Indexing curseforge");
let pool_ref = pool_ref.clone();
async move {
let settings = IndexingSettings {
index_local: false,
index_external: true,
};
let result = index_mods(pool_ref, settings).await;
if let Err(e) = result {
warn!("External mod indexing failed: {:?}", e);
}
info!("Done indexing curseforge");
}
});
}
info!("Starting Actix HTTP server!");
@@ -69,7 +143,7 @@ async fn main() -> std::io::Result<()> {
App::new()
.wrap(Logger::default())
.wrap(Logger::new("%a %{User-Agent}i"))
.data(client_ref.clone())
.data(pool.clone())
.data(file_host.clone())
.service(routes::index_get)
.service(routes::mod_search)
@@ -89,14 +163,14 @@ fn check_env_vars() {
.and_then(|s| s.parse::<T>().ok())
.is_none()
{
log::warn!(
warn!(
"Variable `{}` missing in dotenv or not of type `{}`",
var,
std::any::type_name::<T>()
)
}
}
check_var::<bool>("INDEX_CURSEFORGE");
check_var::<String>("CDN_URL");
check_var::<String>("DATABASE_URL");
check_var::<String>("MEILISEARCH_ADDR");
check_var::<String>("BIND_ADDR");
@@ -109,5 +183,18 @@ fn check_env_vars() {
check_var::<String>("BACKBLAZE_KEY_ID");
check_var::<String>("BACKBLAZE_KEY");
check_var::<String>("BACKBLAZE_BUCKET_ID");
} else {
check_var::<String>("MOCK_FILE_PATH");
}
check_var::<bool>("INDEX_CURSEFORGE");
if dotenv::var("INDEX_CURSEFORGE")
.ok()
.and_then(|s| s.parse::<bool>().ok())
.unwrap_or(false)
{
check_var::<usize>("EXTERNAL_INDEX_INTERVAL");
}
check_var::<usize>("LOCAL_INDEX_INTERVAL");
}

src/models/mods.rs

@@ -131,6 +131,10 @@ pub struct ModLoader(pub String);
#[derive(Serialize, Deserialize)]
pub struct SearchRequest {
pub query: Option<String>,
/// Must match a JSON array of arrays of strings, e.g. `[["categories:misc"]]`
// TODO: We may want to have a better representation of this, so that
// we are less likely to break backwards compatibility
pub facets: Option<String>,
pub filters: Option<String>,
pub version: Option<String>,
pub offset: Option<String>,

src/routes/mod_creation.rs

@@ -3,6 +3,7 @@ use crate::file_hosting::{FileHost, FileHostingError};
use crate::models::error::ApiError;
use crate::models::mods::{GameVersion, ModId, VersionId, VersionType};
use crate::models::teams::TeamMember;
use crate::search::indexing::queue::CreationQueue;
use actix_multipart::{Field, Multipart};
use actix_web::http::StatusCode;
use actix_web::web::Data;
@@ -10,6 +11,7 @@ use actix_web::{post, HttpResponse};
use futures::stream::StreamExt;
use serde::{Deserialize, Serialize};
use sqlx::postgres::PgPool;
use std::sync::Arc;
use thiserror::Error;
#[derive(Error, Debug)]
@@ -124,7 +126,8 @@ async fn undo_uploads(
pub async fn mod_create(
payload: Multipart,
client: Data<PgPool>,
file_host: Data<std::sync::Arc<dyn FileHost + Send + Sync>>,
file_host: Data<Arc<dyn FileHost + Send + Sync>>,
indexing_queue: Data<Arc<CreationQueue>>,
) -> Result<HttpResponse, CreateError> {
let mut transaction = client.begin().await?;
let mut uploaded_files = Vec::new();
@@ -134,6 +137,7 @@ pub async fn mod_create(
&mut transaction,
&***file_host,
&mut uploaded_files,
&***indexing_queue,
)
.await;
@@ -159,6 +163,7 @@ async fn mod_create_inner(
transaction: &mut sqlx::Transaction<'_, sqlx::Postgres>,
file_host: &dyn FileHost,
uploaded_files: &mut Vec<UploadedFile>,
indexing_queue: &CreationQueue,
) -> Result<HttpResponse, CreateError> {
let cdn_url = dotenv::var("CDN_URL")?;
@@ -377,6 +382,45 @@ async fn mod_create_inner(
initial_versions: created_versions,
};
let versions_list = mod_builder
.initial_versions
.iter()
.flat_map(|v| {
v.game_versions.iter().map(|id| id.0.to_string())
// TODO: proper version identifiers, once game versions
// have been implemented
})
.collect::<std::collections::HashSet<String>>()
.into_iter()
.collect::<Vec<_>>();
let now = chrono::Utc::now();
let timestamp = now.timestamp();
let formatted = now.to_string();
let index_mod = crate::search::UploadSearchMod {
mod_id: format!("local-{}", mod_id),
title: mod_builder.title.clone(),
description: mod_builder.description.clone(),
categories: create_data.categories.clone(),
versions: versions_list,
page_url: mod_builder.body_url.clone(),
icon_url: mod_builder.icon_url.clone().unwrap(),
// TODO: Author/team info, latest version info
author: String::new(),
author_url: String::new(),
latest_version: String::new(),
downloads: 0,
date_created: formatted.clone(),
created_timestamp: timestamp,
// TODO: store and return modified time
date_modified: formatted,
modified_timestamp: timestamp,
empty: std::borrow::Cow::Borrowed("{}{}{}"),
};
indexing_queue.add(index_mod);
let _mod_id = mod_builder.insert(&mut *transaction).await?;
// TODO: respond with the new mod info, or with just the new mod id.

src/routes/mods.rs

@@ -6,5 +6,6 @@ use actix_web::{get, web, HttpResponse};
pub async fn mod_search(
web::Query(info): web::Query<SearchRequest>,
) -> Result<HttpResponse, SearchError> {
Ok(HttpResponse::Ok().json(search_for_mod(&info)?))
let results = search_for_mod(&info).await?;
Ok(HttpResponse::Ok().json(results))
}

src/scheduler.rs (new file)

@@ -0,0 +1,30 @@
use actix_rt::time;
use actix_rt::Arbiter;
use futures::StreamExt;
pub struct Scheduler {
arbiter: Arbiter,
}
impl Scheduler {
pub fn new() -> Self {
Scheduler {
arbiter: Arbiter::new(),
}
}
pub fn run<F, R>(&mut self, interval: std::time::Duration, task: F)
where
F: Fn() -> R + Send + 'static,
R: std::future::Future<Output = ()> + Send + 'static,
{
let future = time::interval(interval).for_each_concurrent(2, move |_| task());
self.arbiter.send(future);
}
}
impl Drop for Scheduler {
fn drop(&mut self) {
self.arbiter.stop();
}
}
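
A minimal usage sketch of this scheduler, matching how main.rs drives the
indexing jobs (the task body and the final delay are placeholders):

// Assumes the Scheduler above is in scope, plus actix-rt 1.x and log as
// used elsewhere in the crate.
#[actix_rt::main]
async fn main() {
    let mut scheduler = Scheduler::new();
    // The closure runs on every interval tick and returns a fresh future;
    // for_each_concurrent(2, ...) lets at most two runs overlap if a task
    // outlives its interval.
    scheduler.run(std::time::Duration::from_secs(3600), || async {
        log::info!("periodic task fired");
    });
    // Keep the scheduler alive; dropping it stops the Arbiter and with it
    // the interval stream (main.rs keeps it alive as long as the server).
    actix_rt::time::delay_for(std::time::Duration::from_secs(7200)).await;
}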

src/search/indexing/curseforge_import.rs

@@ -1,5 +1,5 @@
use super::IndexingError;
use crate::search::SearchMod;
use crate::search::UploadSearchMod;
use log::info;
use serde::{Deserialize, Serialize};
@@ -48,10 +48,10 @@ pub struct CurseForgeMod
pub async fn index_curseforge(
start_index: i32,
end_index: i32,
) -> Result<Vec<SearchMod>, IndexingError> {
) -> Result<Vec<UploadSearchMod>, IndexingError> {
info!("Indexing curseforge mods!");
let mut docs_to_add: Vec<SearchMod> = vec![];
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
let res = reqwest::Client::new()
.post("https://addons-ecs.forgesvc.net/api/v2/addon")
@@ -177,32 +177,32 @@
.thumbnail_url
.replace("/256/256/", "/64/64/");
docs_to_add.push(SearchMod {
mod_id: -curseforge_mod.id as i64,
let created = curseforge_mod
.date_created
.parse::<chrono::DateTime<chrono::Utc>>()?;
let modified = curseforge_mod
.date_modified
.parse::<chrono::DateTime<chrono::Utc>>()?;
docs_to_add.push(UploadSearchMod {
mod_id: format!("curse-{}", curseforge_mod.id),
author: (&curseforge_mod.authors[0].name).to_string(),
title: curseforge_mod.name,
description: curseforge_mod.summary.chars().take(150).collect(),
keywords: mod_categories,
categories: mod_categories,
versions: mod_game_versions.clone(),
downloads: curseforge_mod.download_count as i32,
page_url: curseforge_mod.website_url,
icon_url,
author_url: (&curseforge_mod.authors[0].url).to_string(),
date_created: curseforge_mod.date_created.chars().take(10).collect(),
created: curseforge_mod
.date_created
.parse::<chrono::DateTime<chrono::Utc>>()?
.timestamp(),
date_modified: curseforge_mod.date_modified.chars().take(10).collect(),
updated: curseforge_mod
.date_modified
.parse::<chrono::DateTime<chrono::Utc>>()?
.timestamp(),
date_created: created.to_string(),
created_timestamp: created.timestamp(),
date_modified: modified.to_string(),
modified_timestamp: modified.timestamp(),
latest_version,
empty: String::from("{}{}{}"),
empty: std::borrow::Cow::Borrowed("{}{}{}"),
})
}
//TODO Reindex every hour for new mods.
Ok(docs_to_add)
}

src/search/indexing/local_import.rs

@@ -2,13 +2,13 @@ use futures::{StreamExt, TryStreamExt};
use log::info;
use super::IndexingError;
use crate::search::SearchMod;
use crate::search::UploadSearchMod;
use sqlx::postgres::PgPool;
pub async fn index_local(pool: PgPool) -> Result<Vec<SearchMod>, IndexingError> {
pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingError> {
info!("Indexing local mods!");
let mut docs_to_add: Vec<SearchMod> = vec![];
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
let mut results = sqlx::query!(
"
@@ -53,23 +53,25 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<SearchMod>, IndexingError>
icon_url = url;
}
docs_to_add.push(SearchMod {
mod_id: result.id,
author: "".to_string(),
let formatted = result.published.to_string();
let timestamp = result.published.timestamp();
docs_to_add.push(UploadSearchMod {
mod_id: format!("local-{}", crate::models::ids::ModId(result.id as u64)),
title: result.title,
description: result.description,
keywords: categories,
categories,
versions,
downloads: result.downloads,
page_url: result.body_url,
icon_url,
author: "".to_string(), // TODO: author/team info
author_url: "".to_string(),
date_created: result.published.to_string(),
created: 0,
date_modified: "".to_string(),
updated: 0,
latest_version: "".to_string(),
empty: String::from("{}{}{}"),
date_created: formatted.clone(),
created_timestamp: timestamp,
date_modified: formatted,
modified_timestamp: timestamp,
latest_version: "".to_string(), // TODO: Info about latest version
empty: std::borrow::Cow::Borrowed("{}{}{}"),
});
}
}

src/search/indexing/mod.rs

@@ -1,11 +1,13 @@
/// This module is used for the indexing from any source.
pub mod curseforge_import;
pub mod local_import;
pub mod queue;
use crate::search::indexing::curseforge_import::index_curseforge;
use crate::search::indexing::local_import::index_local;
use crate::search::SearchMod;
use crate::search::UploadSearchMod;
use curseforge_import::index_curseforge;
use local_import::index_local;
use meilisearch_sdk::client::Client;
use meilisearch_sdk::indexes::Index;
use meilisearch_sdk::settings::Settings;
use sqlx::postgres::PgPool;
use std::collections::{HashMap, VecDeque};
@@ -14,7 +16,7 @@ use thiserror::Error;
#[derive(Error, Debug)]
pub enum IndexingError {
#[error("Error while connecting to the MeiliSearch database")]
IndexDBError(meilisearch_sdk::errors::Error),
IndexDBError(#[from] meilisearch_sdk::errors::Error),
#[error("Error while importing mods from CurseForge")]
CurseforgeImportError(reqwest::Error),
#[error("Error while serializing or deserializing JSON: {0}")]
@@ -32,95 +34,115 @@ pub enum IndexingError {
// assumes a max average size of 1KiB per mod to avoid this cap.
const MEILISEARCH_CHUNK_SIZE: usize = 10000;
pub async fn index_mods(pool: PgPool) -> Result<(), IndexingError> {
// Check if the index exists
#[derive(Debug)]
pub struct IndexingSettings {
pub index_external: bool,
pub index_local: bool,
}
impl IndexingSettings {
pub fn from_env() -> Self {
let index_local = true;
let index_external = dotenv::var("INDEX_CURSEFORGE")
.ok()
.and_then(|b| b.parse::<bool>().ok())
.unwrap_or(false);
Self {
index_external,
index_local,
}
}
}
pub async fn index_mods(pool: PgPool, settings: IndexingSettings) -> Result<(), IndexingError> {
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
if settings.index_local {
docs_to_add.append(&mut index_local(pool.clone()).await?);
}
if settings.index_external {
docs_to_add.append(&mut index_curseforge(1, 400_000).await?);
}
// Write Indices
add_mods(docs_to_add).await?;
Ok(())
}
async fn create_index<'a>(
client: &'a Client<'a>,
name: &'a str,
rules: impl FnOnce() -> Vec<String>,
) -> Result<Index<'a>, IndexingError> {
match client.get_index(name).await {
// TODO: update index settings on startup (or delete old indices on startup)
Ok(index) => Ok(index),
Err(meilisearch_sdk::errors::Error::IndexNotFound) => {
// Only create index and set settings if the index doesn't already exist
let index = client.create_index(name, Some("mod_id")).await?;
index
.set_settings(&default_settings().with_ranking_rules(rules()))
.await?;
Ok(index)
}
Err(e) => {
log::warn!("Unhandled error while creating index: {}", e);
Err(IndexingError::IndexDBError(e))
}
}
}
async fn add_to_index(index: Index<'_>, mods: &[UploadSearchMod]) -> Result<(), IndexingError> {
for chunk in mods.chunks(MEILISEARCH_CHUNK_SIZE) {
index.add_documents(chunk, Some("mod_id")).await?;
}
Ok(())
}
pub async fn add_mods(mods: Vec<UploadSearchMod>) -> Result<(), IndexingError> {
let address = &*dotenv::var("MEILISEARCH_ADDR")?;
let client = Client::new(address, "");
let mut docs_to_add: Vec<SearchMod> = vec![];
// Relevance Index
let relevance_index = create_index(&client, "relevance_mods", || {
let mut relevance_rules = default_rules();
relevance_rules.push_back("desc(downloads)".to_string());
relevance_rules.into()
})
.await?;
add_to_index(relevance_index, &mods).await?;
docs_to_add.append(&mut index_local(pool.clone()).await?);
if dotenv::var("INDEX_CURSEFORGE")?
.parse()
.expect("`INDEX_CURSEFORGE` is not a boolean.")
{
docs_to_add.append(&mut index_curseforge(1, 400_000).await?);
}
//Write Indexes
//Relevance Index
// Downloads Index
let downloads_index = create_index(&client, "downloads_mods", || {
let mut downloads_rules = default_rules();
downloads_rules.push_front("desc(downloads)".to_string());
downloads_rules.into()
})
.await?;
add_to_index(downloads_index, &mods).await?;
let mut relevance_index = client
.get_or_create("relevance_mods")
.map_err(IndexingError::IndexDBError)?;
// Updated Index
let updated_index = create_index(&client, "updated_mods", || {
let mut updated_rules = default_rules();
updated_rules.push_front("desc(updated)".to_string());
updated_rules.into()
})
.await?;
add_to_index(updated_index, &mods).await?;
let mut relevance_rules = default_rules();
relevance_rules.push_back("desc(downloads)".to_string());
relevance_index
.set_settings(&default_settings().with_ranking_rules(relevance_rules.into()))
.map_err(IndexingError::IndexDBError)?;
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
// TODO: get meilisearch sdk to not require cloning (ie take a reference to docs_to_add)
// This may require making our own fork of it.
relevance_index
.add_documents(Vec::from(chunk), Some("mod_id"))
.map_err(IndexingError::IndexDBError)?;
}
//Downloads Index
let mut downloads_index = client
.get_or_create("downloads_mods")
.map_err(IndexingError::IndexDBError)?;
let mut downloads_rules = default_rules();
downloads_rules.push_front("desc(downloads)".to_string());
downloads_index
.set_settings(&default_settings().with_ranking_rules(downloads_rules.into()))
.map_err(IndexingError::IndexDBError)?;
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
downloads_index
.add_documents(Vec::from(chunk), Some("mod_id"))
.map_err(IndexingError::IndexDBError)?;
}
//Updated Index
let mut updated_index = client
.get_or_create("updated_mods")
.map_err(IndexingError::IndexDBError)?;
let mut updated_rules = default_rules();
updated_rules.push_front("desc(updated)".to_string());
updated_index
.set_settings(&default_settings().with_ranking_rules(updated_rules.into()))
.map_err(IndexingError::IndexDBError)?;
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
updated_index
.add_documents(Vec::from(chunk), Some("mod_id"))
.map_err(IndexingError::IndexDBError)?;
}
//Created Index
let mut newest_index = client
.get_or_create("newest_mods")
.map_err(IndexingError::IndexDBError)?;
let mut newest_rules = default_rules();
newest_rules.push_back("desc(created)".to_string());
newest_index
.set_settings(&default_settings().with_ranking_rules(newest_rules.into()))
.map_err(IndexingError::IndexDBError)?;
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
newest_index
.add_documents(Vec::from(chunk), Some("mod_id"))
.map_err(IndexingError::IndexDBError)?;
}
// Created Index
let newest_index = create_index(&client, "newest_mods", || {
let mut newest_rules = default_rules();
newest_rules.push_front("desc(created)".to_string());
newest_rules.into()
})
.await?;
add_to_index(newest_index, &mods).await?;
Ok(())
}
@@ -144,7 +166,7 @@ fn default_settings() -> Settings {
"author".to_string(),
"title".to_string(),
"description".to_string(),
"keywords".to_string(),
"categories".to_string(),
"versions".to_string(),
"downloads".to_string(),
"page_url".to_string(),
@@ -155,13 +177,12 @@ fn default_settings() -> Settings {
"date_modified".to_string(),
"updated".to_string(),
"latest_version".to_string(),
"empty".to_string(),
];
let searchable_attributes = vec![
"title".to_string(),
"description".to_string(),
"keywords".to_string(),
"categories".to_string(),
"versions".to_string(),
"author".to_string(),
"empty".to_string(),
@@ -173,6 +194,7 @@ fn default_settings() -> Settings {
.with_accept_new_fields(true)
.with_stop_words(vec![])
.with_synonyms(HashMap::new())
.with_attributes_for_faceting(vec![String::from("categories")])
}
//endregion

src/search/indexing/queue.rs (new file)

@@ -0,0 +1,31 @@
use super::{add_mods, IndexingError, UploadSearchMod};
use std::sync::Mutex;
pub struct CreationQueue {
// There's probably a better structure for this, but a mutex works
// and I don't think this can deadlock. This queue requires fast
// writes and then a single potentially slower read/write that
// empties the queue.
queue: Mutex<Vec<UploadSearchMod>>,
}
impl CreationQueue {
pub fn new() -> Self {
CreationQueue {
queue: Mutex::new(Vec::with_capacity(10)),
}
}
pub fn add(&self, search_mod: UploadSearchMod) {
// Can only panic if mutex is poisoned
self.queue.lock().unwrap().push(search_mod);
}
pub fn take(&self) -> Vec<UploadSearchMod> {
std::mem::replace(&mut *self.queue.lock().unwrap(), Vec::with_capacity(10))
}
}
pub async fn index_queue(queue: &CreationQueue) -> Result<(), IndexingError> {
let queue = queue.take();
add_mods(queue).await
}
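
A condensed sketch of how the queue's two ends are wired together
(assuming CreationQueue, UploadSearchMod, IndexingError, and index_queue
from this module are in scope; error handling as elsewhere in the crate):

use std::sync::Arc;

async fn queue_round_trip(
    queue: Arc<CreationQueue>,
    new_mod: UploadSearchMod,
) -> Result<(), IndexingError> {
    // Producer: the create route calls add() right after the mod row is
    // inserted, so new mods become searchable before the next full reindex.
    queue.add(new_mod);

    // Consumer: the 15-minute job in main.rs does exactly this; take()
    // swaps an empty Vec in under the lock and uploads the drained batch.
    index_queue(&queue).await
}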

src/search/mod.rs

@@ -6,6 +6,7 @@ use meilisearch_sdk::client::Client;
use meilisearch_sdk::document::Document;
use meilisearch_sdk::search::Query;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use thiserror::Error;
pub mod indexing;
@@ -13,7 +14,7 @@ pub mod indexing;
#[derive(Error, Debug)]
pub enum SearchError {
#[error("Error while connecting to the MeiliSearch database")]
IndexDBError(meilisearch_sdk::errors::Error),
IndexDBError(#[from] meilisearch_sdk::errors::Error),
#[error("Error while serializing or deserializing JSON: {0}")]
SerDeError(#[from] serde_json::Error),
#[error("Error while parsing an integer: {0}")]
@@ -45,36 +46,75 @@ impl actix_web::ResponseError for SearchError {
}
}
/// A mod document used for uploading mods to meilisearch's indices.
/// This contains some extra data that is not returned by search results.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct SearchMod {
pub mod_id: i64,
pub struct UploadSearchMod {
pub mod_id: String,
pub author: String,
pub title: String,
pub description: String,
pub keywords: Vec<String>,
pub categories: Vec<String>,
pub versions: Vec<String>,
pub downloads: i32,
pub page_url: String,
pub icon_url: String,
pub author_url: String,
pub date_created: String,
pub created: i64,
pub date_modified: String,
pub updated: i64,
pub latest_version: String,
pub empty: String,
/// RFC 3339 formatted creation date of the mod
pub date_created: String,
/// Unix timestamp of the creation date of the mod
pub created_timestamp: i64,
/// RFC 3339 formatted date/time of last major modification (update)
pub date_modified: String,
/// Unix timestamp of the last major modification
pub modified_timestamp: i64,
/// Must be "{}{}{}", a hack until meilisearch supports searches
/// with empty queries (https://github.com/meilisearch/MeiliSearch/issues/729)
// This is a Cow to prevent unnecessary allocations for a static
// string
pub empty: Cow<'static, str>,
}
impl Document for SearchMod {
type UIDType = i64;
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ResultSearchMod {
pub mod_id: String,
pub author: String,
pub title: String,
pub description: String,
pub categories: Vec<String>,
// TODO: more efficient format for listing versions, without many repetitions
pub versions: Vec<String>,
pub downloads: i32,
pub page_url: String,
pub icon_url: String,
pub author_url: String,
/// RFC 3339 formatted creation date of the mod
pub date_created: String,
/// RFC 3339 formatted modification date of the mod
pub date_modified: String,
pub latest_version: String,
}
impl Document for UploadSearchMod {
type UIDType = String;
fn get_uid(&self) -> &Self::UIDType {
&self.mod_id
}
}
pub fn search_for_mod(info: &SearchRequest) -> Result<Vec<SearchMod>, SearchError> {
use std::borrow::Cow;
impl Document for ResultSearchMod {
type UIDType = String;
fn get_uid(&self) -> &Self::UIDType {
&self.mod_id
}
}
pub async fn search_for_mod(info: &SearchRequest) -> Result<Vec<ResultSearchMod>, SearchError> {
let address = &*dotenv::var("MEILISEARCH_ADDR")?;
let client = Client::new(address, "");
@@ -98,11 +138,15 @@ pub fn search_for_mod(info: &SearchRequest) -> Result<Vec<SearchMod>, SearchError>
if !filters.is_empty() {
query = query.with_filters(&filters);
}
if let Some(facets) = &info.facets {
let facets = serde_json::from_str::<Vec<Vec<&str>>>(facets)?;
query = query.with_facet_filters(facets);
}
Ok(client
.get_index(format!("{}_mods", index).as_ref())
.map_err(SearchError::IndexDBError)?
.search::<SearchMod>(&query)
.map_err(SearchError::IndexDBError)?
.await?
.search::<ResultSearchMod>(&query)
.await?
.hits)
}