Optimize and fix some bugs in indexing (#98)

* Improve curseforge and local indexing

This should make curseforge indexing more efficient, and reuses
some of the normal local indexing for the queued indexing of
recently created mods.

* Unify impls for single and multiple routes for mods and versions

This uses the same backend for the single and multiple query
routes so that they no longer return inconsistent information.

* Cache valid curseforge mod ids to reduce request load

This caches the ids of Minecraft mods and reuses them on indexing
to reduce the number of unused addons that are returned.
This commit is contained in:
Aeledfyr 2020-11-03 18:55:50 -06:00 committed by GitHub
parent da79386cc3
commit d477874535
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 745 additions and 343 deletions

2
.env
View File

@ -31,5 +31,7 @@ LOCAL_INDEX_INTERVAL=3600
# 12 hours
EXTERNAL_INDEX_INTERVAL=43200
INDEX_CACHE_PATH=/tmp/modrinth-id-cache.json
GITHUB_CLIENT_ID=3acffb2e808d16d4b226
GITHUB_CLIENT_SECRET=none

1
Cargo.lock generated
View File

@ -1618,6 +1618,7 @@ dependencies = [
"futures",
"futures-timer",
"gumdrop",
"lazy_static",
"log",
"meilisearch-sdk",
"rand",

View File

@ -33,6 +33,7 @@ dotenv = "0.15"
log = "0.4.8"
env_logger = "0.8.1"
thiserror = "1.0.21"
lazy_static = "1.4.0"
futures = "0.3.6"
futures-timer = "3.0.2"

View File

@ -384,6 +384,74 @@
]
}
},
"225597042db9c2d95296ea6bbeda4e99ffc9ddfab3991c8637ac3f4749ece6f3": {
"query": "\n SELECT m.id, m.title, m.description, m.downloads, m.icon_url, m.body_url, m.published, m.updated, m.team_id\n FROM mods m\n WHERE id = $1\n ",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int8"
},
{
"ordinal": 1,
"name": "title",
"type_info": "Varchar"
},
{
"ordinal": 2,
"name": "description",
"type_info": "Varchar"
},
{
"ordinal": 3,
"name": "downloads",
"type_info": "Int4"
},
{
"ordinal": 4,
"name": "icon_url",
"type_info": "Varchar"
},
{
"ordinal": 5,
"name": "body_url",
"type_info": "Varchar"
},
{
"ordinal": 6,
"name": "published",
"type_info": "Timestamptz"
},
{
"ordinal": 7,
"name": "updated",
"type_info": "Timestamptz"
},
{
"ordinal": 8,
"name": "team_id",
"type_info": "Int8"
}
],
"parameters": {
"Left": [
"Int8"
]
},
"nullable": [
false,
false,
false,
false,
true,
false,
false,
false,
false
]
}
},
"25131559cb73a088000ab6379a769233440ade6c7511542da410065190d203fc": {
"query": "\n SELECT id FROM loaders\n WHERE loader = $1\n ",
"describe": {
@ -664,6 +732,26 @@
"nullable": []
}
},
"621c3c5e5b3ac00c291b5f9cae2134420ef3e23f1f236267c4132222299c87a2": {
"query": "\n SELECT gv.version FROM versions\n INNER JOIN game_versions_versions gvv ON gvv.joining_version_id=versions.id\n INNER JOIN game_versions gv ON gvv.game_version_id=gv.id\n WHERE versions.mod_id = $1\n ORDER BY gv.created ASC\n ",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "version",
"type_info": "Varchar"
}
],
"parameters": {
"Left": [
"Int8"
]
},
"nullable": [
false
]
}
},
"6562c876826ad3091a14eb50fa1f961a971c1d1bb158fc3dcb55d469a73facc6": {
"query": "\n SELECT v.mod_id, v.author_id, v.name, v.version_number,\n v.changelog_url, v.date_published, v.downloads,\n v.release_channel\n FROM versions v\n WHERE v.id = $1\n ",
"describe": {
@ -770,6 +858,26 @@
]
}
},
"72c1e6de8f2c8d89be030454eeab6d5c9695164af2ebfb8d7e94b2deee4f130d": {
"query": "\n SELECT c.category\n FROM mods_categories mc\n INNER JOIN categories c ON mc.joining_category_id=c.id\n WHERE mc.joining_mod_id = $1\n ",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "category",
"type_info": "Varchar"
}
],
"parameters": {
"Left": [
"Int8"
]
},
"nullable": [
false
]
}
},
"72c75313688dfd88a659c5250c71b9899abd6186ab32a067a7d4b8a0846ebd18": {
"query": "\n INSERT INTO game_versions (version, type, created)\n VALUES ($1, COALESCE($2, 'other'), COALESCE($3, timezone('utc', now())))\n ON CONFLICT (version) DO UPDATE\n SET type = COALESCE($2, game_versions.type),\n created = COALESCE($3, game_versions.created)\n RETURNING id\n ",
"describe": {
@ -986,6 +1094,33 @@
]
}
},
"9d95d136d0e6eedee57e6aa524232c02609b89e4e26032e07403aabb69bea0d8": {
"query": "\n SELECT u.id, u.username FROM users u\n INNER JOIN team_members tm ON tm.user_id = u.id\n WHERE tm.team_id = $2 AND tm.role = $1\n ",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int8"
},
{
"ordinal": 1,
"name": "username",
"type_info": "Varchar"
}
],
"parameters": {
"Left": [
"Text",
"Int8"
]
},
"nullable": [
false,
false
]
}
},
"a2a99a640468a9fb8f0718e5aea6740cf5b33dafd5e038c154d6a13674fa999b": {
"query": "\n INSERT INTO mods (\n id, team_id, title, description, body_url,\n published, downloads, icon_url, issues_url,\n source_url, wiki_url, status\n )\n VALUES (\n $1, $2, $3, $4, $5,\n $6, $7, $8, $9,\n $10, $11, $12\n )\n ",
"describe": {
@ -1139,6 +1274,26 @@
"nullable": []
}
},
"b34577335d30ffe30327cdd5b3c029a187a1cae27bea99ff0bcf062f87468fe7": {
"query": "\n SELECT loaders.loader FROM versions\n INNER JOIN loaders_versions lv ON lv.version_id = versions.id\n INNER JOIN loaders ON loaders.id = lv.loader_id\n WHERE versions.mod_id = $1\n ",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "loader",
"type_info": "Varchar"
}
],
"parameters": {
"Left": [
"Int8"
]
},
"nullable": [
false
]
}
},
"b903ac4e686ef85ba28d698c668da07860e7f276b261d8f2cebb74e73b094970": {
"query": "\n DELETE FROM hashes\n WHERE EXISTS(\n SELECT 1 FROM files WHERE\n (files.version_id = $1) AND\n (hashes.file_id = files.id)\n )\n ",
"describe": {
@ -1248,26 +1403,6 @@
]
}
},
"c0899dcff4d7bc1ba3e953e5099210316bff2f98e6ab77ba84bc612eac4bce0a": {
"query": "\n SELECT gv.version FROM versions\n INNER JOIN game_versions_versions gvv ON gvv.joining_version_id=versions.id\n INNER JOIN game_versions gv ON gvv.game_version_id=gv.id\n WHERE versions.mod_id = $1\n ",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "version",
"type_info": "Varchar"
}
],
"parameters": {
"Left": [
"Int8"
]
},
"nullable": [
false
]
}
},
"c1fddbf97350871b79cb0c235b1f7488c6616b7c1dfbde76a712fd57e91ba158": {
"query": "\n SELECT id FROM game_versions\n WHERE version = $1\n ",
"describe": {
@ -1288,6 +1423,26 @@
]
}
},
"c59de96d66ebf26c0497674308550da125e3ce2314a8ae5b2f95d892f4205f90": {
"query": "\n SELECT gv.version FROM versions\n INNER JOIN game_versions_versions gvv ON gvv.joining_version_id=versions.id\n INNER JOIN game_versions gv ON gvv.game_version_id=gv.id\n WHERE versions.mod_id = $1\n ORDER BY gv.created ASC\n ",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "version",
"type_info": "Varchar"
}
],
"parameters": {
"Left": [
"Int8"
]
},
"nullable": [
false
]
}
},
"c64c487b56a25b252ff070fe03a7416e84260df8a6f938a018cc768598e9435b": {
"query": "\n SELECT category FROM categories\n WHERE id = $1\n ",
"describe": {

View File

@ -312,15 +312,38 @@ impl Mod {
.try_collect::<Vec<VersionId>>()
.await?;
let status = sqlx::query!(
"
SELECT status FROM statuses
WHERE id = $1
",
inner.status.0,
)
.fetch_one(executor)
.await?
.status;
Ok(Some(QueryMod {
inner,
categories,
versions,
status: crate::models::mods::ModStatus::from_str(&status),
}))
} else {
Ok(None)
}
}
/// Fetches the full data for every id in `mod_ids` by running
/// `Self::get_full` once per id and awaiting all lookups together.
///
/// The returned vector has one entry per requested id, in the same order;
/// an entry is `None` when `get_full` returned `None` for that id.
///
/// # Errors
///
/// Returns the first `sqlx::Error` produced by any of the lookups.
pub async fn get_many_full<'a, E>(
    mod_ids: Vec<ModId>,
    exec: E,
) -> Result<Vec<Option<QueryMod>>, sqlx::Error>
where
    E: sqlx::Executor<'a, Database = sqlx::Postgres> + Copy,
{
    // TODO: this could be optimized
    let lookups = mod_ids
        .into_iter()
        .map(|mod_id| Self::get_full(mod_id, exec));

    futures::future::try_join_all(lookups).await
}
}
pub struct QueryMod {
@ -328,4 +351,5 @@ pub struct QueryMod {
pub categories: Vec<String>,
pub versions: Vec<VersionId>,
pub status: crate::models::mods::ModStatus,
}

View File

@ -523,6 +523,18 @@ impl Version {
Ok(None)
}
}
/// Fetches the full data for every id in `version_ids` by running
/// `Self::get_full` once per id and awaiting all lookups together.
///
/// The returned vector has one entry per requested id, in the same order;
/// an entry is `None` when `get_full` returned `None` for that id.
///
/// # Errors
///
/// Returns the first `sqlx::Error` produced by any of the lookups.
pub async fn get_many_full<'a, E>(
    version_ids: Vec<VersionId>,
    exec: E,
) -> Result<Vec<Option<QueryVersion>>, sqlx::Error>
where
    E: sqlx::Executor<'a, Database = sqlx::Postgres> + Copy,
{
    // TODO: this could be optimized
    let lookups = version_ids
        .into_iter()
        .map(|version_id| Self::get_full(version_id, exec));

    futures::future::try_join_all(lookups).await
}
}
pub struct ReleaseChannel {

View File

@ -315,6 +315,11 @@ fn check_env_vars() {
check_var::<usize>("LOCAL_INDEX_INTERVAL");
// In theory this should be an OsString since it's a path, but
// dotenv doesn't support that. The usage of this does treat
// it as an OsString, though.
check_var::<String>("INDEX_CACHE_PATH");
check_var::<String>("GITHUB_CLIENT_ID");
check_var::<String>("GITHUB_CLIENT_SECRET");
}

View File

@ -5,7 +5,7 @@ use crate::models::error::ApiError;
use crate::models::mods::{ModId, ModStatus, VersionId};
use crate::models::users::UserId;
use crate::routes::version_creation::InitialVersionData;
use crate::search::indexing::queue::CreationQueue;
use crate::search::indexing::{queue::CreationQueue, IndexingError};
use actix_multipart::{Field, Multipart};
use actix_web::http::StatusCode;
use actix_web::web::Data;
@ -13,7 +13,6 @@ use actix_web::{post, HttpRequest, HttpResponse};
use futures::stream::StreamExt;
use serde::{Deserialize, Serialize};
use sqlx::postgres::PgPool;
use std::borrow::Cow;
use std::sync::Arc;
use thiserror::Error;
@ -25,6 +24,8 @@ pub enum CreateError {
SqlxDatabaseError(#[from] sqlx::Error),
#[error("Database Error: {0}")]
DatabaseError(#[from] models::DatabaseError),
#[error("Indexing Error: {0}")]
IndexingError(#[from] IndexingError),
#[error("Error while parsing multipart payload")]
MultipartError(actix_multipart::MultipartError),
#[error("Error while parsing JSON: {0}")]
@ -55,6 +56,7 @@ impl actix_web::ResponseError for CreateError {
CreateError::EnvError(..) => StatusCode::INTERNAL_SERVER_ERROR,
CreateError::SqlxDatabaseError(..) => StatusCode::INTERNAL_SERVER_ERROR,
CreateError::DatabaseError(..) => StatusCode::INTERNAL_SERVER_ERROR,
CreateError::IndexingError(..) => StatusCode::INTERNAL_SERVER_ERROR,
CreateError::FileHostingError(..) => StatusCode::INTERNAL_SERVER_ERROR,
CreateError::SerDeError(..) => StatusCode::BAD_REQUEST,
CreateError::MultipartError(..) => StatusCode::BAD_REQUEST,
@ -75,6 +77,7 @@ impl actix_web::ResponseError for CreateError {
CreateError::EnvError(..) => "environment_error",
CreateError::SqlxDatabaseError(..) => "database_error",
CreateError::DatabaseError(..) => "database_error",
CreateError::IndexingError(..) => "indexing_error",
CreateError::FileHostingError(..) => "file_hosting_error",
CreateError::SerDeError(..) => "invalid_input",
CreateError::MultipartError(..) => "invalid_input",
@ -460,40 +463,7 @@ async fn mod_create_inner(
status: status_id,
};
let versions_list = mod_create_data
.initial_versions
.iter()
.flat_map(|v| v.game_versions.iter().map(|name| name.0.clone()))
.collect::<std::collections::HashSet<String>>()
.into_iter()
.collect::<Vec<_>>();
let now = chrono::Utc::now();
let timestamp = now.timestamp();
let index_mod = crate::search::UploadSearchMod {
mod_id: format!("local-{}", mod_id),
title: mod_builder.title.clone(),
description: mod_builder.description.clone(),
categories: mod_create_data.categories.clone(),
versions: versions_list,
page_url: format!("https://modrinth.com/mod/{}", mod_id),
// This should really be optional in the index
icon_url: mod_builder.icon_url.clone().unwrap_or_else(String::new),
author: current_user.username.clone(),
author_url: format!("https://modrinth.com/user/{}", current_user.id),
// TODO: latest version info
latest_version: String::new(),
downloads: 0,
date_created: now,
created_timestamp: timestamp,
date_modified: now,
modified_timestamp: timestamp,
host: Cow::Borrowed("modrinth"),
empty: Cow::Borrowed("{}{}{}"),
};
indexing_queue.add(index_mod);
let response = crate::models::mods::Mod {
id: mod_id,
@ -505,7 +475,7 @@ async fn mod_create_inner(
updated: now,
status,
downloads: 0,
categories: mod_create_data.categories.clone(),
categories: mod_create_data.categories,
versions: mod_builder
.initial_versions
.iter()
@ -519,6 +489,11 @@ async fn mod_create_inner(
let _mod_id = mod_builder.insert(&mut *transaction).await?;
let index_mod =
crate::search::indexing::local_import::query_one(mod_id.into(), &mut *transaction)
.await?;
indexing_queue.add(index_mod);
Ok(HttpResponse::Ok().json(response))
}
}

View File

@ -33,43 +33,15 @@ pub async fn mods_get(
.map(|x| x.into())
.collect();
let mods_data = database::models::Mod::get_many(mod_ids, &**pool)
let mods_data = database::models::Mod::get_many_full(mod_ids, &**pool)
.await
.map_err(|e| ApiError::DatabaseError(e.into()))?;
let mut mods: Vec<models::mods::Mod> = Vec::new();
for m in mods_data {
let status = sqlx::query!(
"
SELECT status FROM statuses
WHERE id = $1
",
m.status.0,
)
.fetch_one(&**pool)
.await
.map_err(|e| ApiError::DatabaseError(e.into()))?
.status;
mods.push(models::mods::Mod {
id: m.id.into(),
team: m.team_id.into(),
title: m.title,
description: m.description,
body_url: m.body_url,
published: m.published,
updated: m.updated,
status: models::mods::ModStatus::from_str(&*status),
downloads: m.downloads as u32,
categories: vec![],
versions: vec![],
icon_url: m.icon_url,
issues_url: m.issues_url,
source_url: m.source_url,
wiki_url: m.wiki_url,
})
}
let mods = mods_data
.into_iter()
.filter_map(|m| m)
.map(convert_mod)
.collect::<Vec<_>>();
Ok(HttpResponse::Ok().json(mods))
}
@ -85,44 +57,34 @@ pub async fn mod_get(
.map_err(|e| ApiError::DatabaseError(e.into()))?;
if let Some(data) = mod_data {
let m = data.inner;
let status = sqlx::query!(
"
SELECT status FROM statuses
WHERE id = $1
",
m.status.0,
)
.fetch_one(&**pool)
.await
.map_err(|e| ApiError::DatabaseError(e.into()))?
.status;
let response = models::mods::Mod {
id: m.id.into(),
team: m.team_id.into(),
title: m.title,
description: m.description,
body_url: m.body_url,
published: m.published,
updated: m.updated,
status: models::mods::ModStatus::from_str(&*status),
downloads: m.downloads as u32,
categories: data.categories,
versions: data.versions.into_iter().map(|v| v.into()).collect(),
icon_url: m.icon_url,
issues_url: m.issues_url,
source_url: m.source_url,
wiki_url: m.wiki_url,
};
Ok(HttpResponse::Ok().json(response))
Ok(HttpResponse::Ok().json(convert_mod(data)))
} else {
Ok(HttpResponse::NotFound().body(""))
}
}
/// Converts a database `QueryMod` into the `Mod` model that the routes
/// serialize as JSON.
///
/// Both the single-mod and the multi-mod route go through this function,
/// so they return the same shape of data.
fn convert_mod(data: database::models::mod_item::QueryMod) -> models::mods::Mod {
    let record = data.inner;
    let version_ids = data.versions.into_iter().map(Into::into).collect();

    models::mods::Mod {
        id: record.id.into(),
        team: record.team_id.into(),
        title: record.title,
        description: record.description,
        body_url: record.body_url,
        published: record.published,
        updated: record.updated,
        status: data.status,
        downloads: record.downloads as u32,
        categories: data.categories,
        versions: version_ids,
        icon_url: record.icon_url,
        issues_url: record.issues_url,
        source_url: record.source_url,
        wiki_url: record.wiki_url,
    }
}
#[delete("{id}")]
pub async fn mod_delete(
req: HttpRequest,

View File

@ -4,7 +4,7 @@ use actix_web::{HttpResponse, Responder};
pub async fn not_found() -> impl Responder {
let data = ApiError {
error: "not_found",
description: "the route you called is not (yet) implemented",
description: "the requested route does not exist",
};
HttpResponse::NotFound().json(data)

View File

@ -48,7 +48,6 @@ pub struct VersionIds {
pub ids: String,
}
// TODO: Make this return the versions mod struct
#[get("versions")]
pub async fn versions_get(
web::Query(ids): web::Query<VersionIds>,
@ -58,30 +57,14 @@ pub async fn versions_get(
.into_iter()
.map(|x| x.into())
.collect();
let versions_data = database::models::Version::get_many(version_ids, &**pool)
let versions_data = database::models::Version::get_many_full(version_ids, &**pool)
.await
.map_err(|e| ApiError::DatabaseError(e.into()))?;
use models::mods::VersionType;
let versions: Vec<models::mods::Version> = versions_data
.into_iter()
.map(|data| models::mods::Version {
id: data.id.into(),
mod_id: data.mod_id.into(),
author_id: data.author_id.into(),
name: data.name,
version_number: data.version_number,
changelog_url: data.changelog_url,
date_published: data.date_published,
downloads: data.downloads as u32,
version_type: VersionType::Release,
files: vec![],
dependencies: Vec::new(), // TODO: dependencies
game_versions: vec![],
loaders: vec![],
})
.filter_map(|v| v)
.map(convert_version)
.collect();
Ok(HttpResponse::Ok().json(versions))
@ -98,61 +81,64 @@ pub async fn version_get(
.map_err(|e| ApiError::DatabaseError(e.into()))?;
if let Some(data) = version_data {
use models::mods::VersionType;
let response = models::mods::Version {
id: data.id.into(),
mod_id: data.mod_id.into(),
author_id: data.author_id.into(),
name: data.name,
version_number: data.version_number,
changelog_url: data.changelog_url,
date_published: data.date_published,
downloads: data.downloads as u32,
version_type: match data.release_channel.as_str() {
"release" => VersionType::Release,
"beta" => VersionType::Beta,
"alpha" => VersionType::Alpha,
_ => VersionType::Alpha,
},
files: data
.files
.into_iter()
.map(|f| {
models::mods::VersionFile {
url: f.url,
filename: f.filename,
// FIXME: Hashes are currently stored as an ascii byte slice instead
// of as an actual byte array in the database
hashes: f
.hashes
.into_iter()
.map(|(k, v)| Some((k, String::from_utf8(v).ok()?)))
.collect::<Option<_>>()
.unwrap_or_else(Default::default),
}
})
.collect(),
dependencies: Vec::new(), // TODO: dependencies
game_versions: data
.game_versions
.into_iter()
.map(models::mods::GameVersion)
.collect(),
loaders: data
.loaders
.into_iter()
.map(models::mods::ModLoader)
.collect(),
};
Ok(HttpResponse::Ok().json(response))
Ok(HttpResponse::Ok().json(convert_version(data)))
} else {
Ok(HttpResponse::NotFound().body(""))
}
}
/// Converts a database `QueryVersion` into the `Version` model that the
/// routes serialize as JSON.
///
/// Both the single-version and the multi-version route go through this
/// function, so they return the same shape of data.
fn convert_version(data: database::models::version_item::QueryVersion) -> models::mods::Version {
    use models::mods::VersionType;

    // "alpha" and any unrecognised release channel both map to `Alpha`.
    let version_type = match data.release_channel.as_str() {
        "release" => VersionType::Release,
        "beta" => VersionType::Beta,
        _ => VersionType::Alpha,
    };

    let files = data
        .files
        .into_iter()
        .map(|file| {
            // FIXME: Hashes are currently stored as an ascii byte slice instead
            // of as an actual byte array in the database
            //
            // If any hash of a file is not valid UTF-8, the whole hash map for
            // that file falls back to its default (empty) value.
            let hashes = file
                .hashes
                .into_iter()
                .map(|(algorithm, bytes)| {
                    String::from_utf8(bytes)
                        .ok()
                        .map(|hash| (algorithm, hash))
                })
                .collect::<Option<_>>()
                .unwrap_or_default();

            models::mods::VersionFile {
                url: file.url,
                filename: file.filename,
                hashes,
            }
        })
        .collect();

    let game_versions = data
        .game_versions
        .into_iter()
        .map(models::mods::GameVersion)
        .collect();

    let loaders = data
        .loaders
        .into_iter()
        .map(models::mods::ModLoader)
        .collect();

    models::mods::Version {
        id: data.id.into(),
        mod_id: data.mod_id.into(),
        author_id: data.author_id.into(),
        name: data.name,
        version_number: data.version_number,
        changelog_url: data.changelog_url,
        date_published: data.date_published,
        downloads: data.downloads as u32,
        version_type,
        files,
        dependencies: Vec::new(), // TODO: dependencies
        game_versions,
        loaders,
    }
}
#[delete("{version_id}")]
pub async fn version_delete(
req: HttpRequest,

View File

@ -6,217 +6,318 @@ use std::borrow::Cow;
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Attachment {
pub url: String,
pub thumbnail_url: String,
pub struct Attachment<'a> {
pub url: Cow<'a, str>,
pub thumbnail_url: Cow<'a, str>,
pub is_default: bool,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Category {
pub name: String,
pub struct Category<'a> {
pub name: Cow<'a, str>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct Author {
pub name: String,
pub url: String,
pub struct Author<'a> {
pub name: Cow<'a, str>,
pub url: Cow<'a, str>,
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct CurseVersion {
pub game_version: String,
pub struct CurseVersion<'a> {
pub game_version: Cow<'a, str>,
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct CurseForgeMod {
pub id: i32,
pub name: String,
pub authors: Vec<Author>,
pub attachments: Vec<Attachment>,
pub website_url: String,
pub summary: String,
pub struct LatestFile<'a> {
pub game_version: Vec<Cow<'a, str>>,
pub modules: Vec<VersionModule<'a>>,
}
/// One entry of a file's `modules` list in the CurseForge addon response.
///
/// Only `foldername` is captured by this struct.
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct VersionModule<'a> {
/// Name of the module; `index_curseforge` compares it against loader
/// metadata file names such as `fabric.mod.json` or `mcmod.info`.
pub foldername: Cow<'a, str>,
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct CurseForgeMod<'a> {
pub id: u32,
pub name: Cow<'a, str>,
pub authors: Vec<Option<Author<'a>>>,
pub attachments: Vec<Attachment<'a>>,
pub website_url: Cow<'a, str>,
pub summary: Cow<'a, str>,
pub download_count: f32,
pub categories: Vec<Category>,
pub game_version_latest_files: Vec<CurseVersion>,
pub date_created: String,
pub date_modified: String,
pub game_slug: String,
pub categories: Vec<Category<'a>>,
pub latest_files: Vec<LatestFile<'a>>,
pub game_version_latest_files: Vec<CurseVersion<'a>>,
pub date_created: chrono::DateTime<chrono::Utc>,
pub date_modified: chrono::DateTime<chrono::Utc>,
pub category_section: CategorySection,
}
/// The `categorySection` object of a CurseForge addon.
///
/// Only `id` is captured by this struct.
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct CategorySection {
/// Section id; `index_curseforge` keeps only addons whose id is 8.
pub id: u32,
}
/// Flags recording which mod loaders were detected for one CurseForge mod
/// while it is being indexed. All flags start out `false` (via `Default`).
#[derive(Default)]
struct Loaders {
forge: bool,
fabric: bool,
liteloader: bool,
rift: bool,
}
// Lookup table from CurseForge category names (keys) to the category names
// used for indexed mods (values). Several CurseForge categories collapse into
// the same target category. `index_curseforge` ignores categories that are
// not listed here, except "Fabric", which it treats as a loader.
lazy_static::lazy_static! {
static ref CURSEFORGE_CATEGORIES: std::collections::HashMap<&'static str, &'static str> = {
let mut map = std::collections::HashMap::new();
map.insert("World Gen", "worldgen");
map.insert("Biomes", "worldgen");
map.insert("Ores and Resources", "worldgen");
map.insert("Structures", "worldgen");
map.insert("Dimensions", "worldgen");
map.insert("Mobs", "worldgen");
map.insert("Technology", "technology");
map.insert("Processing", "technology");
map.insert("Player Transport", "technology");
map.insert("Energy, Fluid, and Item Transport", "technology");
map.insert("Food", "food");
map.insert("Farming", "food");
map.insert("Energy", "technology");
map.insert("Redstone", "technology");
map.insert("Genetics", "technology");
map.insert("Magic", "magic");
map.insert("Storage", "storage");
map.insert("API and Library", "library");
map.insert("Adventure and RPG", "adventure");
map.insert("Map and Information", "utility");
map.insert("Cosmetic", "decoration");
map.insert("Addons", "misc");
map.insert("Thermal Expansion", "misc");
map.insert("Tinker's Construct", "misc");
map.insert("Industrial Craft", "misc");
map.insert("Thaumcraft", "misc");
map.insert("Buildcraft", "misc");
map.insert("Forestry", "misc");
map.insert("Blood Magic", "misc");
map.insert("Lucky Blocks", "misc");
map.insert("Applied Energistics 2", "misc");
map.insert("CraftTweaker", "misc");
map.insert("Miscellaneous", "misc");
map.insert("Armor, Tools, and Weapons", "equipment");
map.insert("Server Utility", "utility");
map
};
}
pub async fn index_curseforge(
start_index: i32,
end_index: i32,
start_index: u32,
end_index: u32,
cache_path: &std::path::Path,
) -> Result<Vec<UploadSearchMod>, IndexingError> {
info!("Indexing curseforge mods!");
let start = std::time::Instant::now();
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
// Try to load the id cache written by a previous run. Both opening the
// file and parsing it as a JSON array of ids may fail; either failure
// falls back to requesting the full id range below.
let cache = std::fs::File::open(cache_path)
    .map(std::io::BufReader::new)
    .map(serde_json::from_reader::<_, Vec<u32>>);

let requested_ids;

// This caching system can't handle segmented indexing
if let Ok(Ok(mut cache)) = cache {
    // Continue with the id *after* the last cached one. Starting at the
    // cached id itself would put that id into the request twice. An empty
    // cache starts at `start_index`.
    let next = cache.last().map_or(start_index, |last| last + 1);
    cache.extend(next..end_index);
    requested_ids = serde_json::to_string(&cache)?;
} else {
    // This ends up being around 3 MiB
    // Serde json is better than using debug formatting since it doesn't
    // include spaces after commas, removing a lot of the extra size
    requested_ids = serde_json::to_string(&(start_index..end_index).collect::<Vec<_>>())?;
}
let res = reqwest::Client::new()
.post("https://addons-ecs.forgesvc.net/api/v2/addon")
.header(reqwest::header::CONTENT_TYPE, "application/json")
.body(format!(
"{:?}",
(start_index..end_index).collect::<Vec<_>>()
))
.body(requested_ids)
.send()
.await
.map_err(IndexingError::CurseforgeImportError)?;
.await?;
let curseforge_mods: Vec<CurseForgeMod> = res
.json()
.await
.map_err(IndexingError::CurseforgeImportError)?;
// The response ends up being about 300MiB, so we have to deal with
// it efficiently. Reading it as bytes and then deserializing with
// borrowed data should avoid copying it, but it may take a bit more
// memory. To do this efficiently, we would have to get serde_json
// to skip deserializing mods with category_section.id != 8
// It's only 100MiB when using the cached ids, since that eliminates
// all "addons" that aren't minecraft mods
let buffer = res.bytes().await?;
for curseforge_mod in curseforge_mods {
if curseforge_mod.game_slug != "minecraft"
|| !curseforge_mod.website_url.contains("/mc-mods/")
{
continue;
let mut curseforge_mods: Vec<CurseForgeMod> = serde_json::from_slice(&buffer)?;
// This should remove many of the mods from the list before processing
curseforge_mods.retain(|m| m.category_section.id == 8);
// Only write to the cache if this doesn't skip mods at the start
// The caching system iterates through all ids normally past the last
// id in the cache, so the end_index shouldn't matter.
if start_index <= 1 {
let mut ids = curseforge_mods.iter().map(|m| m.id).collect::<Vec<_>>();
ids.sort_unstable();
if let Err(e) = std::fs::write(cache_path, serde_json::to_string(&ids)?) {
log::warn!("Error writing to index id cache: {}", e);
}
}
for mut curseforge_mod in curseforge_mods {
// The gameId of minecraft is 432
// The categorySection.id for mods is always 8
// The categorySection.id 8 appears to be unique to minecraft mods
// if curseforge_mod.game_slug != "minecraft"
// || !curseforge_mod.website_url.contains("/mc-mods/")
// if curseforge_mod.category_section.id != 8 {
// continue;
// }
let mut mod_game_versions = vec![];
let mut using_forge = false;
let mut using_fabric = false;
let mut loaders = Loaders::default();
for version in curseforge_mod.game_version_latest_files {
if let Some(parsed) = version
.game_version
.get(2..)
.and_then(|f| f.parse::<f32>().ok())
{
if parsed < 14.0 {
using_forge = true;
// Detect the supported mod loaders from the mod's latest files.
for file in curseforge_mod.latest_files {
    // Entries of `game_version` may name a loader instead of a game
    // version; each loader name sets the flag of that same loader.
    for version in file.game_version {
        match &*version {
            "Fabric" => loaders.fabric = true,
            "Forge" => loaders.forge = true,
            "Rift" => loaders.rift = true,
            _ => (),
        }
    }
    // Module names that match a loader's metadata file also identify
    // the loader.
    for module in file.modules {
        match &*module.foldername {
            "fabric.mod.json" => loaders.fabric = true,
            "mcmod.info" => loaders.forge = true, // 1.13+ forge uses META-INF/mods.toml
            "riftmod.json" => loaders.rift = true,
            "litemod.json" => loaders.liteloader = true,
            _ => (),
        }
    }
    // TODO: files ending with .litemod should also enable liteloader
    // if we decide to add true support for it; That requires extra
    // deserializing work, so I'm not adding it for now
}
let mut latest = None;
for version in curseforge_mod.game_version_latest_files {
let mut split = version.game_version.split('.');
let version_numbers = (
split.next().and_then(|s| s.parse::<u8>().ok()).unwrap_or(0),
split.next().and_then(|s| s.parse::<u8>().ok()).unwrap_or(0),
split.next().and_then(|s| s.parse::<u8>().ok()).unwrap_or(0),
);
if let Some((number, _)) = latest {
if version_numbers > number {
latest = Some((version_numbers, version.game_version.clone()));
}
} else {
latest = Some((version_numbers, version.game_version.clone()))
}
if ((1, 0, 0)..(1, 14, 0)).contains(&version_numbers) {
// Is this a reasonable assumption to make?
loaders.forge = true;
}
mod_game_versions.push(version.game_version);
}
let mut mod_categories = vec![];
let mut mod_categories = std::collections::HashSet::new();
for category in curseforge_mod.categories {
match &category.name[..] {
"World Gen" => mod_categories.push(String::from("worldgen")),
"Biomes" => mod_categories.push(String::from("worldgen")),
"Ores and Resources" => mod_categories.push(String::from("worldgen")),
"Structures" => mod_categories.push(String::from("worldgen")),
"Dimensions" => mod_categories.push(String::from("worldgen")),
"Mobs" => mod_categories.push(String::from("worldgen")),
"Technology" => mod_categories.push(String::from("technology")),
"Processing" => mod_categories.push(String::from("technology")),
"Player Transport" => mod_categories.push(String::from("technology")),
"Energy, Fluid, and Item Transport" => {
mod_categories.push(String::from("technology"))
}
"Food" => mod_categories.push(String::from("food")),
"Farming" => mod_categories.push(String::from("food")),
"Energy" => mod_categories.push(String::from("technology")),
"Redstone" => mod_categories.push(String::from("technology")),
"Genetics" => mod_categories.push(String::from("technology")),
"Magic" => mod_categories.push(String::from("magic")),
"Storage" => mod_categories.push(String::from("storage")),
"API and Library" => mod_categories.push(String::from("library")),
"Adventure and RPG" => mod_categories.push(String::from("adventure")),
"Map and Information" => mod_categories.push(String::from("utility")),
"Cosmetic" => mod_categories.push(String::from("decoration")),
"Addons" => mod_categories.push(String::from("misc")),
"Thermal Expansion" => mod_categories.push(String::from("misc")),
"Tinker's Construct" => mod_categories.push(String::from("misc")),
"Industrial Craft" => mod_categories.push(String::from("misc")),
"Thaumcraft" => mod_categories.push(String::from("misc")),
"Buildcraft" => mod_categories.push(String::from("misc")),
"Forestry" => mod_categories.push(String::from("misc")),
"Blood Magic" => mod_categories.push(String::from("misc")),
"Lucky Blocks" => mod_categories.push(String::from("misc")),
"Applied Energistics 2" => mod_categories.push(String::from("misc")),
"CraftTweaker" => mod_categories.push(String::from("misc")),
"Miscellaneous" => mod_categories.push(String::from("misc")),
"Armor, Tools, and Weapons" => mod_categories.push(String::from("equipment")),
"Server Utility" => mod_categories.push(String::from("utility")),
"Fabric" => mod_categories.push(String::from("fabric")),
_ => {}
if category.name == "Fabric" {
loaders.fabric = true;
} else if let Some(category) = CURSEFORGE_CATEGORIES.get(&*category.name) {
mod_categories.insert(*category);
}
}
if mod_categories.iter().any(|e| e == "fabric") {
using_fabric = true;
if !(loaders.fabric || loaders.rift || loaders.liteloader || loaders.forge) {
// Assume that mods without any detected loader are forge mods
loaders.forge = true;
}
mod_categories.sort_unstable();
mod_categories.dedup();
mod_categories.truncate(3);
let mut mod_categories = mod_categories
.into_iter()
.take(3)
.map(Cow::Borrowed)
.collect::<Vec<_>>();
if using_forge {
mod_categories.push(String::from("forge"));
if loaders.forge {
mod_categories.push(Cow::Borrowed("forge"));
}
if using_fabric {
// The only way this could happen is if "fabric" is already a category
// mod_categories.push(String::from("fabric"));
if loaders.fabric {
mod_categories.push(Cow::Borrowed("fabric"));
}
let mut mod_attachments = curseforge_mod.attachments;
mod_attachments.retain(|x| x.is_default);
let latest_version = latest
.map(|(_, name)| name)
.unwrap_or_else(|| Cow::Borrowed("None"));
if mod_attachments.is_empty() {
mod_attachments.push(Attachment {
url: String::new(),
thumbnail_url: String::new(),
is_default: true,
})
}
let latest_version = if !mod_game_versions.is_empty() {
mod_game_versions[0].to_string()
} else {
"None".to_string()
};
let icon_url = mod_attachments[0]
.thumbnail_url
.replace("/256/256/", "/64/64/");
let created = curseforge_mod
.date_created
.parse::<chrono::DateTime<chrono::Utc>>()?;
let modified = curseforge_mod
.date_modified
.parse::<chrono::DateTime<chrono::Utc>>()?;
let icon_url = curseforge_mod
.attachments
.iter()
.find(|a| a.is_default)
.map(|a| a.thumbnail_url.replace("/256/256/", "/64/64/"))
.unwrap_or_default();
let author;
let author_url;
if let Some(user) = curseforge_mod.authors.get(0) {
author = user.name.clone();
author_url = user.url.clone();
if let Some(user) = curseforge_mod
.authors
.get_mut(0)
.map(Option::take)
.flatten()
{
author = user.name.into_owned();
author_url = user.url.into_owned();
} else {
author = String::from("unknown");
author_url = curseforge_mod.website_url.clone();
author = "unknown".to_owned();
author_url = String::from(&*curseforge_mod.website_url);
}
docs_to_add.push(UploadSearchMod {
mod_id: format!("curse-{}", curseforge_mod.id),
author,
title: curseforge_mod.name,
title: curseforge_mod.name.into_owned(),
description: curseforge_mod.summary.chars().take(150).collect(),
categories: mod_categories,
versions: mod_game_versions.clone(),
versions: mod_game_versions.into_iter().map(String::from).collect(),
downloads: curseforge_mod.download_count as i32,
page_url: curseforge_mod.website_url,
page_url: curseforge_mod.website_url.into_owned(),
icon_url,
author_url,
date_created: created,
created_timestamp: created.timestamp(),
date_modified: modified,
modified_timestamp: modified.timestamp(),
date_created: curseforge_mod.date_created,
created_timestamp: curseforge_mod.date_created.timestamp(),
date_modified: curseforge_mod.date_modified,
modified_timestamp: curseforge_mod.date_modified.timestamp(),
latest_version,
host: Cow::Borrowed("curseforge"),
empty: Cow::Borrowed("{}{}{}"),
})
}
let duration = start.elapsed();
info!(
"Finished indexing curseforge; Took {:5.2}s",
duration.as_secs_f32()
);
Ok(docs_to_add)
}

View File

@ -20,12 +20,13 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingE
while let Some(result) = mods.next().await {
if let Ok(mod_data) = result {
let versions: Vec<String> = sqlx::query!(
let versions = sqlx::query!(
"
SELECT gv.version FROM versions
INNER JOIN game_versions_versions gvv ON gvv.joining_version_id=versions.id
INNER JOIN game_versions gv ON gvv.game_version_id=gv.id
WHERE versions.mod_id = $1
ORDER BY gv.created ASC
",
mod_data.id
)
@ -34,7 +35,7 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingE
.try_collect::<Vec<String>>()
.await?;
let loaders: Vec<String> = sqlx::query!(
let loaders = sqlx::query!(
"
SELECT loaders.loader FROM versions
INNER JOIN loaders_versions lv ON lv.version_id = versions.id
@ -44,8 +45,8 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingE
mod_data.id
)
.fetch_many(&pool)
.try_filter_map(|e| async { Ok(e.right().map(|c| c.loader)) })
.try_collect::<Vec<String>>()
.try_filter_map(|e| async { Ok(e.right().map(|c| Cow::Owned(c.loader))) })
.try_collect::<Vec<Cow<str>>>()
.await?;
let mut categories = sqlx::query!(
@ -58,8 +59,8 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingE
mod_data.id
)
.fetch_many(&pool)
.try_filter_map(|e| async { Ok(e.right().map(|c| c.category)) })
.try_collect::<Vec<String>>()
.try_filter_map(|e| async { Ok(e.right().map(|c| Cow::Owned(c.category))) })
.try_collect::<Vec<Cow<str>>>()
.await?;
categories.extend(loaders);
@ -85,6 +86,15 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingE
let mod_id = crate::models::ids::ModId(mod_data.id as u64);
let author_id = crate::models::ids::UserId(user.id as u64);
// TODO: is this correct? This just gets the latest version of
// minecraft that this mod has a version that supports; it doesn't
// take betas or other info into account.
let latest_version = versions
.get(0)
.cloned()
.map(Cow::Owned)
.unwrap_or_else(|| Cow::Borrowed(""));
docs_to_add.push(UploadSearchMod {
mod_id: format!("local-{}", mod_id),
title: mod_data.title,
@ -100,7 +110,7 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingE
created_timestamp: mod_data.published.timestamp(),
date_modified: mod_data.updated,
modified_timestamp: mod_data.updated.timestamp(),
latest_version: "".to_string(), // TODO: Info about latest version
latest_version,
host: Cow::Borrowed("modrinth"),
empty: Cow::Borrowed("{}{}{}"),
});
@ -109,3 +119,112 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingE
Ok(docs_to_add)
}
/// Builds the search-index document for a single locally hosted mod.
///
/// Runs five queries on `exec`, one after another: the mod row itself, the
/// game versions supported by the mod's versions, the loaders of those
/// versions, the mod's categories, and the team member holding
/// `OWNER_ROLE` on the mod's team. Loaders are appended to the category list
/// so both end up in the `categories` field of the document.
///
/// # Errors
///
/// Any error from the queries is propagated with `?`. The mod row and the
/// owner lookup use `fetch_one`, so this also fails when either of those
/// rows cannot be fetched.
pub async fn query_one(
    id: crate::database::models::ModId,
    exec: &mut sqlx::PgConnection,
) -> Result<UploadSearchMod, IndexingError> {
    // NOTE(review): the SQL literals below are reproduced verbatim on purpose;
    // the checked-in sqlx query data appears to store the query text, so
    // confirm before reformatting them.
    let mod_data = sqlx::query!(
        "
SELECT m.id, m.title, m.description, m.downloads, m.icon_url, m.body_url, m.published, m.updated, m.team_id
FROM mods m
WHERE id = $1
",
        id.0,
    )
    .fetch_one(&mut *exec)
    .await?;

    // Game versions supported by any version of this mod, oldest first.
    let versions = sqlx::query!(
        "
SELECT gv.version FROM versions
INNER JOIN game_versions_versions gvv ON gvv.joining_version_id=versions.id
INNER JOIN game_versions gv ON gvv.game_version_id=gv.id
WHERE versions.mod_id = $1
ORDER BY gv.created ASC
",
        mod_data.id
    )
    .fetch_many(&mut *exec)
    .try_filter_map(|e| async { Ok(e.right().map(|c| c.version)) })
    .try_collect::<Vec<String>>()
    .await?;

    let loaders = sqlx::query!(
        "
SELECT loaders.loader FROM versions
INNER JOIN loaders_versions lv ON lv.version_id = versions.id
INNER JOIN loaders ON loaders.id = lv.loader_id
WHERE versions.mod_id = $1
",
        mod_data.id
    )
    .fetch_many(&mut *exec)
    .try_filter_map(|e| async { Ok(e.right().map(|c| Cow::Owned(c.loader))) })
    .try_collect::<Vec<Cow<str>>>()
    .await?;

    let mut categories = sqlx::query!(
        "
SELECT c.category
FROM mods_categories mc
INNER JOIN categories c ON mc.joining_category_id=c.id
WHERE mc.joining_mod_id = $1
",
        mod_data.id
    )
    .fetch_many(&mut *exec)
    .try_filter_map(|e| async { Ok(e.right().map(|c| Cow::Owned(c.category))) })
    .try_collect::<Vec<Cow<str>>>()
    .await?;

    // Loaders are indexed alongside the regular categories.
    categories.extend(loaders);

    // The owner of the mod's team is reported as the mod's author.
    let user = sqlx::query!(
        "
SELECT u.id, u.username FROM users u
INNER JOIN team_members tm ON tm.user_id = u.id
WHERE tm.team_id = $2 AND tm.role = $1
",
        crate::models::teams::OWNER_ROLE,
        mod_data.team_id,
    )
    .fetch_one(&mut *exec)
    .await?;

    // A mod without an icon is indexed with an empty icon URL.
    let icon_url = mod_data.icon_url.unwrap_or_default();

    let mod_id = crate::models::ids::ModId(mod_data.id as u64);
    let author_id = crate::models::ids::UserId(user.id as u64);

    // `versions` is sorted by `gv.created` ascending, so the most recently
    // created game version is the last element, not the first.
    // TODO: is this correct? This just gets the latest version of
    // minecraft that this mod has a version that supports; it doesn't
    // take betas or other info into account.
    let latest_version = versions
        .last()
        .cloned()
        .map(Cow::Owned)
        .unwrap_or_else(|| Cow::Borrowed(""));

    Ok(UploadSearchMod {
        mod_id: format!("local-{}", mod_id),
        title: mod_data.title,
        description: mod_data.description,
        categories,
        versions,
        downloads: mod_data.downloads,
        page_url: format!("https://modrinth.com/mod/{}", mod_id),
        icon_url,
        author: user.username,
        author_url: format!("https://modrinth.com/user/{}", author_id),
        date_created: mod_data.published,
        created_timestamp: mod_data.published.timestamp(),
        date_modified: mod_data.updated,
        modified_timestamp: mod_data.updated.timestamp(),
        latest_version,
        host: Cow::Borrowed("modrinth"),
        empty: Cow::Borrowed("{}{}{}"),
    })
}

View File

@ -18,7 +18,7 @@ pub enum IndexingError {
#[error("Error while connecting to the MeiliSearch database")]
IndexDBError(#[from] meilisearch_sdk::errors::Error),
#[error("Error while importing mods from CurseForge")]
CurseforgeImportError(reqwest::Error),
CurseforgeImportError(#[from] reqwest::Error),
#[error("Error while serializing or deserializing JSON: {0}")]
SerDeError(#[from] serde_json::Error),
#[error("Error while parsing a timestamp: {0}")]
@ -63,6 +63,8 @@ pub async fn index_mods(
) -> Result<(), IndexingError> {
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
let cache_path = std::path::PathBuf::from(std::env::var_os("INDEX_CACHE_PATH").unwrap());
if settings.index_local {
docs_to_add.append(&mut index_local(pool.clone()).await?);
}
@ -72,7 +74,7 @@ pub async fn index_mods(
.map(|i| i.parse().unwrap())
.unwrap_or(450_000);
docs_to_add.append(&mut index_curseforge(1, end_index).await?);
docs_to_add.append(&mut index_curseforge(1, end_index, &cache_path).await?);
}
// Write Indices
@ -284,3 +286,60 @@ fn default_settings() -> Settings {
}
//endregion
/// Best-effort ordering for Mojang-style Minecraft version strings.
///
/// This shouldn't be relied on for proper sorting, but it makes an attempt
/// at getting proper sorting for Mojang's versions. It isn't currently used;
/// it is kept in case someone needs it in the future.
///
/// Rules, in the order they are applied:
/// - a string without a `.` sorts before a string with one;
/// - both strings are split on `.` and `-`, and only the first four pieces
///   are considered;
/// - if the first pieces differ, a leading `"1"` sorts after anything else,
///   otherwise the first pieces are compared as strings;
/// - for the second and third pieces, a non-numeric piece sorts before a
///   numeric one (a missing third piece counts as numeric), and two numeric
///   pieces are compared by value, so `1.9 < 1.10`;
/// - a version without a fourth piece sorts after one that has it.
#[allow(dead_code)]
pub fn sort_mods(a: &str, b: &str) -> std::cmp::Ordering {
    use std::cmp::Ordering;

    /// True when every character is an ASCII digit (also true for `""`).
    fn is_numeric(part: &str) -> bool {
        part.chars().all(|c| c.is_ascii_digit())
    }

    /// Compares two pieces, by numeric value when both are all-digit.
    ///
    /// Digits are compared without parsing (leading zeros stripped, then by
    /// length, then lexicographically) so arbitrarily long runs cannot
    /// overflow. The raw strings break ties, so `Equal` is only returned for
    /// identical pieces.
    fn cmp_part(lhs: Option<&str>, rhs: Option<&str>) -> Ordering {
        match (lhs, rhs) {
            (Some(x), Some(y)) if is_numeric(x) && is_numeric(y) => {
                let (xs, ys) = (x.trim_start_matches('0'), y.trim_start_matches('0'));
                xs.len()
                    .cmp(&ys.len())
                    .then_with(|| xs.cmp(ys))
                    .then_with(|| x.cmp(y))
            }
            _ => lhs.cmp(&rhs),
        }
    }

    // Versions without a dot sort before dotted ones.
    let dotted = a.contains('.').cmp(&b.contains('.'));
    if dotted != Ordering::Equal {
        return dotted;
    }

    let mut a = a.split(&['.', '-'] as &[char]);
    let mut b = b.split(&['.', '-'] as &[char]);
    let a = (a.next(), a.next(), a.next(), a.next());
    let b = (b.next(), b.next(), b.next(), b.next());

    if a.0 != b.0 {
        return match (a.0 == Some("1"), b.0 == Some("1")) {
            (false, false) => a.0.cmp(&b.0),
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
            (true, true) => Ordering::Equal, // unreachable
        };
    }

    // Second piece: missing < non-numeric < numeric.
    let kind = a.1.map(is_numeric).cmp(&b.1.map(is_numeric));
    if kind != Ordering::Equal {
        return kind;
    }
    if a.1 != b.1 {
        return cmp_part(a.1, b.1);
    }

    // Third piece: a missing piece counts as numeric, so a suffix such as
    // `pre1` sorts before the plain release.
    let kind = a.2
        .map(is_numeric)
        .unwrap_or(true)
        .cmp(&b.2.map(is_numeric).unwrap_or(true));
    if kind != Ordering::Equal {
        return kind;
    }
    if a.2 != b.2 {
        return cmp_part(a.2, b.2);
    }

    // Fourth piece: having one at all sorts before not having one.
    match (a.3.is_some(), b.3.is_some()) {
        (false, false) => Ordering::Equal,
        (false, true) => Ordering::Greater,
        (true, false) => Ordering::Less,
        (true, true) => cmp_part(a.3, b.3),
    }
}

View File

@ -66,13 +66,13 @@ pub struct UploadSearchMod {
pub author: String,
pub title: String,
pub description: String,
pub categories: Vec<String>,
pub categories: Vec<Cow<'static, str>>,
pub versions: Vec<String>,
pub downloads: i32,
pub page_url: String,
pub icon_url: String,
pub author_url: String,
pub latest_version: String,
pub latest_version: Cow<'static, str>,
/// RFC 3339 formatted creation date of the mod
pub date_created: DateTime<Utc>,