Fix high memory usage during search (#798)

* Fix high memory usage during search

* Fix settings lag

* Fix clippy + fmt
This commit is contained in:
Geometrically 2023-12-12 20:11:56 -07:00 committed by GitHub
parent 00e55b1874
commit f53b6b550f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 108 additions and 162 deletions

View File

@ -492,6 +492,12 @@ impl Project {
where where
E: sqlx::Acquire<'a, Database = sqlx::Postgres>, E: sqlx::Acquire<'a, Database = sqlx::Postgres>,
{ {
let project_strings = project_strings
.iter()
.map(|x| x.to_string())
.unique()
.collect::<Vec<String>>();
if project_strings.is_empty() { if project_strings.is_empty() {
return Ok(Vec::new()); return Ok(Vec::new());
} }
@ -500,10 +506,7 @@ impl Project {
let mut exec = exec.acquire().await?; let mut exec = exec.acquire().await?;
let mut found_projects = Vec::new(); let mut found_projects = Vec::new();
let mut remaining_strings = project_strings let mut remaining_strings = project_strings.clone();
.iter()
.map(|x| x.to_string())
.collect::<Vec<_>>();
let mut project_ids = project_strings let mut project_ids = project_strings
.iter() .iter()

View File

@ -494,6 +494,12 @@ impl Version {
where where
E: sqlx::Acquire<'a, Database = sqlx::Postgres>, E: sqlx::Acquire<'a, Database = sqlx::Postgres>,
{ {
let version_ids = version_ids
.iter()
.unique()
.copied()
.collect::<Vec<VersionId>>();
use futures::stream::TryStreamExt; use futures::stream::TryStreamExt;
if version_ids.is_empty() { if version_ids.is_empty() {

View File

@ -1,4 +1,4 @@
mod admin; pub(crate) mod admin;
pub mod flows; pub mod flows;
pub mod pats; pub mod pats;
pub mod session; pub mod session;

View File

@ -1,147 +0,0 @@
use crate::auth::validate::get_user_record_from_bearer_token;
use crate::database::redis::RedisPool;
use crate::models::analytics::Download;
use crate::models::ids::ProjectId;
use crate::models::pats::Scopes;
use crate::queue::analytics::AnalyticsQueue;
use crate::queue::maxmind::MaxMindIndexer;
use crate::queue::session::AuthQueue;
use crate::routes::ApiError;
use crate::search::SearchConfig;
use crate::util::date::get_current_tenths_of_ms;
use crate::util::guards::admin_key_guard;
use actix_web::{patch, post, web, HttpRequest, HttpResponse};
use serde::Deserialize;
use sqlx::PgPool;
use std::collections::HashMap;
use std::net::Ipv4Addr;
use std::sync::Arc;
pub fn config(cfg: &mut web::ServiceConfig) {
cfg.service(
web::scope("admin")
.service(count_download)
.service(force_reindex),
);
}
#[derive(Deserialize)]
pub struct DownloadBody {
pub url: String,
pub project_id: ProjectId,
pub version_name: String,
pub ip: String,
pub headers: HashMap<String, String>,
}
// This is an internal route, cannot be used without key
#[patch("/_count-download", guard = "admin_key_guard")]
#[allow(clippy::too_many_arguments)]
pub async fn count_download(
req: HttpRequest,
pool: web::Data<PgPool>,
redis: web::Data<RedisPool>,
maxmind: web::Data<Arc<MaxMindIndexer>>,
analytics_queue: web::Data<Arc<AnalyticsQueue>>,
session_queue: web::Data<AuthQueue>,
download_body: web::Json<DownloadBody>,
) -> Result<HttpResponse, ApiError> {
let token = download_body
.headers
.iter()
.find(|x| x.0.to_lowercase() == "authorization")
.map(|x| &**x.1);
let user = get_user_record_from_bearer_token(&req, token, &**pool, &redis, &session_queue)
.await
.ok()
.flatten();
let project_id: crate::database::models::ids::ProjectId = download_body.project_id.into();
let id_option = crate::models::ids::base62_impl::parse_base62(&download_body.version_name)
.ok()
.map(|x| x as i64);
let (version_id, project_id) = if let Some(version) = sqlx::query!(
"
SELECT v.id id, v.mod_id mod_id FROM files f
INNER JOIN versions v ON v.id = f.version_id
WHERE f.url = $1
",
download_body.url,
)
.fetch_optional(pool.as_ref())
.await?
{
(version.id, version.mod_id)
} else if let Some(version) = sqlx::query!(
"
SELECT id, mod_id FROM versions
WHERE ((version_number = $1 OR id = $3) AND mod_id = $2)
",
download_body.version_name,
project_id as crate::database::models::ids::ProjectId,
id_option
)
.fetch_optional(pool.as_ref())
.await?
{
(version.id, version.mod_id)
} else {
return Err(ApiError::InvalidInput(
"Specified version does not exist!".to_string(),
));
};
let url = url::Url::parse(&download_body.url)
.map_err(|_| ApiError::InvalidInput("invalid download URL specified!".to_string()))?;
let ip = crate::routes::analytics::convert_to_ip_v6(&download_body.ip)
.unwrap_or_else(|_| Ipv4Addr::new(127, 0, 0, 1).to_ipv6_mapped());
analytics_queue.add_download(Download {
recorded: get_current_tenths_of_ms(),
domain: url.host_str().unwrap_or_default().to_string(),
site_path: url.path().to_string(),
user_id: user
.and_then(|(scopes, x)| {
if scopes.contains(Scopes::PERFORM_ANALYTICS) {
Some(x.id.0 as u64)
} else {
None
}
})
.unwrap_or(0),
project_id: project_id as u64,
version_id: version_id as u64,
ip,
country: maxmind.query(ip).await.unwrap_or_default(),
user_agent: download_body
.headers
.get("user-agent")
.cloned()
.unwrap_or_default(),
headers: download_body
.headers
.clone()
.into_iter()
.filter(|x| !crate::routes::analytics::FILTERED_HEADERS.contains(&&*x.0.to_lowercase()))
.collect(),
});
Ok(HttpResponse::NoContent().body(""))
}
#[post("/_force_reindex", guard = "admin_key_guard")]
pub async fn force_reindex(
pool: web::Data<PgPool>,
redis: web::Data<RedisPool>,
config: web::Data<SearchConfig>,
) -> Result<HttpResponse, ApiError> {
use crate::search::indexing::index_projects;
let redis = redis.get_ref();
index_projects(pool.as_ref().clone(), redis.clone(), &config).await?;
Ok(HttpResponse::NoContent().finish())
}

View File

@ -1,4 +1,3 @@
mod admin;
mod moderation; mod moderation;
mod notifications; mod notifications;
pub(crate) mod project_creation; pub(crate) mod project_creation;
@ -20,7 +19,7 @@ pub fn config(cfg: &mut actix_web::web::ServiceConfig) {
cfg.service( cfg.service(
actix_web::web::scope("v2") actix_web::web::scope("v2")
.wrap(default_cors()) .wrap(default_cors())
.configure(admin::config) .configure(super::internal::admin::config)
// Todo: separate these- they need to also follow v2-v3 conversion // Todo: separate these- they need to also follow v2-v3 conversion
.configure(super::internal::session::config) .configure(super::internal::session::config)
.configure(super::internal::flows::config) .configure(super::internal::flows::config)

View File

@ -137,17 +137,95 @@ async fn create_or_update_index(
Ok(index) => { Ok(index) => {
info!("Updating index settings."); info!("Updating index settings.");
let old_settings = index.get_settings().await?;
let mut settings = default_settings(); let mut settings = default_settings();
if let Some(custom_rules) = custom_rules { if let Some(custom_rules) = custom_rules {
settings = settings.with_ranking_rules(custom_rules); settings = settings.with_ranking_rules(custom_rules);
} }
let old_settings = Settings {
synonyms: None, // We don't use synonyms right now
stop_words: if settings.stop_words.is_none() {
None
} else {
old_settings.stop_words.map(|mut x| {
x.sort();
x
})
},
ranking_rules: if settings.ranking_rules.is_none() {
None
} else {
old_settings.ranking_rules
},
filterable_attributes: if settings.filterable_attributes.is_none() {
None
} else {
old_settings.filterable_attributes.map(|mut x| {
x.sort();
x
})
},
sortable_attributes: if settings.sortable_attributes.is_none() {
None
} else {
old_settings.sortable_attributes.map(|mut x| {
x.sort();
x
})
},
distinct_attribute: if settings.distinct_attribute.is_none() {
None
} else {
old_settings.distinct_attribute
},
searchable_attributes: if settings.searchable_attributes.is_none() {
None
} else {
old_settings.searchable_attributes
},
displayed_attributes: if settings.displayed_attributes.is_none() {
None
} else {
old_settings.displayed_attributes.map(|mut x| {
x.sort();
x
})
},
pagination: if settings.pagination.is_none() {
None
} else {
old_settings.pagination
},
faceting: if settings.faceting.is_none() {
None
} else {
old_settings.faceting
},
};
if old_settings.synonyms != settings.synonyms
|| old_settings.stop_words != settings.stop_words
|| old_settings.ranking_rules != settings.ranking_rules
|| old_settings.filterable_attributes != settings.filterable_attributes
|| old_settings.sortable_attributes != settings.sortable_attributes
|| old_settings.distinct_attribute != settings.distinct_attribute
|| old_settings.searchable_attributes != settings.searchable_attributes
|| old_settings.displayed_attributes != settings.displayed_attributes
|| old_settings.pagination != settings.pagination
|| old_settings.faceting != settings.faceting
{
info!("Performing index settings set.");
index index
.set_settings(&settings) .set_settings(&settings)
.await? .await?
.wait_for_completion(client, None, Some(TIMEOUT)) .wait_for_completion(client, None, Some(TIMEOUT))
.await?; .await?;
info!("Done performing index settings set.");
}
Ok(index) Ok(index)
} }
_ => { _ => {
@ -241,12 +319,19 @@ pub async fn add_projects(
} }
fn default_settings() -> Settings { fn default_settings() -> Settings {
let mut sorted_display = DEFAULT_DISPLAYED_ATTRIBUTES.to_vec();
sorted_display.sort();
let mut sorted_sortable = DEFAULT_SORTABLE_ATTRIBUTES.to_vec();
sorted_sortable.sort();
let mut sorted_attrs = DEFAULT_ATTRIBUTES_FOR_FACETING.to_vec();
sorted_attrs.sort();
Settings::new() Settings::new()
.with_distinct_attribute("project_id") .with_distinct_attribute("project_id")
.with_displayed_attributes(DEFAULT_DISPLAYED_ATTRIBUTES) .with_displayed_attributes(sorted_display)
.with_searchable_attributes(DEFAULT_SEARCHABLE_ATTRIBUTES) .with_searchable_attributes(DEFAULT_SEARCHABLE_ATTRIBUTES)
.with_sortable_attributes(DEFAULT_SORTABLE_ATTRIBUTES) .with_sortable_attributes(sorted_sortable)
.with_filterable_attributes(DEFAULT_ATTRIBUTES_FOR_FACETING) .with_filterable_attributes(sorted_attrs)
.with_pagination(PaginationSetting { .with_pagination(PaginationSetting {
max_total_hits: 2147483647, max_total_hits: 2147483647,
}) })