Added monitoring, limited concurrent connections (#245)
* reduced the default, and added environment override. * Using parse is more stable and doesn't fail CI this time :P * Added support for monitoring This support is currently basic, but it can be improved later down the road. * Forgot scheduler file * Added health check * Cargo fix * Update cargo.lock to avoid action fails. Co-authored-by: Geometrically <18202329+Geometrically@users.noreply.github.com>
This commit is contained in:
parent
04998d0215
commit
efa8d5c575
34
Cargo.lock
generated
34
Cargo.lock
generated
@ -359,6 +359,17 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "actix-web-prom"
|
||||
version = "0.5.1"
|
||||
source = "git+https://github.com/nlopes/actix-web-prom?branch=master#05ca96dfb04c9d9c783dc658c7d1e12c1e8b1706"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"futures",
|
||||
"pin-project 1.0.7",
|
||||
"prometheus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "actix_derive"
|
||||
version = "0.5.0"
|
||||
@ -1954,6 +1965,7 @@ dependencies = [
|
||||
"actix-ratelimit",
|
||||
"actix-rt",
|
||||
"actix-web",
|
||||
"actix-web-prom",
|
||||
"async-trait",
|
||||
"base64 0.13.0",
|
||||
"bitflags",
|
||||
@ -1966,6 +1978,7 @@ dependencies = [
|
||||
"lazy_static",
|
||||
"log",
|
||||
"meilisearch-sdk",
|
||||
"prometheus",
|
||||
"rand 0.7.3",
|
||||
"regex",
|
||||
"reqwest 0.10.10",
|
||||
@ -2564,6 +2577,27 @@ dependencies = [
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prometheus"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5986aa8d62380092d2f50f8b1cdba9cb9b6731ffd4b25b51fd126b6c3e05b99c"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"fnv",
|
||||
"lazy_static",
|
||||
"memchr",
|
||||
"parking_lot",
|
||||
"protobuf",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "protobuf"
|
||||
version = "2.25.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23129d50f2c9355ced935fce8a08bd706ee2e7ce2b3b33bf61dace0e379ac63a"
|
||||
|
||||
[[package]]
|
||||
name = "quick-error"
|
||||
version = "1.2.3"
|
||||
|
||||
@ -56,3 +56,6 @@ sqlx = { version = "0.4.2", features = ["runtime-actix-rustls", "postgres", "chr
|
||||
|
||||
sentry = { version = "0.22.0", features = ["log"] }
|
||||
sentry-actix = "0.22.0"
|
||||
|
||||
actix-web-prom = {git = "https://github.com/nlopes/actix-web-prom", branch = "master"}
|
||||
prometheus = "0.12.0"
|
||||
|
||||
@ -8,7 +8,6 @@ const MIGRATION_FOLDER: &str = "migrations";
|
||||
|
||||
pub async fn connect() -> Result<PgPool, sqlx::Error> {
|
||||
info!("Initializing database connection");
|
||||
|
||||
let database_url = dotenv::var("DATABASE_URL").expect("`DATABASE_URL` not in .env");
|
||||
let pool = PgPoolOptions::new()
|
||||
.min_connections(
|
||||
|
||||
10
src/health/mod.rs
Normal file
10
src/health/mod.rs
Normal file
@ -0,0 +1,10 @@
|
||||
pub mod scheduler;
|
||||
pub mod pod;
|
||||
pub mod status;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
|
||||
lazy_static!{
|
||||
pub static ref SEARCH_READY: AtomicBool = AtomicBool::new(false);
|
||||
}
|
||||
33
src/health/pod.rs
Normal file
33
src/health/pod.rs
Normal file
@ -0,0 +1,33 @@
|
||||
use std::sync::{RwLock, Arc};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct PodInfo {
|
||||
pub pod_name: String,
|
||||
pub node_name: String,
|
||||
pod_id: Arc<RwLock<Option<String>>>,
|
||||
}
|
||||
|
||||
impl PodInfo {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
pod_name: dotenv::var("POD_NAME").unwrap_or("DEV".to_string()),
|
||||
node_name: dotenv::var("NODE_NAME").unwrap_or("self-hosted".to_string()),
|
||||
pod_id: Arc::new(RwLock::new(None))
|
||||
}
|
||||
}
|
||||
pub fn get_id(&self) -> String {
|
||||
{
|
||||
let lock = self.pod_id.read().unwrap();
|
||||
if lock.is_some() {
|
||||
return lock.clone().unwrap();
|
||||
}
|
||||
}
|
||||
let mut lock = self.pod_id.write().unwrap();
|
||||
let id = self.generate_id();
|
||||
lock.replace(id.clone());
|
||||
id
|
||||
}
|
||||
fn generate_id(&self) -> String {
|
||||
base64::encode(format!("{}-{}", self.node_name, self.pod_name))
|
||||
}
|
||||
}
|
||||
127
src/health/scheduler.rs
Normal file
127
src/health/scheduler.rs
Normal file
@ -0,0 +1,127 @@
|
||||
use crate::scheduler::Scheduler;
|
||||
use sqlx::{Pool, Postgres};
|
||||
|
||||
use prometheus::{opts, IntGaugeVec};
|
||||
use actix_web::dev::{Service, ServiceRequest, ServiceResponse, Transform};
|
||||
use actix_web::Error;
|
||||
|
||||
use std::pin::Pin;
|
||||
use std::future::{Future};
|
||||
use std::task::{Context, Poll};
|
||||
use futures::future::{ok, Ready};
|
||||
|
||||
use crate::health::pod::PodInfo;
|
||||
use actix_web::http::{HeaderName, HeaderValue};
|
||||
use actix_web_prom::{PrometheusMetrics};
|
||||
|
||||
pub struct HealthCounters {
|
||||
pod: PodInfo,
|
||||
idle_db_conn: IntGaugeVec,
|
||||
opened_db_conn: IntGaugeVec,
|
||||
current_requests: IntGaugeVec,
|
||||
}
|
||||
impl HealthCounters {
|
||||
pub fn new() -> Self {
|
||||
let idle_opts = opts!("idle_db_conn", "Amount of idle connections").namespace("api");
|
||||
let opened_opts = opts!("open_db_conn", "Amount of open connections").namespace("api");
|
||||
let current_opts = opts!("current_requests", "Currently open requests").namespace("api");
|
||||
Self {
|
||||
pod: PodInfo::new(),
|
||||
idle_db_conn: IntGaugeVec::new(idle_opts, &[]).unwrap(),
|
||||
opened_db_conn: IntGaugeVec::new(opened_opts, &[]).unwrap(),
|
||||
current_requests: IntGaugeVec::new(current_opts, &["endpoint", "method"]).unwrap(),
|
||||
}
|
||||
}
|
||||
pub fn register(&self, builder: &mut PrometheusMetrics) {
|
||||
builder
|
||||
.registry
|
||||
.register(Box::new(self.opened_db_conn.clone())).unwrap();
|
||||
builder
|
||||
.registry
|
||||
.register(Box::new(self.idle_db_conn.clone())).unwrap();
|
||||
builder
|
||||
.registry
|
||||
.register(Box::new(self.current_requests.clone())).unwrap();
|
||||
}
|
||||
pub fn schedule(&self, pool: Pool<Postgres>, scheduler: &mut Scheduler) {
|
||||
let this = self.clone();
|
||||
scheduler.run(std::time::Duration::from_secs(5), move || {
|
||||
let idle = pool.num_idle();
|
||||
let total = pool.size();
|
||||
this.idle_db_conn.with_label_values(&[]).set(idle as i64);
|
||||
this.opened_db_conn.with_label_values(&[]).set(total as i64);
|
||||
async move {
|
||||
ok::<i32, i32>(1).await.unwrap();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for HealthCounters {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
pod: self.pod.clone(),
|
||||
idle_db_conn: self.idle_db_conn.clone(),
|
||||
opened_db_conn: self.opened_db_conn.clone(),
|
||||
current_requests: self.current_requests.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Middleware factory (actix-web 3.x `Transform` API): wrapping the `App`
// with a `HealthCounters` produces one `MonitoringMiddleware` around each
// inner service, sharing the same gauge handles via `self.clone()`.
impl<S, B> Transform<S> for HealthCounters
where
    S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
    S::Future: 'static,
    B: 'static,
{
    type Request = ServiceRequest;
    type Response = ServiceResponse<B>;
    type Error = Error;
    type Transform = MonitoringMiddleware<S>;
    type InitError = ();
    type Future = Ready<Result<Self::Transform, Self::InitError>>;

    // Construction cannot fail, so return an immediately-ready Ok future.
    fn new_transform(&self, service: S) -> Self::Future {
        ok(MonitoringMiddleware { service, counters: self.clone() })
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/// Per-service middleware produced by `HealthCounters::new_transform`;
/// tracks in-flight requests and tags each response with the pod id.
pub struct MonitoringMiddleware<S> {
    // The wrapped inner service.
    service: S,
    // Shared gauge handles (cloning `HealthCounters` shares state).
    counters: HealthCounters,
}
|
||||
|
||||
impl<S, B> Service for MonitoringMiddleware<S>
|
||||
where
|
||||
S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
|
||||
S::Future: 'static,
|
||||
B: 'static,
|
||||
{
|
||||
type Request = ServiceRequest;
|
||||
type Response = ServiceResponse<B>;
|
||||
type Error = Error;
|
||||
type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>>>>;
|
||||
|
||||
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
|
||||
self.service.poll_ready(cx)
|
||||
}
|
||||
|
||||
fn call(&mut self, req: ServiceRequest) -> Self::Future {
|
||||
// The request has started.
|
||||
let pattern_or_path = req.match_pattern().unwrap_or("unknown".to_string());
|
||||
let counter = self.counters.current_requests.with_label_values(&[&*pattern_or_path,req.method().as_str()]);
|
||||
counter.inc();
|
||||
let pod = self.counters.pod.clone();
|
||||
let fut = self.service.call(req);
|
||||
Box::pin(async move {
|
||||
let mut res: Self::Response = fut.await?;
|
||||
// The request finished, remove a counter
|
||||
counter.dec();
|
||||
res.headers_mut().insert(HeaderName::from_static("x-server"), HeaderValue::from_str(&*pod.get_id()).unwrap());
|
||||
Ok(res)
|
||||
})
|
||||
}
|
||||
}
|
||||
16
src/health/status.rs
Normal file
16
src/health/status.rs
Normal file
@ -0,0 +1,16 @@
|
||||
use sqlx::{PgPool};
|
||||
use actix_web::web;
|
||||
|
||||
pub async fn test_database(postgres: web::Data<PgPool>) -> Result<(), sqlx::Error> {
|
||||
let mut transaction = postgres.acquire().await?;
|
||||
let result = sqlx::query(
|
||||
"
|
||||
SELECT 1
|
||||
"
|
||||
).execute(&mut transaction)
|
||||
.await;
|
||||
match result {
|
||||
Ok(_) => Ok(()),
|
||||
Err(e) => Err(e)
|
||||
}
|
||||
}
|
||||
25
src/main.rs
25
src/main.rs
@ -10,6 +10,11 @@ use rand::Rng;
|
||||
use search::indexing::index_projects;
|
||||
use search::indexing::IndexingSettings;
|
||||
use std::sync::Arc;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::Ordering;
|
||||
use crate::health::pod::PodInfo;
|
||||
use crate::health::scheduler::HealthCounters;
|
||||
use actix_web_prom::{PrometheusMetricsBuilder};
|
||||
|
||||
mod database;
|
||||
mod file_hosting;
|
||||
@ -17,6 +22,7 @@ mod models;
|
||||
mod routes;
|
||||
mod scheduler;
|
||||
mod search;
|
||||
mod health;
|
||||
mod util;
|
||||
mod validate;
|
||||
|
||||
@ -232,6 +238,7 @@ async fn main() -> std::io::Result<()> {
|
||||
if let Err(e) = result {
|
||||
warn!("Indexing created projects failed: {:?}", e);
|
||||
}
|
||||
crate::health::SEARCH_READY.store(true, Ordering::Release);
|
||||
info!("Done indexing created project queue");
|
||||
}
|
||||
});
|
||||
@ -243,12 +250,29 @@ async fn main() -> std::io::Result<()> {
|
||||
};
|
||||
|
||||
let store = MemoryStore::new();
|
||||
// Generate pod id
|
||||
let pod = PodInfo::new();
|
||||
// Init prometheus cluster
|
||||
let mut labels = HashMap::new();
|
||||
labels.insert("pod".to_string(), pod.pod_name);
|
||||
labels.insert("node".to_string(), pod.node_name);
|
||||
|
||||
// Get prometheus service
|
||||
let mut prometheus = PrometheusMetricsBuilder::new("api")
|
||||
.endpoint("/metrics")
|
||||
.build()
|
||||
.unwrap();
|
||||
// Get custom service
|
||||
let health = HealthCounters::new();
|
||||
health.register(&mut prometheus);
|
||||
health.schedule(pool.clone(), &mut scheduler);
|
||||
info!("Starting Actix HTTP server!");
|
||||
|
||||
// Init App
|
||||
HttpServer::new(move || {
|
||||
App::new()
|
||||
.wrap(prometheus.clone())
|
||||
.wrap(health.clone())
|
||||
.wrap(
|
||||
Cors::default()
|
||||
.allowed_methods(vec!["GET", "POST", "DELETE", "PATCH", "PUT"])
|
||||
@ -311,6 +335,7 @@ async fn main() -> std::io::Result<()> {
|
||||
.configure(routes::v1_config)
|
||||
.configure(routes::v2_config)
|
||||
.service(routes::index_get)
|
||||
.service(routes::health_get)
|
||||
.service(web::scope("/maven/").configure(routes::maven_config))
|
||||
.default_service(web::get().to(routes::not_found))
|
||||
})
|
||||
|
||||
@ -4,4 +4,4 @@ pub mod notifications;
|
||||
pub mod projects;
|
||||
pub mod reports;
|
||||
pub mod teams;
|
||||
pub mod users;
|
||||
pub mod users;
|
||||
31
src/routes/health.rs
Normal file
31
src/routes/health.rs
Normal file
@ -0,0 +1,31 @@
|
||||
use actix_web::{get, HttpResponse};
|
||||
use serde_json::json;
|
||||
use crate::health::status::test_database;
|
||||
use actix_web::web::Data;
|
||||
use sqlx::PgPool;
|
||||
use crate::health::SEARCH_READY;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
#[get("/health")]
|
||||
pub async fn health_get(client: Data<PgPool>) -> HttpResponse {
|
||||
// Check database connection:
|
||||
let result = test_database(client).await;
|
||||
if result.is_err() {
|
||||
let data = json!({
|
||||
"ready": false,
|
||||
"reason": "Database connection error"
|
||||
});
|
||||
return HttpResponse::InternalServerError().json(data)
|
||||
}
|
||||
if !SEARCH_READY.load(Ordering::Acquire) {
|
||||
let data = json!({
|
||||
"ready": false,
|
||||
"reason": "Indexing is not finished"
|
||||
});
|
||||
return HttpResponse::InternalServerError().json(data)
|
||||
}
|
||||
HttpResponse::Ok().json(json!({
|
||||
"ready": true,
|
||||
"reason": "Everything is OK"
|
||||
}))
|
||||
}
|
||||
@ -18,11 +18,13 @@ mod users;
|
||||
mod version_creation;
|
||||
mod version_file;
|
||||
mod versions;
|
||||
mod health;
|
||||
|
||||
pub use auth::config as auth_config;
|
||||
pub use tags::config as tags_config;
|
||||
|
||||
pub use self::index::index_get;
|
||||
pub use self::health::health_get;
|
||||
pub use self::not_found::not_found;
|
||||
use crate::file_hosting::FileHostingError;
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user