feat: add metrics to drainer (#497)

Cargo.lock (generated, 1 line added):

@@ -1263,6 +1263,7 @@ dependencies = [
  "config",
  "diesel",
  "error-stack",
+ "once_cell",
  "redis_interface",
  "router_env",
  "serde",

In the common_utils crate, the date_time module gains an async timing helper:

@@ -13,7 +13,7 @@ pub mod validation;

 /// Date-time utilities.
 pub mod date_time {
-    use time::{OffsetDateTime, PrimitiveDateTime};
+    use time::{Instant, OffsetDateTime, PrimitiveDateTime};
     /// Struct to represent milliseconds in time sensitive data fields
     #[derive(Debug)]
     pub struct Milliseconds(i32);
@@ -33,6 +33,15 @@ pub mod date_time {
     pub fn now_unix_timestamp() -> i64 {
         OffsetDateTime::now_utc().unix_timestamp()
     }
+
+    /// Calculate execution time for a async block in milliseconds
+    pub async fn time_it<T, Fut: futures::Future<Output = T>, F: FnOnce() -> Fut>(
+        block: F,
+    ) -> (T, f64) {
+        let start = Instant::now();
+        let result = block().await;
+        (result, start.elapsed().as_seconds_f64() * 1000f64)
+    }
 }

 /// Generate a nanoid with the given prefix and length
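
The helper's contract is easy to check in isolation. A standalone sketch of the same pattern, using std's Instant instead of the time crate (tokio is assumed only to drive the example):

```rust
use std::time::Instant;

/// Run an async block and return its output together with the elapsed
/// time in milliseconds, mirroring common_utils::date_time::time_it.
pub async fn time_it<T, Fut, F>(block: F) -> (T, f64)
where
    Fut: std::future::Future<Output = T>,
    F: FnOnce() -> Fut,
{
    let start = Instant::now();
    let result = block().await;
    (result, start.elapsed().as_secs_f64() * 1000.0)
}

#[tokio::main]
async fn main() {
    let (value, elapsed_ms) = time_it(|| async { 2 + 2 }).await;
    println!("computed {value} in {elapsed_ms:.3} ms");
}
```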

The drainer's Cargo.toml picks up once_cell, used by the new metrics module:

@@ -14,6 +14,7 @@ clap = { version = "4.1.4", default-features = false, features = ["std", "derive
 config = { version = "0.13.3", features = ["toml"] }
 diesel = { version = "2.0.3", features = ["postgres", "serde_json", "time"] }
 error-stack = "0.2.4"
+once_cell = "1.17.0"
 serde = "1.0.152"
 serde_json = "1.0.91"
 serde_path_to_error = "0.1.9"

The drainer crate root registers the new module:

@@ -1,6 +1,7 @@
 mod connection;
 pub mod env;
 pub mod errors;
+pub(crate) mod metrics;
 pub mod services;
 pub mod settings;
 mod utils;

In start_drainer, the scheduling loop now tracks how many stream jobs were picked per pass (the cycle bookkeeping lives in increment_stream_index, shown further down):

@@ -19,12 +20,14 @@ pub async fn start_drainer(
     max_read_count: u64,
 ) -> errors::DrainerResult<()> {
     let mut stream_index: u8 = 0;
+    let mut jobs_picked: u8 = 0;
     loop {
         if utils::is_stream_available(stream_index, store.clone()).await {
             tokio::spawn(drainer_handler(store.clone(), stream_index, max_read_count));
+            jobs_picked += 1;
         }
-        stream_index = utils::increment_stream_index(stream_index, number_of_streams);
+        (stream_index, jobs_picked) =
+            utils::increment_stream_index((stream_index, jobs_picked), number_of_streams);
     }
 }

Inside drainer, the number of entries read from each stream is counted:

@@ -55,9 +58,17 @@ async fn drainer(

     // parse_stream_entries returns error if no entries is found, handle it
     let (entries, last_entry_id) = utils::parse_stream_entries(&stream_read, stream_name)?;

     let read_count = entries.len();

+    metrics::JOBS_PICKED_PER_STREAM.add(
+        &metrics::CONTEXT,
+        u64::try_from(read_count).unwrap_or(u64::MIN),
+        &[metrics::KeyValue {
+            key: "stream".into(),
+            value: stream_name.to_string().into(),
+        }],
+    );
+
     // TODO: Handle errors when deserialization fails and when DB error occurs
     for entry in entries {
         let typed_sql = entry.1.get("typed_sql").map_or(String::new(), Clone::clone);
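
A note on the fallback: u64::MIN is simply 0, so a failed conversion would count as zero jobs rather than panic (and usize to u64 cannot actually fail on 64-bit targets):

```rust
fn main() {
    let read_count: usize = 7;
    // Defensive conversion: the fallback u64::MIN is just 0.
    assert_eq!(u64::try_from(read_count).unwrap_or(u64::MIN), 7);
    assert_eq!(u64::MIN, 0);
}
```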

The Insert and Update arms are each wrapped in time_it so the elapsed time can be recorded into a histogram tagged with the operation, the refund table name becomes a named constant, and the unimplemented Delete arm now logs instead of printing:

@@ -72,41 +83,80 @@ async fn drainer(
         let update_op = "update";
         let payment_intent = "payment_intent";
         let payment_attempt = "payment_attempt";
+        let refund = "refund";
         match db_op {
             // TODO: Handle errors
-            kv::DBOperation::Insert { insertable } => match insertable {
-                kv::Insertable::PaymentIntent(a) => {
-                    macro_util::handle_resp!(a.insert(&conn).await, insert_op, payment_intent)
-                }
-                kv::Insertable::PaymentAttempt(a) => {
-                    macro_util::handle_resp!(a.insert(&conn).await, insert_op, payment_attempt)
-                }
-                kv::Insertable::Refund(a) => {
-                    macro_util::handle_resp!(a.insert(&conn).await, "ins", "ref")
-                }
-            },
-            kv::DBOperation::Update { updatable } => match updatable {
-                kv::Updateable::PaymentIntentUpdate(a) => {
-                    macro_util::handle_resp!(
-                        a.orig.update(&conn, a.update_data).await,
-                        update_op,
-                        payment_intent
-                    )
-                }
-                kv::Updateable::PaymentAttemptUpdate(a) => {
-                    macro_util::handle_resp!(
-                        a.orig.update(&conn, a.update_data).await,
-                        update_op,
-                        payment_attempt
-                    )
-                }
-                kv::Updateable::RefundUpdate(a) => {
-                    macro_util::handle_resp!(a.orig.update(&conn, a.update_data).await, "up", "ref")
-                }
-            },
+            kv::DBOperation::Insert { insertable } => {
+                let (_, execution_time) = common_utils::date_time::time_it(|| async {
+                    match insertable {
+                        kv::Insertable::PaymentIntent(a) => {
+                            macro_util::handle_resp!(
+                                a.insert(&conn).await,
+                                insert_op,
+                                payment_intent
+                            )
+                        }
+                        kv::Insertable::PaymentAttempt(a) => {
+                            macro_util::handle_resp!(
+                                a.insert(&conn).await,
+                                insert_op,
+                                payment_attempt
+                            )
+                        }
+                        kv::Insertable::Refund(a) => {
+                            macro_util::handle_resp!(a.insert(&conn).await, insert_op, refund)
+                        }
+                    }
+                })
+                .await;
+                metrics::QUERY_EXECUTION_TIME.record(
+                    &metrics::CONTEXT,
+                    execution_time,
+                    &[metrics::KeyValue {
+                        key: "operation".into(),
+                        value: insert_op.into(),
+                    }],
+                );
+            }
+            kv::DBOperation::Update { updatable } => {
+                let (_, execution_time) = common_utils::date_time::time_it(|| async {
+                    match updatable {
+                        kv::Updateable::PaymentIntentUpdate(a) => {
+                            macro_util::handle_resp!(
+                                a.orig.update(&conn, a.update_data).await,
+                                update_op,
+                                payment_intent
+                            )
+                        }
+                        kv::Updateable::PaymentAttemptUpdate(a) => {
+                            macro_util::handle_resp!(
+                                a.orig.update(&conn, a.update_data).await,
+                                update_op,
+                                payment_attempt
+                            )
+                        }
+                        kv::Updateable::RefundUpdate(a) => {
+                            macro_util::handle_resp!(
+                                a.orig.update(&conn, a.update_data).await,
+                                update_op,
+                                refund
+                            )
+                        }
+                    }
+                })
+                .await;
+                metrics::QUERY_EXECUTION_TIME.record(
+                    &metrics::CONTEXT,
+                    execution_time,
+                    &[metrics::KeyValue {
+                        key: "operation".into(),
+                        value: update_op.into(),
+                    }],
+                );
+            }
             kv::DBOperation::Delete => {
-                // TODO: Implement this
-                println!("Not implemented!");
+                // [#224]: Implement this
+                logger::error!("Not implemented!");
             }
         };
     }
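
The histogram attributes above are built with a struct literal, while later hunks use KeyValue::new; under the opentelemetry API re-exported by router_env (the 0.18-era API these calls match, an assumption here), the two forms are equivalent:

```rust
use opentelemetry::KeyValue;

fn main() {
    // Struct-literal form, as in the drainer code above...
    let a = KeyValue {
        key: "operation".into(),
        value: "insert".into(),
    };
    // ...and the constructor form used in the stream utilities below.
    let b = KeyValue::new("operation", "insert");
    assert_eq!(a.key, b.key);
    assert_eq!(a.value, b.value);
}
```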

A read/trim mismatch is now logged instead of being left as a TODO:

@@ -115,7 +165,12 @@ async fn drainer(
     utils::trim_from_stream(stream_name, last_entry_id.as_str(), &store.redis_conn).await?;

     if read_count != entries_trimmed {
-        // TODO: log
+        logger::error!(
+            read_entries = %read_count,
+            trimmed_entries = %entries_trimmed,
+            ?entries,
+            "Assertion Failed no. of entries read from the stream doesn't match no. of entries trimmed"
+        );
     }

     Ok(())
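
The logger::error! call uses tracing-style structured fields: %field records with Display, ?field with Debug. A minimal standalone equivalent, assuming (as the syntax suggests) that router_env's logger macros wrap the tracing crate:

```rust
// Requires the tracing and tracing-subscriber crates.
fn main() {
    tracing_subscriber::fmt::init();
    let read_count = 10usize;
    let entries_trimmed = 8usize;
    let entries = vec![("1-0", "typed_sql")];
    tracing::error!(
        read_entries = %read_count,        // recorded via Display
        trimmed_entries = %entries_trimmed,
        ?entries,                          // recorded via Debug
        "entries read from the stream don't match entries trimmed"
    );
}
```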

The handle_resp! macro moves from pipe-delimited log strings to structured fields, and bumps a success or error counter for every query:

@@ -126,8 +181,24 @@ mod macro_util {
     macro_rules! handle_resp {
         ($result:expr,$op_type:expr, $table:expr) => {
             match $result {
-                Ok(aa) => logger::info!("Ok|{}|{}|{:?}|", $op_type, $table, aa),
-                Err(err) => logger::error!("Err|{}|{}|{:?}|", $op_type, $table, err),
+                Ok(inner_result) => {
+                    logger::info!(operation = %$op_type, table = %$table, ?inner_result);
+                    metrics::SUCCESSFUL_QUERY_EXECUTION.add(&metrics::CONTEXT, 1, &[
+                        metrics::KeyValue {
+                            key: "operation".into(),
+                            value: $table.into(),
+                        }
+                    ]);
+                }
+                Err(err) => {
+                    logger::error!(operation = %$op_type, table = %$table, ?err);
+                    metrics::ERRORS_WHILE_QUERY_EXECUTION.add(&metrics::CONTEXT, 1, &[
+                        metrics::KeyValue {
+                            key: "operation".into(),
+                            value: $table.into(),
+                        }
+                    ]);
+                }
             }
         };
     }
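
To see what a call site expands to, here is a runnable analogue of the macro with the logging and metrics calls stubbed out as prints (nothing below is the crate's actual API):

```rust
macro_rules! handle_resp {
    ($result:expr, $op_type:expr, $table:expr) => {
        match $result {
            Ok(inner_result) => {
                println!("ok: operation={} table={} result={:?}", $op_type, $table, inner_result);
                // real code: SUCCESSFUL_QUERY_EXECUTION.add(&CONTEXT, 1, &[...])
            }
            Err(err) => {
                println!("err: operation={} table={} err={:?}", $op_type, $table, err);
                // real code: ERRORS_WHILE_QUERY_EXECUTION.add(&CONTEXT, 1, &[...])
            }
        }
    };
}

fn main() {
    let ok: Result<u32, String> = Ok(1);
    let bad: Result<u32, String> = Err("constraint violation".into());
    handle_resp!(ok, "insert", "payment_intent");
    handle_resp!(bad, "update", "refund");
}
```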

crates/drainer/src/metrics.rs (new file, 59 lines):

@@ -0,0 +1,59 @@
+use once_cell::sync::Lazy;
+pub use router_env::opentelemetry::KeyValue;
+use router_env::opentelemetry::{
+    global,
+    metrics::{Counter, Histogram, Meter},
+    Context,
+};
+
+pub(crate) static CONTEXT: Lazy<Context> = Lazy::new(Context::current);
+static DRAINER_METER: Lazy<Meter> = Lazy::new(|| global::meter("DRAINER"));
+
+// Time in (ms) milliseconds
+pub(crate) static QUERY_EXECUTION_TIME: Lazy<Histogram<f64>> =
+    Lazy::new(|| DRAINER_METER.f64_histogram("QUERY_EXECUTION_TIME").init());
+
+pub(crate) static JOBS_PICKED_PER_STREAM: Lazy<Counter<u64>> =
+    Lazy::new(|| DRAINER_METER.u64_counter("JOBS_PICKED_PER_CYCLE").init());
+
+pub(crate) static CYCLES_COMPLETED_SUCCESSFULLY: Lazy<Counter<u64>> = Lazy::new(|| {
+    DRAINER_METER
+        .u64_counter("CYCLES_COMPLETED_SUCCESSFULLY")
+        .init()
+});
+
+pub(crate) static CYCLES_COMPLETED_UNSUCCESSFULLY: Lazy<Counter<u64>> = Lazy::new(|| {
+    DRAINER_METER
+        .u64_counter("CYCLES_COMPLETED_UNSUCCESSFULLY")
+        .init()
+});
+
+pub(crate) static ERRORS_WHILE_QUERY_EXECUTION: Lazy<Counter<u64>> = Lazy::new(|| {
+    DRAINER_METER
+        .u64_counter("ERRORS_WHILE_QUERY_EXECUTION")
+        .init()
+});
+
+pub(crate) static SUCCESSFUL_QUERY_EXECUTION: Lazy<Counter<u64>> = Lazy::new(|| {
+    DRAINER_METER
+        .u64_counter("SUCCESSFUL_QUERY_EXECUTION")
+        .init()
+});
+
+// Time in (ms) milliseconds
+pub(crate) static REDIS_STREAM_READ_TIME: Lazy<Histogram<f64>> =
+    Lazy::new(|| DRAINER_METER.f64_histogram("REDIS_STREAM_READ_TIME").init());
+
+// Time in (ms) milliseconds
+pub(crate) static REDIS_STREAM_TRIM_TIME: Lazy<Histogram<f64>> =
+    Lazy::new(|| DRAINER_METER.f64_histogram("REDIS_STREAM_TRIM_TIME").init());
+
+pub(crate) static _SHUTDOWN_SIGNAL_RECEIVED: Lazy<Counter<u64>> =
+    Lazy::new(|| DRAINER_METER.u64_counter("SHUTDOWN_SIGNAL_RECEIVED").init());
+
+pub(crate) static _SUCCESSFUL_SHUTDOWN: Lazy<Counter<u64>> =
+    Lazy::new(|| DRAINER_METER.u64_counter("SUCCESSFUL_SHUTDOWN").init());
+
+// Time in (ms) milliseconds
+pub(crate) static _CLEANUP_TIME: Lazy<Histogram<f64>> =
+    Lazy::new(|| DRAINER_METER.f64_histogram("CLEANUP_TIME").init());
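
The file follows a common OpenTelemetry layout: one lazily-created Meter for the subsystem, with each instrument initialised on first use. A self-contained sketch against the opentelemetry crate directly (again assuming the 0.18-era API; without an installed exporter these calls go to a no-op pipeline):

```rust
use once_cell::sync::Lazy;
use opentelemetry::{
    global,
    metrics::{Counter, Histogram, Meter},
    Context, KeyValue,
};

// One meter per subsystem; instruments are created lazily from it.
static METER: Lazy<Meter> = Lazy::new(|| global::meter("DRAINER"));
static JOBS: Lazy<Counter<u64>> =
    Lazy::new(|| METER.u64_counter("JOBS_PICKED_PER_CYCLE").init());
static QUERY_TIME: Lazy<Histogram<f64>> =
    Lazy::new(|| METER.f64_histogram("QUERY_EXECUTION_TIME").init());

fn main() {
    let cx = Context::current();
    JOBS.add(&cx, 3, &[KeyValue::new("stream", "SHARD_0")]);
    QUERY_TIME.record(&cx, 12.5, &[KeyValue::new("operation", "insert")]);
}
```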

In the drainer's stream utilities, metrics joins the crate imports:

@@ -5,7 +5,7 @@ use redis_interface as redis;

 use crate::{
     errors::{self, DrainerError},
-    logger, services,
+    logger, metrics, services,
 };

 pub type StreamEntries = Vec<(String, HashMap<String, String>)>;

read_from_stream wraps the Redis read in time_it and records the elapsed time per stream:

@@ -35,12 +35,23 @@ pub async fn read_from_stream(
 ) -> errors::DrainerResult<StreamReadResult> {
     // "0-0" id gives first entry
     let stream_id = "0-0";
-    let entries = redis
-        .stream_read_entries(stream_name, stream_id, Some(max_read_count))
-        .await
-        .map_err(DrainerError::from)
-        .into_report()?;
-    Ok(entries)
+    let (output, execution_time) = common_utils::date_time::time_it(|| async {
+        let entries = redis
+            .stream_read_entries(stream_name, stream_id, Some(max_read_count))
+            .await
+            .map_err(DrainerError::from)
+            .into_report()?;
+        Ok(entries)
+    })
+    .await;
+
+    metrics::REDIS_STREAM_READ_TIME.record(
+        &metrics::CONTEXT,
+        execution_time,
+        &[metrics::KeyValue::new("stream", stream_name.to_owned())],
+    );
+
+    output
 }

 pub async fn trim_from_stream(

trim_from_stream gets the same treatment. The timed closure returns a DrainerResult, which is only unwrapped (with ?) after the metric has been recorded:

@@ -51,22 +62,34 @@ pub async fn trim_from_stream(
     let trim_kind = redis::StreamCapKind::MinID;
     let trim_type = redis::StreamCapTrim::Exact;
     let trim_id = minimum_entry_id;
-    let trim_result = redis
-        .stream_trim_entries(stream_name, (trim_kind, trim_type, trim_id))
-        .await
-        .map_err(DrainerError::from)
-        .into_report()?;
+    let (trim_result, execution_time) =
+        common_utils::date_time::time_it::<errors::DrainerResult<_>, _, _>(|| async {
+            let trim_result = redis
+                .stream_trim_entries(stream_name, (trim_kind, trim_type, trim_id))
+                .await
+                .map_err(DrainerError::from)
+                .into_report()?;

-    // Since xtrim deletes entries below given id excluding the given id.
-    // Hence, deleting the minimum entry id
-    redis
-        .stream_delete_entries(stream_name, minimum_entry_id)
-        .await
-        .map_err(DrainerError::from)
-        .into_report()?;
+            // Since xtrim deletes entries below given id excluding the given id.
+            // Hence, deleting the minimum entry id
+            redis
+                .stream_delete_entries(stream_name, minimum_entry_id)
+                .await
+                .map_err(DrainerError::from)
+                .into_report()?;
+
+            Ok(trim_result)
+        })
+        .await;
+
+    metrics::REDIS_STREAM_TRIM_TIME.record(
+        &metrics::CONTEXT,
+        execution_time,
+        &[metrics::KeyValue::new("stream", stream_name.to_owned())],
+    );

     // adding 1 because we are deleting the given id too
-    Ok(trim_result + 1)
+    Ok(trim_result? + 1)
 }

 pub async fn make_stream_available(
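
That ordering means the trim duration is recorded even when the operation fails. A runnable sketch of the same shape, reusing the standalone time_it from earlier:

```rust
use std::time::Instant;

async fn time_it<T, Fut, F>(block: F) -> (T, f64)
where
    Fut: std::future::Future<Output = T>,
    F: FnOnce() -> Fut,
{
    let start = Instant::now();
    let result = block().await;
    (result, start.elapsed().as_secs_f64() * 1000.0)
}

#[tokio::main]
async fn main() -> Result<(), String> {
    // The closure's Result is just the timed block's output...
    let (trim_result, elapsed_ms) =
        time_it(|| async { Err::<u64, _>("redis unavailable".to_string()) }).await;
    // ...so the metric can be recorded before the error propagates.
    println!("would record REDIS_STREAM_TRIM_TIME = {elapsed_ms:.3} ms");
    let trimmed = trim_result?; // the error surfaces only here
    println!("trimmed {trimmed} entries");
    Ok(())
}
```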

Finally, increment_stream_index carries the (stream_index, jobs_picked) pair and classifies each completed cycle:

@@ -99,11 +122,17 @@ pub fn parse_stream_entries<'a>(
         .into_report()
 }

-pub fn increment_stream_index(index: u8, total_streams: u8) -> u8 {
+// Here the output is in the format (stream_index, jobs_picked),
+// similar to the first argument of the function
+pub fn increment_stream_index((index, jobs_picked): (u8, u8), total_streams: u8) -> (u8, u8) {
     if index == total_streams - 1 {
-        0
+        match jobs_picked {
+            0 => metrics::CYCLES_COMPLETED_UNSUCCESSFULLY.add(&metrics::CONTEXT, 1, &[]),
+            _ => metrics::CYCLES_COMPLETED_SUCCESSFULLY.add(&metrics::CONTEXT, 1, &[]),
+        }
+        (0, 0)
     } else {
-        index + 1
+        (index + 1, jobs_picked)
     }
 }
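
A cycle is one full pass over all stream indices, and it counts as successful if at least one job was picked anywhere in the pass. A runnable simulation with the counters stubbed out as prints (the availability pattern is hypothetical):

```rust
fn increment_stream_index((index, jobs_picked): (u8, u8), total_streams: u8) -> (u8, u8) {
    if index == total_streams - 1 {
        match jobs_picked {
            0 => println!("CYCLES_COMPLETED_UNSUCCESSFULLY += 1"),
            _ => println!("CYCLES_COMPLETED_SUCCESSFULLY += 1 ({jobs_picked} jobs)"),
        }
        (0, 0) // wrap around and reset the per-cycle job count
    } else {
        (index + 1, jobs_picked)
    }
}

fn main() {
    let total_streams = 3;
    let mut state = (0u8, 0u8);
    for stream_had_work in [true, false, true, false, false, false] {
        if stream_had_work {
            state.1 += 1; // jobs_picked += 1, as in start_drainer's loop
        }
        state = increment_stream_index(state, total_streams);
    }
}
```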

A related change narrows the visibility of the PROCESS_TRACKER metrics context:

@@ -5,7 +5,7 @@ use router_env::opentelemetry::{
     Context,
 };

-pub static CONTEXT: Lazy<Context> = Lazy::new(Context::current);
+pub(crate) static CONTEXT: Lazy<Context> = Lazy::new(Context::current);
 static PT_METER: Lazy<Meter> = Lazy::new(|| global::meter("PROCESS_TRACKER"));

 pub(crate) static CONSUMER_STATS: Lazy<Histogram<f64>> =