feat: add metrics to drainer (#497)

Author: Nishant Joshi
Date: 2023-02-06 13:03:53 +05:30
Committed by: GitHub
Parent: d09ed14453
Commit: 4a820dcd7d
7 changed files with 233 additions and 63 deletions

Cargo.lock generated

@@ -1263,6 +1263,7 @@ dependencies = [
 "config",
 "diesel",
 "error-stack",
+"once_cell",
 "redis_interface",
 "router_env",
 "serde",


@@ -13,7 +13,7 @@ pub mod validation;
 /// Date-time utilities.
 pub mod date_time {
-    use time::{OffsetDateTime, PrimitiveDateTime};
+    use time::{Instant, OffsetDateTime, PrimitiveDateTime};

     /// Struct to represent milliseconds in time sensitive data fields
     #[derive(Debug)]
     pub struct Milliseconds(i32);
@@ -33,6 +33,15 @@ pub mod date_time {
     pub fn now_unix_timestamp() -> i64 {
         OffsetDateTime::now_utc().unix_timestamp()
     }
+
+    /// Calculate execution time for an async block in milliseconds
+    pub async fn time_it<T, Fut: futures::Future<Output = T>, F: FnOnce() -> Fut>(
+        block: F,
+    ) -> (T, f64) {
+        let start = Instant::now();
+        let result = block().await;
+        (result, start.elapsed().as_seconds_f64() * 1000f64)
+    }
 }

 /// Generate a nanoid with the given prefix and length
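
A minimal usage sketch of the new `time_it` helper (illustrative, not part of the diff): the block's output is returned unchanged, together with the wall-clock duration of the block in milliseconds.

    // Illustrative only: timing a trivial async block with the new helper.
    async fn example() {
        let (value, elapsed_ms) =
            common_utils::date_time::time_it(|| async { 21 * 2 }).await;
        assert_eq!(value, 42);
        // `elapsed_ms` is an f64 measured via `time::Instant`.
        println!("block took {elapsed_ms} ms");
    }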


@@ -14,6 +14,7 @@ clap = { version = "4.1.4", default-features = false, features = ["std", "derive
 config = { version = "0.13.3", features = ["toml"] }
 diesel = { version = "2.0.3", features = ["postgres", "serde_json", "time"] }
 error-stack = "0.2.4"
+once_cell = "1.17.0"
 serde = "1.0.152"
 serde_json = "1.0.91"
 serde_path_to_error = "0.1.9"


@@ -1,6 +1,7 @@
 mod connection;
 pub mod env;
 pub mod errors;
+pub(crate) mod metrics;
 pub mod services;
 pub mod settings;
 mod utils;
@@ -19,12 +20,14 @@ pub async fn start_drainer(
     max_read_count: u64,
 ) -> errors::DrainerResult<()> {
     let mut stream_index: u8 = 0;
+    let mut jobs_picked: u8 = 0;

     loop {
         if utils::is_stream_available(stream_index, store.clone()).await {
             tokio::spawn(drainer_handler(store.clone(), stream_index, max_read_count));
+            jobs_picked += 1;
         }
-        stream_index = utils::increment_stream_index(stream_index, number_of_streams);
+        (stream_index, jobs_picked) =
+            utils::increment_stream_index((stream_index, jobs_picked), number_of_streams);
     }
 }
@@ -55,9 +58,17 @@ async fn drainer(
     // parse_stream_entries returns error if no entries is found, handle it
     let (entries, last_entry_id) = utils::parse_stream_entries(&stream_read, stream_name)?;
     let read_count = entries.len();
+    metrics::JOBS_PICKED_PER_STREAM.add(
+        &metrics::CONTEXT,
+        u64::try_from(read_count).unwrap_or(u64::MIN),
+        &[metrics::KeyValue {
+            key: "stream".into(),
+            value: stream_name.to_string().into(),
+        }],
+    );
     // TODO: Handle errors when deserialization fails and when DB error occurs
     for entry in entries {
         let typed_sql = entry.1.get("typed_sql").map_or(String::new(), Clone::clone);
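
A side note on the counter value above (sketch, not from the diff): `read_count` is a `usize`, and `u64::try_from(read_count).unwrap_or(u64::MIN)` converts it for the counter, falling back to 0 rather than panicking if the conversion were ever to fail.

    // Illustrative only: the conversion used for JOBS_PICKED_PER_STREAM.
    let read_count: usize = 5;
    let as_u64 = u64::try_from(read_count).unwrap_or(u64::MIN); // u64::MIN == 0
    assert_eq!(as_u64, 5);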
@@ -72,41 +83,80 @@ async fn drainer(
         let update_op = "update";
         let payment_intent = "payment_intent";
         let payment_attempt = "payment_attempt";
+        let refund = "refund";
         match db_op {
             // TODO: Handle errors
-            kv::DBOperation::Insert { insertable } => match insertable {
-                kv::Insertable::PaymentIntent(a) => {
-                    macro_util::handle_resp!(a.insert(&conn).await, insert_op, payment_intent)
-                }
-                kv::Insertable::PaymentAttempt(a) => {
-                    macro_util::handle_resp!(a.insert(&conn).await, insert_op, payment_attempt)
-                }
-                kv::Insertable::Refund(a) => {
-                    macro_util::handle_resp!(a.insert(&conn).await, "ins", "ref")
-                }
-            },
-            kv::DBOperation::Update { updatable } => match updatable {
-                kv::Updateable::PaymentIntentUpdate(a) => {
-                    macro_util::handle_resp!(
-                        a.orig.update(&conn, a.update_data).await,
-                        update_op,
-                        payment_intent
-                    )
-                }
-                kv::Updateable::PaymentAttemptUpdate(a) => {
-                    macro_util::handle_resp!(
-                        a.orig.update(&conn, a.update_data).await,
-                        update_op,
-                        payment_attempt
-                    )
-                }
-                kv::Updateable::RefundUpdate(a) => {
-                    macro_util::handle_resp!(a.orig.update(&conn, a.update_data).await, "up", "ref")
-                }
-            },
+            kv::DBOperation::Insert { insertable } => {
+                let (_, execution_time) = common_utils::date_time::time_it(|| async {
+                    match insertable {
+                        kv::Insertable::PaymentIntent(a) => {
+                            macro_util::handle_resp!(
+                                a.insert(&conn).await,
+                                insert_op,
+                                payment_intent
+                            )
+                        }
+                        kv::Insertable::PaymentAttempt(a) => {
+                            macro_util::handle_resp!(
+                                a.insert(&conn).await,
+                                insert_op,
+                                payment_attempt
+                            )
+                        }
+                        kv::Insertable::Refund(a) => {
+                            macro_util::handle_resp!(a.insert(&conn).await, insert_op, refund)
+                        }
+                    }
+                })
+                .await;
+                metrics::QUERY_EXECUTION_TIME.record(
+                    &metrics::CONTEXT,
+                    execution_time,
+                    &[metrics::KeyValue {
+                        key: "operation".into(),
+                        value: insert_op.into(),
+                    }],
+                );
+            }
+            kv::DBOperation::Update { updatable } => {
+                let (_, execution_time) = common_utils::date_time::time_it(|| async {
+                    match updatable {
+                        kv::Updateable::PaymentIntentUpdate(a) => {
+                            macro_util::handle_resp!(
+                                a.orig.update(&conn, a.update_data).await,
+                                update_op,
+                                payment_intent
+                            )
+                        }
+                        kv::Updateable::PaymentAttemptUpdate(a) => {
+                            macro_util::handle_resp!(
+                                a.orig.update(&conn, a.update_data).await,
+                                update_op,
+                                payment_attempt
+                            )
+                        }
+                        kv::Updateable::RefundUpdate(a) => {
+                            macro_util::handle_resp!(
+                                a.orig.update(&conn, a.update_data).await,
+                                update_op,
+                                refund
+                            )
+                        }
+                    }
+                })
+                .await;
+                metrics::QUERY_EXECUTION_TIME.record(
+                    &metrics::CONTEXT,
+                    execution_time,
+                    &[metrics::KeyValue {
+                        key: "operation".into(),
+                        value: update_op.into(),
+                    }],
+                );
+            }
             kv::DBOperation::Delete => {
-                // TODO: Implement this
-                println!("Not implemented!");
+                // [#224]: Implement this
+                logger::error!("Not implemented!");
             }
         };
     }
@@ -115,7 +165,12 @@ async fn drainer(
         utils::trim_from_stream(stream_name, last_entry_id.as_str(), &store.redis_conn).await?;

     if read_count != entries_trimmed {
-        // TODO: log
+        logger::error!(
+            read_entries = %read_count,
+            trimmed_entries = %entries_trimmed,
+            ?entries,
+            "Assertion Failed: no. of entries read from the stream doesn't match no. of entries trimmed"
+        );
     }

     Ok(())
@@ -126,8 +181,24 @@ mod macro_util {
     macro_rules! handle_resp {
         ($result:expr,$op_type:expr, $table:expr) => {
             match $result {
-                Ok(aa) => logger::info!("Ok|{}|{}|{:?}|", $op_type, $table, aa),
-                Err(err) => logger::error!("Err|{}|{}|{:?}|", $op_type, $table, err),
+                Ok(inner_result) => {
+                    logger::info!(operation = %$op_type, table = %$table, ?inner_result);
+                    metrics::SUCCESSFUL_QUERY_EXECUTION.add(&metrics::CONTEXT, 1, &[
+                        metrics::KeyValue {
+                            key: "operation".into(),
+                            value: $table.into(),
+                        }
+                    ]);
+                }
+                Err(err) => {
+                    logger::error!(operation = %$op_type, table = %$table, ?err);
+                    metrics::ERRORS_WHILE_QUERY_EXECUTION.add(&metrics::CONTEXT, 1, &[
+                        metrics::KeyValue {
+                            key: "operation".into(),
+                            value: $table.into(),
+                        }
+                    ]);
+                }
             }
         };
     }
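
For reference, an invocation such as `macro_util::handle_resp!(a.insert(&conn).await, insert_op, payment_intent)` now expands to roughly the following (a sketch assuming the crate's `logger` and `metrics` modules and the local bindings are in scope; only the success arm is spelled out, the error arm mirrors it with ERRORS_WHILE_QUERY_EXECUTION):

    match a.insert(&conn).await {
        Ok(inner_result) => {
            // Structured log of the operation, table and returned value.
            logger::info!(operation = %insert_op, table = %payment_intent, ?inner_result);
            // Count the successful query, tagged with the table name.
            metrics::SUCCESSFUL_QUERY_EXECUTION.add(
                &metrics::CONTEXT,
                1,
                &[metrics::KeyValue {
                    key: "operation".into(),
                    value: payment_intent.into(),
                }],
            );
        }
        Err(_err) => { /* logs the error and bumps ERRORS_WHILE_QUERY_EXECUTION */ }
    }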


@@ -0,0 +1,59 @@
+use once_cell::sync::Lazy;
+pub use router_env::opentelemetry::KeyValue;
+use router_env::opentelemetry::{
+    global,
+    metrics::{Counter, Histogram, Meter},
+    Context,
+};
+
+pub(crate) static CONTEXT: Lazy<Context> = Lazy::new(Context::current);
+
+static DRAINER_METER: Lazy<Meter> = Lazy::new(|| global::meter("DRAINER"));
+
+// Time in milliseconds (ms)
+pub(crate) static QUERY_EXECUTION_TIME: Lazy<Histogram<f64>> =
+    Lazy::new(|| DRAINER_METER.f64_histogram("QUERY_EXECUTION_TIME").init());
+
+pub(crate) static JOBS_PICKED_PER_STREAM: Lazy<Counter<u64>> =
+    Lazy::new(|| DRAINER_METER.u64_counter("JOBS_PICKED_PER_CYCLE").init());
+
+pub(crate) static CYCLES_COMPLETED_SUCCESSFULLY: Lazy<Counter<u64>> = Lazy::new(|| {
+    DRAINER_METER
+        .u64_counter("CYCLES_COMPLETED_SUCCESSFULLY")
+        .init()
+});
+
+pub(crate) static CYCLES_COMPLETED_UNSUCCESSFULLY: Lazy<Counter<u64>> = Lazy::new(|| {
+    DRAINER_METER
+        .u64_counter("CYCLES_COMPLETED_UNSUCCESSFULLY")
+        .init()
+});
+
+pub(crate) static ERRORS_WHILE_QUERY_EXECUTION: Lazy<Counter<u64>> = Lazy::new(|| {
+    DRAINER_METER
+        .u64_counter("ERRORS_WHILE_QUERY_EXECUTION")
+        .init()
+});
+
+pub(crate) static SUCCESSFUL_QUERY_EXECUTION: Lazy<Counter<u64>> = Lazy::new(|| {
+    DRAINER_METER
+        .u64_counter("SUCCESSFUL_QUERY_EXECUTION")
+        .init()
+});
+
+// Time in milliseconds (ms)
+pub(crate) static REDIS_STREAM_READ_TIME: Lazy<Histogram<f64>> =
+    Lazy::new(|| DRAINER_METER.f64_histogram("REDIS_STREAM_READ_TIME").init());
+
+// Time in milliseconds (ms)
+pub(crate) static REDIS_STREAM_TRIM_TIME: Lazy<Histogram<f64>> =
+    Lazy::new(|| DRAINER_METER.f64_histogram("REDIS_STREAM_TRIM_TIME").init());
+
+pub(crate) static _SHUTDOWN_SIGNAL_RECEIVED: Lazy<Counter<u64>> =
+    Lazy::new(|| DRAINER_METER.u64_counter("SHUTDOWN_SIGNAL_RECEIVED").init());
+
+pub(crate) static _SUCCESSFUL_SHUTDOWN: Lazy<Counter<u64>> =
+    Lazy::new(|| DRAINER_METER.u64_counter("SUCCESSFUL_SHUTDOWN").init());
+
+// Time in milliseconds (ms)
+pub(crate) static _CLEANUP_TIME: Lazy<Histogram<f64>> =
+    Lazy::new(|| DRAINER_METER.f64_histogram("CLEANUP_TIME").init());
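
A minimal sketch of how these statics are consumed at the call sites in this diff (the numeric values and the stream name below are illustrative only):

    // Record a query execution time, in milliseconds, tagged with the operation.
    metrics::QUERY_EXECUTION_TIME.record(
        &metrics::CONTEXT,
        12.5,
        &[metrics::KeyValue::new("operation", "insert")],
    );

    // Count entries picked up from a particular stream.
    metrics::JOBS_PICKED_PER_STREAM.add(
        &metrics::CONTEXT,
        3,
        &[metrics::KeyValue::new("stream", "drainer_stream_0")],
    );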


@@ -5,7 +5,7 @@ use redis_interface as redis;
 use crate::{
     errors::{self, DrainerError},
-    logger, services,
+    logger, metrics, services,
 };

 pub type StreamEntries = Vec<(String, HashMap<String, String>)>;
@@ -35,12 +35,23 @@ pub async fn read_from_stream(
 ) -> errors::DrainerResult<StreamReadResult> {
     // "0-0" id gives first entry
     let stream_id = "0-0";
-    let entries = redis
-        .stream_read_entries(stream_name, stream_id, Some(max_read_count))
-        .await
-        .map_err(DrainerError::from)
-        .into_report()?;
-    Ok(entries)
+    let (output, execution_time) = common_utils::date_time::time_it(|| async {
+        let entries = redis
+            .stream_read_entries(stream_name, stream_id, Some(max_read_count))
+            .await
+            .map_err(DrainerError::from)
+            .into_report()?;
+        Ok(entries)
+    })
+    .await;
+
+    metrics::REDIS_STREAM_READ_TIME.record(
+        &metrics::CONTEXT,
+        execution_time,
+        &[metrics::KeyValue::new("stream", stream_name.to_owned())],
+    );
+
+    output
 }

 pub async fn trim_from_stream(
@@ -51,22 +62,34 @@ pub async fn trim_from_stream(
     let trim_kind = redis::StreamCapKind::MinID;
     let trim_type = redis::StreamCapTrim::Exact;
     let trim_id = minimum_entry_id;
-    let trim_result = redis
-        .stream_trim_entries(stream_name, (trim_kind, trim_type, trim_id))
-        .await
-        .map_err(DrainerError::from)
-        .into_report()?;
-
-    // Since xtrim deletes entries below given id excluding the given id.
-    // Hence, deleting the minimum entry id
-    redis
-        .stream_delete_entries(stream_name, minimum_entry_id)
-        .await
-        .map_err(DrainerError::from)
-        .into_report()?;
+    let (trim_result, execution_time) =
+        common_utils::date_time::time_it::<errors::DrainerResult<_>, _, _>(|| async {
+            let trim_result = redis
+                .stream_trim_entries(stream_name, (trim_kind, trim_type, trim_id))
+                .await
+                .map_err(DrainerError::from)
+                .into_report()?;
+
+            // Since xtrim deletes entries below given id excluding the given id.
+            // Hence, deleting the minimum entry id
+            redis
+                .stream_delete_entries(stream_name, minimum_entry_id)
+                .await
+                .map_err(DrainerError::from)
+                .into_report()?;
+
+            Ok(trim_result)
+        })
+        .await;
+
+    metrics::REDIS_STREAM_TRIM_TIME.record(
+        &metrics::CONTEXT,
+        execution_time,
+        &[metrics::KeyValue::new("stream", stream_name.to_owned())],
+    );

     // adding 1 because we are deleting the given id too
-    Ok(trim_result + 1)
+    Ok(trim_result? + 1)
 }

 pub async fn make_stream_available(
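
Because `time_it` returns the block's output alongside the elapsed time, timing a fallible block means the `?` moves outside the closure: the Redis calls are awaited and error-mapped inside the block, the histogram is recorded unconditionally, and the captured `Result` is only propagated afterwards (`output` in `read_from_stream`, `trim_result? + 1` here). A reduced sketch of the pattern (illustrative, with a simplified error type):

    async fn timed_fallible() -> Result<u64, String> {
        let (output, elapsed_ms) =
            common_utils::date_time::time_it::<Result<u64, String>, _, _>(|| async {
                // Fallible work goes here; failures are captured in the Result
                // instead of short-circuiting past the timing.
                Ok(7)
            })
            .await;
        // A histogram record would go here, on success and failure alike.
        println!("took {elapsed_ms} ms");
        output
    }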
@@ -99,11 +122,17 @@ pub fn parse_stream_entries<'a>(
         .into_report()
 }

-pub fn increment_stream_index(index: u8, total_streams: u8) -> u8 {
+// Here the output is in the format (stream_index, jobs_picked),
+// similar to the first argument of the function
+pub fn increment_stream_index((index, jobs_picked): (u8, u8), total_streams: u8) -> (u8, u8) {
     if index == total_streams - 1 {
-        0
+        match jobs_picked {
+            0 => metrics::CYCLES_COMPLETED_UNSUCCESSFULLY.add(&metrics::CONTEXT, 1, &[]),
+            _ => metrics::CYCLES_COMPLETED_SUCCESSFULLY.add(&metrics::CONTEXT, 1, &[]),
+        }
+        (0, 0)
     } else {
-        index + 1
+        (index + 1, jobs_picked)
     }
 }
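
A rough behavioural sketch of the new tuple-based signature (the stream count and indices are illustrative): mid-cycle the index advances and `jobs_picked` is carried along; once the last stream is reached, one of the two cycle counters is bumped depending on whether any job was picked, and both values reset.

    // With 8 streams: advancing mid-cycle keeps the picked-jobs count.
    assert_eq!(utils::increment_stream_index((2, 5), 8), (3, 5));
    // Wrapping past the last stream resets both values (and bumps
    // CYCLES_COMPLETED_UNSUCCESSFULLY here, since jobs_picked == 0).
    assert_eq!(utils::increment_stream_index((7, 0), 8), (0, 0));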


@@ -5,7 +5,7 @@ use router_env::opentelemetry::{
     Context,
 };

-pub static CONTEXT: Lazy<Context> = Lazy::new(Context::current);
+pub(crate) static CONTEXT: Lazy<Context> = Lazy::new(Context::current);

 static PT_METER: Lazy<Meter> = Lazy::new(|| global::meter("PROCESS_TRACKER"));

 pub(crate) static CONSUMER_STATS: Lazy<Histogram<f64>> =