feat(metrics): add support for gauge metrics and include IMC metrics (#4939)

Co-authored-by: hyperswitch-bot[bot] <148525504+hyperswitch-bot[bot]@users.noreply.github.com>
This commit is contained in:
Chethan Rao
2024-06-11 20:22:20 +05:30
committed by GitHub
parent a949676f8b
commit 42cd769407
10 changed files with 95 additions and 4 deletions

View File

@ -112,6 +112,7 @@ otel_exporter_otlp_endpoint = "http://localhost:4317" # endpoint to send metrics
otel_exporter_otlp_timeout = 5000 # timeout (in milliseconds) for sending metrics and traces
use_xray_generator = false # Set this to true for AWS X-ray compatible traces
route_to_trace = ["*/confirm"]
bg_metrics_collection_interval_in_secs = 15 # Interval for collecting the metrics in background thread
# This section provides some secret values.
[secrets]

View File

@ -139,6 +139,7 @@ otel_exporter_otlp_endpoint = "http://localhost:4317" # endpoint to send metrics
otel_exporter_otlp_timeout = 5000 # timeout (in milliseconds) for sending metrics and traces
use_xray_generator = false # Set this to true for AWS X-ray compatible traces
route_to_trace = ["*/confirm"]
bg_metrics_collection_interval_in_secs = 15 # Interval for collecting the metrics in background thread
[lock_settings]
delay_between_retries_in_milliseconds = 500 # Delay between retries in milliseconds

View File

@ -10,6 +10,7 @@ log_format = "default"
traces_enabled = false
metrics_enabled = false
use_xray_generator = false
bg_metrics_collection_interval_in_secs = 15
# TODO: Update database credentials before running application
[master_database]

View File

@ -18,7 +18,8 @@ traces_enabled = false # Whether traces are
metrics_enabled = true # Whether metrics are enabled.
ignore_errors = false # Whether to ignore errors during traces or metrics pipeline setup.
otel_exporter_otlp_endpoint = "https://otel-collector:4317" # Endpoint to send metrics and traces to.
use_xray_generator = false
use_xray_generator = false # Set this to true for AWS X-ray compatible traces
bg_metrics_collection_interval_in_secs = 15 # Interval for collecting the metrics in background thread
[master_database]
username = "db_user"

View File

@ -2,6 +2,7 @@ use router::{
configs::settings::{CmdLineConf, Settings},
core::errors::{ApplicationError, ApplicationResult},
logger,
routes::metrics,
};
#[tokio::main]
@ -27,6 +28,11 @@ async fn main() -> ApplicationResult<()> {
logger::info!("Application started [{:?}] [{:?}]", conf.server, conf.log);
// Spawn a thread for collecting metrics at fixed intervals
metrics::bg_metrics_collector::spawn_metrics_collector(
&conf.log.telemetry.bg_metrics_collection_interval_in_secs,
);
#[allow(clippy::expect_used)]
let server = Box::pin(router::start_server(conf))
.await

View File

@ -1,4 +1,8 @@
use router_env::{counter_metric, global_meter, histogram_metric, metrics_context};
pub mod bg_metrics_collector;
pub mod request;
pub mod utils;
use router_env::{counter_metric, gauge_metric, global_meter, histogram_metric, metrics_context};
metrics_context!(CONTEXT);
global_meter!(GLOBAL_METER, "ROUTER_API");
@ -133,5 +137,5 @@ counter_metric!(ACCESS_TOKEN_CACHE_HIT, GLOBAL_METER);
// A counter to indicate the access token cache miss
counter_metric!(ACCESS_TOKEN_CACHE_MISS, GLOBAL_METER);
pub mod request;
pub mod utils;
// Metrics for In-memory cache
gauge_metric!(CACHE_ENTRY_COUNT, GLOBAL_METER);

View File

@ -0,0 +1,46 @@
use storage_impl::redis::cache;
const DEFAULT_BG_METRICS_COLLECTION_INTERVAL_IN_SECS: u16 = 15;
macro_rules! gauge_metrics_for_imc {
($($cache:ident),*) => {
$(
{
cache::$cache.run_pending_tasks().await;
super::CACHE_ENTRY_COUNT.observe(
&super::CONTEXT,
cache::$cache.get_entry_count(),
&[super::request::add_attributes(
"cache_type",
stringify!($cache),
)],
);
}
)*
};
}
pub fn spawn_metrics_collector(metrics_collection_interval_in_secs: &Option<u16>) {
let metrics_collection_interval = metrics_collection_interval_in_secs
.unwrap_or(DEFAULT_BG_METRICS_COLLECTION_INTERVAL_IN_SECS);
tokio::spawn(async move {
loop {
gauge_metrics_for_imc!(
CONFIG_CACHE,
ACCOUNTS_CACHE,
ROUTING_CACHE,
CGRAPH_CACHE,
PM_FILTERS_CGRAPH_CACHE,
DECISION_MANAGER_CACHE,
SURCHARGE_CACHE
);
tokio::time::sleep(std::time::Duration::from_secs(
metrics_collection_interval.into(),
))
.await
}
});
}

View File

@ -103,6 +103,8 @@ pub struct LogTelemetry {
pub use_xray_generator: bool,
/// Route Based Tracing
pub route_to_trace: Option<Vec<String>>,
/// Interval for collecting the metrics (such as gauge) in background thread
pub bg_metrics_collection_interval_in_secs: Option<u16>,
}
/// Telemetry / tracing.

View File

@ -101,3 +101,22 @@ macro_rules! histogram_metric_i64 {
> = once_cell::sync::Lazy::new(|| $meter.i64_histogram($description).init());
};
}
/// Create a [`ObservableGauge`][ObservableGauge] metric with the specified name and an optional description,
/// associated with the specified meter. Note that the meter must be to a valid [`Meter`][Meter].
///
/// [ObservableGauge]: opentelemetry::metrics::ObservableGauge
/// [Meter]: opentelemetry::metrics::Meter
#[macro_export]
macro_rules! gauge_metric {
($name:ident, $meter:ident) => {
pub(crate) static $name: once_cell::sync::Lazy<
$crate::opentelemetry::metrics::ObservableGauge<u64>,
> = once_cell::sync::Lazy::new(|| $meter.u64_observable_gauge(stringify!($name)).init());
};
($name:ident, $meter:ident, description:literal) => {
pub(crate) static $name: once_cell::sync::Lazy<
$crate::opentelemetry::metrics::ObservableGauge<u64>,
> = once_cell::sync::Lazy::new(|| $meter.u64_observable_gauge($description).init());
};
}

View File

@ -207,6 +207,16 @@ impl Cache {
pub async fn remove(&self, key: CacheKey) {
self.inner.invalidate::<String>(&key.into()).await;
}
/// Performs any pending maintenance operations needed by the cache.
pub async fn run_pending_tasks(&self) {
self.inner.run_pending_tasks().await;
}
/// Returns an approximate number of entries in this cache.
pub fn get_entry_count(&self) -> u64 {
self.inner.entry_count()
}
}
#[instrument(skip_all)]