refactor(config): add independent toggles for enabling traces and metrics (#1020)

This commit is contained in:
Sanchith Hegde
2023-05-03 12:15:03 +05:30
committed by GitHub
parent 0df2244794
commit af71828e35
14 changed files with 72 additions and 68 deletions

View File

@ -86,7 +86,9 @@ filtering_directive = "WARN,router=INFO,reqwest=INFO"
# Telemetry configuration for metrics and traces # Telemetry configuration for metrics and traces
[log.telemetry] [log.telemetry]
enabled = false # boolean [true or false] traces_enabled = false # boolean [true or false], whether traces are enabled
metrics_enabled = false # boolean [true or false], whether metrics are enabled
ignore_errors = false # boolean [true or false], whether to ignore errors during traces or metrics pipeline setup
sampling_rate = 0.1 # decimal rate between 0.0 - 1.0 sampling_rate = 0.1 # decimal rate between 0.0 - 1.0
otel_exporter_otlp_endpoint = "http://localhost:4317" # endpoint to send metrics and traces to, can include port number otel_exporter_otlp_endpoint = "http://localhost:4317" # endpoint to send metrics and traces to, can include port number
otel_exporter_otlp_timeout = 5000 # timeout (in milliseconds) for sending metrics and traces otel_exporter_otlp_timeout = 5000 # timeout (in milliseconds) for sending metrics and traces

View File

@ -7,7 +7,8 @@ level = "DEBUG"
log_format = "default" log_format = "default"
[log.telemetry] [log.telemetry]
enabled = false traces_enabled = false
metrics_enabled = false
# TODO: Update database credentials before running application # TODO: Update database credentials before running application
[master_database] [master_database]
@ -31,8 +32,6 @@ connection_timeout = 10
[secrets] [secrets]
admin_api_key = "test_admin" admin_api_key = "test_admin"
[proxy]
[locker] [locker]
host = "" host = ""
mock_locker = true mock_locker = true
@ -148,10 +147,10 @@ stripe = { banks = "abn_amro,asn_bank,bunq,handelsbanken,ing,knab,moneyou,raboba
adyen = { banks = "abn_amro,asn_bank,bunq,handelsbanken,ing,knab,moneyou,rabobank,regiobank,revolut,sns_bank,triodos_bank,van_lanschot" } adyen = { banks = "abn_amro,asn_bank,bunq,handelsbanken,ing,knab,moneyou,rabobank,regiobank,revolut,sns_bank,triodos_bank,van_lanschot" }
[bank_config.online_banking_czech_republic] [bank_config.online_banking_czech_republic]
adyen = { banks = "ceska_sporitelna,komercni_banka,platnosc_online_karta_platnicza"} adyen = { banks = "ceska_sporitelna,komercni_banka,platnosc_online_karta_platnicza" }
[bank_config.online_banking_slovakia] [bank_config.online_banking_slovakia]
adyen = { banks = "e_platby_v_u_b,postova_banka,sporo_pay,tatra_pay,viamo,volksbank_gruppe,volkskredit_bank_ag,vr_bank_braunau"} adyen = { banks = "e_platby_v_u_b,postova_banka,sporo_pay,tatra_pay,viamo,volksbank_gruppe,volkskredit_bank_ag,vr_bank_braunau" }
[pm_filters.stripe] [pm_filters.stripe]
google_pay = { country = "AL,DZ,AS,AO,AG,AR,AU,AT,AZ,BH,BY,BE,BR,BG,CA,CL,CO,HR,CZ,DK,DO,EG,EE,FI,FR,DE,GR,HK,HU,IN,ID,IE,IL,IT,JP,JO,KZ,KE,KW,LV,LB,LT,LU,MY,MX,NL,NZ,NO,OM,PK,PA,PE,PH,PL,PT,QA,RO,RU,SA,SG,SK,ZA,ES,LK,SE,CH,TW,TH,TR,UA,AE,GB,US,UY,VN" } google_pay = { country = "AL,DZ,AS,AO,AG,AR,AU,AT,AZ,BH,BY,BE,BR,BG,CA,CL,CO,HR,CZ,DK,DO,EG,EE,FI,FR,DE,GR,HK,HU,IN,ID,IE,IL,IT,JP,JO,KZ,KE,KW,LV,LB,LT,LU,MY,MX,NL,NZ,NO,OM,PK,PA,PE,PH,PL,PT,QA,RO,RU,SA,SG,SK,ZA,ES,LK,SE,CH,TW,TH,TR,UA,AE,GB,US,UY,VN" }
@ -195,5 +194,5 @@ bucket_name = ""
region = "" region = ""
[tokenization] [tokenization]
stripe = { long_lived_token = false, payment_method = "wallet"} stripe = { long_lived_token = false, payment_method = "wallet" }
checkout = { long_lived_token = false, payment_method = "wallet"} checkout = { long_lived_token = false, payment_method = "wallet" }

View File

@ -14,7 +14,10 @@ enabled = true # Whether you want to see log in your terminal.
level = "DEBUG" # What you see in your terminal. level = "DEBUG" # What you see in your terminal.
[log.telemetry] [log.telemetry]
enabled = false # Whether tracing/telemetry is enabled. traces_enabled = false # Whether traces are enabled.
metrics_enabled = false # Whether metrics are enabled.
ignore_errors = false # Whether to ignore errors during traces or metrics pipeline setup.
otel_exporter_otlp_endpoint = "https://otel-collector:4317" # Endpoint to send metrics and traces to.
[master_database] [master_database]
username = "db_user" username = "db_user"
@ -32,10 +35,6 @@ port = 5432
dbname = "hyperswitch_db" dbname = "hyperswitch_db"
pool_size = 5 pool_size = 5
[proxy]
# http_url = "http proxy URL"
# https_url = "https proxy URL"
[secrets] [secrets]
admin_api_key = "test_admin" admin_api_key = "test_admin"
jwt_secret = "secret" jwt_secret = "secret"

View File

@ -6,9 +6,7 @@ pub mod logger {
pub use router_env::{log, logger::*}; pub use router_env::{log, logger::*};
/// Setup logging sub-system /// Setup logging sub-system
pub fn setup( pub fn setup(conf: &config::Log) -> TelemetryGuard {
conf: &config::Log, router_env::setup(conf, router_env::service_name!(), [])
) -> error_stack::Result<TelemetryGuard, router_env::opentelemetry::metrics::MetricsError> {
Ok(router_env::setup(conf, router_env::service_name!(), [])?)
} }
} }

View File

@ -9,8 +9,6 @@ pub enum DrainerError {
RedisError(error_stack::Report<redis::errors::RedisError>), RedisError(error_stack::Report<redis::errors::RedisError>),
#[error("Application configuration error: {0}")] #[error("Application configuration error: {0}")]
ConfigurationError(config::ConfigError), ConfigurationError(config::ConfigError),
#[error("Metrics initialization error")]
MetricsError,
#[error("Error while configuring signals: {0}")] #[error("Error while configuring signals: {0}")]
SignalError(String), SignalError(String),
#[error("Unexpected error occurred: {0}")] #[error("Unexpected error occurred: {0}")]

View File

@ -1,5 +1,4 @@
use drainer::{errors, errors::DrainerResult, logger::logger, services, settings, start_drainer}; use drainer::{errors::DrainerResult, logger::logger, services, settings, start_drainer};
use error_stack::ResultExt;
#[tokio::main] #[tokio::main]
async fn main() -> DrainerResult<()> { async fn main() -> DrainerResult<()> {
@ -21,7 +20,7 @@ async fn main() -> DrainerResult<()> {
let shutdown_intervals = conf.drainer.shutdown_interval; let shutdown_intervals = conf.drainer.shutdown_interval;
let loop_interval = conf.drainer.loop_interval; let loop_interval = conf.drainer.loop_interval;
let _guard = logger::setup(&conf.log).change_context(errors::DrainerError::MetricsError)?; let _guard = logger::setup(&conf.log);
logger::info!("Drainer started [{:?}] [{:?}]", conf.drainer, conf.log); logger::info!("Drainer started [{:?}] [{:?}]", conf.drainer, conf.log);

View File

@ -34,7 +34,7 @@ async fn main() -> ApplicationResult<()> {
conf.validate() conf.validate()
.expect("Failed to validate router configuration"); .expect("Failed to validate router configuration");
let _guard = logger::setup(&conf.log)?; let _guard = logger::setup(&conf.log);
logger::info!("Application started [{:?}] [{:?}]", conf.server, conf.log); logger::info!("Application started [{:?}] [{:?}]", conf.server, conf.log);

View File

@ -28,8 +28,7 @@ async fn main() -> CustomResult<(), errors::ProcessTrackerError> {
redis_shutdown_signal_rx, redis_shutdown_signal_rx,
tx.clone(), tx.clone(),
)); ));
let _guard = let _guard = logger::setup(&state.conf.log);
logger::setup(&state.conf.log).map_err(|_| errors::ProcessTrackerError::UnexpectedFlow)?;
logger::debug!(startup_config=?state.conf); logger::debug!(startup_config=?state.conf);

View File

@ -4,12 +4,8 @@ pub mod logger {
#[doc(inline)] #[doc(inline)]
pub use router_env::{log, logger::*}; pub use router_env::{log, logger::*};
// TODO (prom-monitoring): Ideally tracing/opentelemetry structs shouldn't be pushed out.
// Return a custom error type instead of `opentelemetry::metrics::MetricsError`.
/// Setup logging sub-system. /// Setup logging sub-system.
pub fn setup( pub fn setup(conf: &config::Log) -> TelemetryGuard {
conf: &config::Log,
) -> Result<TelemetryGuard, router_env::opentelemetry::metrics::MetricsError> {
router_env::setup(conf, router_env::service_name!(), ["actix_server"]) router_env::setup(conf, router_env::service_name!(), ["actix_server"])
} }
} }

View File

@ -87,8 +87,12 @@ pub struct LogConsole {
#[derive(Debug, Deserialize, Clone, Default)] #[derive(Debug, Deserialize, Clone, Default)]
#[serde(default)] #[serde(default)]
pub struct LogTelemetry { pub struct LogTelemetry {
/// Whether tracing/telemetry is enabled. /// Whether the traces pipeline is enabled.
pub enabled: bool, pub traces_enabled: bool,
/// Whether the metrics pipeline is enabled.
pub metrics_enabled: bool,
/// Whether errors in setting up traces or metrics pipelines must be ignored.
pub ignore_errors: bool,
/// Sampling rate for traces /// Sampling rate for traces
pub sampling_rate: Option<f64>, pub sampling_rate: Option<f64>,
/// Base endpoint URL to send metrics and traces to. Can optionally include the port number. /// Base endpoint URL to send metrics and traces to. Can optionally include the port number.

View File

@ -10,7 +10,6 @@ use opentelemetry::{
propagation::TraceContextPropagator, propagation::TraceContextPropagator,
trace, Resource, trace, Resource,
}, },
trace::TraceError,
KeyValue, KeyValue,
}; };
use opentelemetry_otlp::{TonicExporterBuilder, WithExportConfig}; use opentelemetry_otlp::{TonicExporterBuilder, WithExportConfig};
@ -33,22 +32,19 @@ pub fn setup(
config: &config::Log, config: &config::Log,
service_name: &'static str, service_name: &'static str,
crates_to_filter: impl AsRef<[&'static str]>, crates_to_filter: impl AsRef<[&'static str]>,
) -> Result<TelemetryGuard, opentelemetry::metrics::MetricsError> { ) -> TelemetryGuard {
let mut guards = Vec::new(); let mut guards = Vec::new();
// Setup OpenTelemetry traces and metrics // Setup OpenTelemetry traces and metrics
let (telemetry_tracer, _metrics_controller) = if config.telemetry.enabled { let traces_layer = if config.telemetry.traces_enabled {
global::set_text_map_propagator(TraceContextPropagator::new()); setup_tracing_pipeline(&config.telemetry, service_name)
(
setup_tracing_pipeline(&config.telemetry, service_name),
setup_metrics_pipeline(&config.telemetry),
)
} else { } else {
(None, None) None
}; };
let telemetry_layer = match telemetry_tracer { let _metrics_controller = if config.telemetry.metrics_enabled {
Some(Ok(ref tracer)) => Some(tracing_opentelemetry::layer().with_tracer(tracer.clone())), setup_metrics_pipeline(&config.telemetry)
_ => None, } else {
None
}; };
// Setup file logging // Setup file logging
@ -75,7 +71,7 @@ pub fn setup(
}; };
let subscriber = tracing_subscriber::registry() let subscriber = tracing_subscriber::registry()
.with(telemetry_layer) .with(traces_layer)
.with(StorageSubscription) .with(StorageSubscription)
.with(file_writer); .with(file_writer);
@ -110,17 +106,12 @@ pub fn setup(
subscriber.init(); subscriber.init();
}; };
if let Some(Err(err)) = telemetry_tracer {
tracing::error!("Failed to create an opentelemetry_otlp tracer: {err}");
eprintln!("Failed to create an opentelemetry_otlp tracer: {err}");
}
// Returning the TelemetryGuard for logs to be printed and metrics to be collected until it is // Returning the TelemetryGuard for logs to be printed and metrics to be collected until it is
// dropped // dropped
Ok(TelemetryGuard { TelemetryGuard {
_log_guards: guards, _log_guards: guards,
_metrics_controller, _metrics_controller,
}) }
} }
fn get_opentelemetry_exporter(config: &config::LogTelemetry) -> TonicExporterBuilder { fn get_opentelemetry_exporter(config: &config::LogTelemetry) -> TonicExporterBuilder {
@ -139,7 +130,10 @@ fn get_opentelemetry_exporter(config: &config::LogTelemetry) -> TonicExporterBui
fn setup_tracing_pipeline( fn setup_tracing_pipeline(
config: &config::LogTelemetry, config: &config::LogTelemetry,
service_name: &'static str, service_name: &'static str,
) -> Option<Result<trace::Tracer, TraceError>> { ) -> Option<tracing_opentelemetry::OpenTelemetryLayer<tracing_subscriber::Registry, trace::Tracer>>
{
global::set_text_map_propagator(TraceContextPropagator::new());
let trace_config = trace::config() let trace_config = trace::config()
.with_sampler(trace::Sampler::TraceIdRatioBased( .with_sampler(trace::Sampler::TraceIdRatioBased(
config.sampling_rate.unwrap_or(1.0), config.sampling_rate.unwrap_or(1.0),
@ -148,14 +142,24 @@ fn setup_tracing_pipeline(
"service.name", "service.name",
service_name, service_name,
)])); )]));
let traces_layer_result = opentelemetry_otlp::new_pipeline()
let tracer = opentelemetry_otlp::new_pipeline()
.tracing() .tracing()
.with_exporter(get_opentelemetry_exporter(config)) .with_exporter(get_opentelemetry_exporter(config))
.with_trace_config(trace_config) .with_trace_config(trace_config)
.install_simple(); .install_simple()
.map(|tracer| tracing_opentelemetry::layer().with_tracer(tracer));
Some(tracer) if config.ignore_errors {
traces_layer_result
.map_err(|error| {
eprintln!("Failed to create an `opentelemetry_otlp` tracer: {error:?}")
})
.ok()
} else {
// Safety: This panic is conditional; it only occurs when `ignore_errors` is `false`, and setting `ignore_errors = true` avoids it at runtime.
#[allow(clippy::expect_used)]
Some(traces_layer_result.expect("Failed to create an `opentelemetry_otlp` tracer"))
}
} }
fn setup_metrics_pipeline(config: &config::LogTelemetry) -> Option<BasicController> { fn setup_metrics_pipeline(config: &config::LogTelemetry) -> Option<BasicController> {
@ -170,7 +174,7 @@ fn setup_metrics_pipeline(config: &config::LogTelemetry) -> Option<BasicControll
buckets buckets
}; };
opentelemetry_otlp::new_pipeline() let metrics_controller_result = opentelemetry_otlp::new_pipeline()
.metrics( .metrics(
simple::histogram(histogram_buckets), simple::histogram(histogram_buckets),
cumulative_temporality_selector(), cumulative_temporality_selector(),
@ -180,9 +184,17 @@ fn setup_metrics_pipeline(config: &config::LogTelemetry) -> Option<BasicControll
.with_exporter(get_opentelemetry_exporter(config)) .with_exporter(get_opentelemetry_exporter(config))
.with_period(Duration::from_secs(3)) .with_period(Duration::from_secs(3))
.with_timeout(Duration::from_secs(10)) .with_timeout(Duration::from_secs(10))
.build() .build();
.map_err(|err| eprintln!("Failed to setup metrics pipeline: {err:?}"))
.ok() if config.ignore_errors {
metrics_controller_result
.map_err(|error| eprintln!("Failed to setup metrics pipeline: {error:?}"))
.ok()
} else {
// Safety: This panic is conditional; it only occurs when `ignore_errors` is `false`, and setting `ignore_errors = true` avoids it at runtime.
#[allow(clippy::expect_used)]
Some(metrics_controller_result.expect("Failed to setup metrics pipeline"))
}
} }
fn get_envfilter( fn get_envfilter(

View File

@ -12,7 +12,7 @@ fn logger() -> &'static TelemetryGuard {
INSTANCE.get_or_init(|| { INSTANCE.get_or_init(|| {
let config = env::Config::new().unwrap(); let config = env::Config::new().unwrap();
env::logger::setup(&config.log, env::service_name!(), []).unwrap() env::logger::setup(&config.log, env::service_name!(), [])
}) })
} }

View File

@ -118,7 +118,6 @@ services:
- cargo_build_cache:/cargo_build_cache - cargo_build_cache:/cargo_build_cache
environment: environment:
- CARGO_TARGET_DIR=/cargo_build_cache - CARGO_TARGET_DIR=/cargo_build_cache
- OTEL_EXPORTER_OTLP_ENDPOINT=https://otel-collector:4317
labels: labels:
logs: "promtail" logs: "promtail"
healthcheck: healthcheck:
@ -146,7 +145,6 @@ services:
- p_cargo_build_cache:/cargo_build_cache - p_cargo_build_cache:/cargo_build_cache
environment: environment:
- CARGO_TARGET_DIR=/cargo_build_cache - CARGO_TARGET_DIR=/cargo_build_cache
- OTEL_EXPORTER_OTLP_ENDPOINT=https://otel-collector:4317
- SCHEDULER_FLOW=producer - SCHEDULER_FLOW=producer
depends_on: depends_on:
hyperswitch-consumer: hyperswitch-consumer:
@ -168,7 +166,6 @@ services:
- c_cargo_build_cache:/cargo_build_cache - c_cargo_build_cache:/cargo_build_cache
environment: environment:
- CARGO_TARGET_DIR=/cargo_build_cache - CARGO_TARGET_DIR=/cargo_build_cache
- OTEL_EXPORTER_OTLP_ENDPOINT=https://otel-collector:4317
- SCHEDULER_FLOW=consumer - SCHEDULER_FLOW=consumer
depends_on: depends_on:
hyperswitch-server: hyperswitch-server:
@ -288,7 +285,6 @@ services:
- cargo_build_cache:/cargo_build_cache - cargo_build_cache:/cargo_build_cache
environment: environment:
- CARGO_TARGET_DIR=/cargo_build_cache - CARGO_TARGET_DIR=/cargo_build_cache
- OTEL_EXPORTER_OTLP_ENDPOINT=https://otel-collector:4317
restart: unless-stopped restart: unless-stopped
depends_on: depends_on:
hyperswitch-server: hyperswitch-server:

View File

@ -5,7 +5,9 @@ enabled = false
enabled = false enabled = false
[log.telemetry] [log.telemetry]
enabled = true traces_enabled = true
metrics_enabled = true
ignore_errors = false
[master_database] [master_database]
username = "postgres" username = "postgres"
@ -119,5 +121,5 @@ cards = [
"trustpay", "trustpay",
"worldline", "worldline",
"worldpay", "worldpay",
"zen", "zen",
] ]