refactor(router_env): improve logging setup (#847)

This commit is contained in:
Sanchith Hegde
2023-04-11 19:44:48 +05:30
committed by GitHub
parent ce912dd852
commit 1b94d25f66
12 changed files with 274 additions and 203 deletions

17
Cargo.lock generated
View File

@ -1068,6 +1068,20 @@ dependencies = [
"serde_json",
]
[[package]]
name = "cargo_metadata"
version = "0.15.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eee4243f1f26fc7a42710e7439c149e2b10b05472f88090acce52632f231a73a"
dependencies = [
"camino",
"cargo-platform",
"semver",
"serde",
"serde_json",
"thiserror",
]
[[package]]
name = "cc"
version = "1.0.79"
@ -3540,6 +3554,7 @@ dependencies = [
name = "router_env"
version = "0.1.0"
dependencies = [
"cargo_metadata 0.15.4",
"config",
"gethostname",
"once_cell",
@ -4013,7 +4028,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16d23b015676c90a0f01c197bfdc786c20342c73a0afdda9025adb0bc42940a8"
dependencies = [
"bytecount",
"cargo_metadata",
"cargo_metadata 0.14.2",
"error-chain",
"glob",
"pulldown-cmark",

View File

@ -61,6 +61,13 @@ file_name = "debug.log" # base name for log files.
# levels can be "TRACE", "DEBUG", "INFO", "WARN", "ERROR", "OFF"
# defaults to "WARN"
level = "WARN"
# sets the log level for one or more crates
filtering_directive = "WARN,router=INFO,reqwest=INFO"
# ^^^^ ^^^^---------^^^^-- sets the log level for the
# | router and reqwest crates to INFO.
# |
# |______________________________ sets the log level for all
# other crates to WARN.
# Logging configuration for console logging
[log.console]
@ -69,11 +76,20 @@ log_format = "default" # Log format. "default" or "json"
# levels can be "TRACE", "DEBUG", "INFO", "WARN", "ERROR", "OFF"
# defaults to "WARN"
level = "DEBUG"
# sets the log level for one or more crates
filtering_directive = "WARN,router=INFO,reqwest=INFO"
# ^^^^ ^^^^---------^^^^-- sets the log level for the
# | router and reqwest crates to INFO.
# |
# |______________________________ sets the log level for all
# other crates to WARN.
# Telemetry configuration for traces
# Telemetry configuration for metrics and traces
[log.telemetry]
enabled = false # boolean [true or false]
sampling_rate = 0.1 # decimal rate between 0.0 - 1.0
otel_exporter_otlp_endpoint = "http://localhost:4317" # endpoint to send metrics and traces to, can include port number
otel_exporter_otlp_timeout = 5000 # timeout (in milliseconds) for sending metrics and traces
# This section provides some secret values.
[secrets]

View File

@ -5,24 +5,10 @@ pub mod logger {
#[doc(inline)]
pub use router_env::{log, logger::*};
///
/// Setup logging sub-system
///
///
pub fn setup(
conf: &config::Log,
) -> error_stack::Result<TelemetryGuard, router_env::opentelemetry::metrics::MetricsError> {
Ok(router_env::setup(
conf,
"drainer",
vec![
"drainer",
"common_utils",
"external_services",
"redis_interface",
"router_env",
"storage_models",
],
)?)
Ok(router_env::setup(conf, router_env::service_name!(), [])?)
}
}

View File

@ -4,29 +4,12 @@ pub mod logger {
#[doc(inline)]
pub use router_env::{log, logger::*};
///
/// Setup logging sub-system.
///
// TODO (prom-monitoring): Ideally tracing/opentelementry structs shouldn't be pushed out.
// TODO (prom-monitoring): Ideally tracing/opentelemetry structs shouldn't be pushed out.
// Return a custom error type instead of `opentelemetry::metrics::MetricsError`.
/// Setup logging sub-system.
pub fn setup(
conf: &config::Log,
) -> Result<TelemetryGuard, router_env::opentelemetry::metrics::MetricsError> {
router_env::setup(
conf,
"router",
vec![
"router",
"actix_server",
"api_models",
"common_utils",
"external_services",
"masking",
"redis_interface",
"router_derive",
"router_env",
"storage_models",
],
)
router_env::setup(conf, router_env::service_name!(), ["actix_server"])
}
}

View File

@ -6,7 +6,6 @@ edition = "2021"
rust-version = "1.65"
readme = "README.md"
license = "Apache-2.0"
build = "src/build.rs"
[dependencies]
config = { version = "0.13.3", features = ["toml"] }
@ -26,13 +25,14 @@ tracing-actix-web = { version = "0.7.2", features = ["opentelemetry_0_18"], opti
tracing-appender = { version = "0.2.2" }
tracing-attributes = "=0.1.22"
tracing-opentelemetry = { version = "0.18.0" }
tracing-subscriber = { version = "0.3.16", default-features = true, features = ["json", "env-filter", "registry"] }
tracing-subscriber = { version = "0.3.16", default-features = true, features = ["env-filter", "json", "registry"] }
vergen = { version = "8.0.0-beta.9", optional = true, features = ["cargo", "git", "git2", "rustc"] }
[dev-dependencies]
tokio = { version = "1.26.0", features = ["macros", "rt-multi-thread"] }
[build-dependencies]
cargo_metadata = "0.15.4"
vergen = { version = "8.0.0-beta.9", features = ["cargo", "git", "git2", "rustc"], optional = true }
[features]

View File

@ -0,0 +1,36 @@
include!("src/vergen.rs");
/// Build script entry point.
///
/// Emits `cargo:` instructions for build metadata (via `generate_cargo_instructions()`
/// from the included `src/vergen.rs`), then queries cargo metadata for the list of
/// workspace member crates and exposes their names to the compiled crate through the
/// `CARGO_WORKSPACE_MEMBERS` environment variable as a comma-separated list.
fn main() {
    generate_cargo_instructions();

    #[allow(clippy::expect_used)] // Safety: panicking in build scripts is okay for us
    let metadata = cargo_metadata::MetadataCommand::new()
        .exec()
        .expect("Failed to obtain cargo metadata");
    let workspace_members = metadata.workspace_members;

    // Package IDs are expected to start with `<name> <version>`; assert that this
    // crate's own entry matches that shape so we fail loudly if cargo ever changes
    // the package ID format (instead of silently emitting wrong crate names below).
    let package_id_entry_prefix =
        format!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"));
    assert!(
        workspace_members
            .iter()
            .any(|package_id| package_id.repr.starts_with(&package_id_entry_prefix)),
        "Unknown workspace members package ID format. \
        Please run `cargo metadata --format-version=1 | jq '.workspace_members'` and update this \
        build script to match the updated package ID format."
    );

    // Keep only the crate name: the token before the first space in each package ID.
    let workspace_members = workspace_members
        .iter()
        .map(|package_id| {
            #[allow(clippy::expect_used)] // Safety: panicking in build scripts is okay for us
            package_id
                .repr
                .split_once(' ')
                .expect("Unknown cargo metadata package ID format")
                .0
        })
        .collect::<Vec<_>>()
        .join(",");

    println!("cargo:rustc-env=CARGO_WORKSPACE_MEMBERS={workspace_members}");
}

View File

@ -1,5 +0,0 @@
include!("vergen.rs");
fn main() {
    // Emit `cargo:` instructions for build metadata (defined in the included `vergen.rs`).
    generate_cargo_instructions();
}

View File

@ -1,21 +1,24 @@
//!
//! Current environment related stuff.
//!
//! Information about the current environment.
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
use strum::{Display, EnumString};
/// Parent dir where Cargo.toml is stored
pub const CARGO_MANIFEST_DIR: &str = "CARGO_MANIFEST_DIR";
/// Env variable that sets Development/Production env
pub const RUN_ENV: &str = "RUN_ENV";
/// Environment variables accessed by the application. This module aims to be the source of truth
/// containing all environment variable that the application accesses.
pub mod vars {
/// Parent directory where `Cargo.toml` is stored.
pub const CARGO_MANIFEST_DIR: &str = "CARGO_MANIFEST_DIR";
/// Environment variable that sets Development/Sandbox/Production environment.
pub const RUN_ENV: &str = "RUN_ENV";
/// Directory of config TOML files. Default is `config`.
pub const CONFIG_DIR: &str = "CONFIG_DIR";
}
///
/// Current environment.
///
#[derive(Debug, Default, Deserialize, Serialize, Clone, Copy, Display, EnumString)]
pub enum Env {
/// Development environment.
@ -34,7 +37,7 @@ pub fn which() -> Env {
#[cfg(not(debug_assertions))]
let default_env = Env::Production;
std::env::var(RUN_ENV).map_or_else(|_| default_env, |v| v.parse().unwrap_or(default_env))
std::env::var(vars::RUN_ENV).map_or_else(|_| default_env, |v| v.parse().unwrap_or(default_env))
}
/// Three letter (lowercase) prefix corresponding to the current environment.
@ -47,33 +50,11 @@ pub fn prefix_for_env() -> &'static str {
}
}
///
/// Base path to look for config and logs directories.
/// Application expects to find `./config/` and `./logs/` relative this directories.
///
/// Using a workspace and splitting the monolith into crates introduces an extra level of complexity.
/// We can't rely on current working directory anymore because there are several ways of running applications.
///
/// Developer can run application from root of repository:
/// ```bash
/// cargo run
/// ```
///
/// Alternatively, developer can run from directory of crate:
/// ```bash
/// cd crates/router
/// cargo run
/// ```
///
/// Config and log files are located at the repository root. No matter how the application is run, it should work properly.
/// `router_log::env::workspace_path` takes care of the problem by returning the workspace path, relative to which other paths should be calculated.
///
/// Path to the root directory of the cargo workspace.
/// It is recommended that this be used by the application as the base path to build other paths
/// such as configuration and logs directories.
pub fn workspace_path() -> PathBuf {
// for (key, value) in std::env::vars() {
// println!("{key} : {value}");
// }
if let Ok(manifest_dir) = std::env::var(CARGO_MANIFEST_DIR) {
if let Ok(manifest_dir) = std::env::var(vars::CARGO_MANIFEST_DIR) {
let mut path = PathBuf::from(manifest_dir);
path.pop();
path.pop();
@ -105,10 +86,9 @@ macro_rules! version {
};
}
/// A string uniquely identifying the application build.
///
/// A string uniquely identify build of the service.
///
/// Consists of combination of
/// Consists of a combination of:
/// - Version defined in the crate file
/// - Timestamp of commit
/// - Hash of the commit
@ -116,8 +96,6 @@ macro_rules! version {
/// - Target triple
///
/// Example: `0.1.0-f5f383e-2022-09-04T11:39:37Z-1.63.0-x86_64-unknown-linux-gnu`
///
#[cfg(feature = "vergen")]
#[macro_export]
macro_rules! build {
@ -138,11 +116,9 @@ macro_rules! build {
};
}
/// Short hash of the current commit.
///
/// Full hash of the current commit.
///
/// Example: `f5f383ee7e36214d60ce3c6353b57db03ff0ceb1`.
///
/// Example: `f5f383e`.
#[cfg(feature = "vergen")]
#[macro_export]
macro_rules! commit {
@ -151,13 +127,11 @@ macro_rules! commit {
};
}
// ///
// /// Information about the platform on which service was built, including:
// /// - Information about OS
// /// - Information about CPU
// ///
// /// Example: ``.
// ///
// #[macro_export]
// macro_rules! platform {
// (
@ -170,12 +144,9 @@ macro_rules! commit {
// };
// }
///
/// Service name deduced from name of the crate.
///
/// Example: `router`.
///
#[macro_export]
macro_rules! service_name {
() => {
@ -183,12 +154,9 @@ macro_rules! service_name {
};
}
///
/// Build profile, either debug or release.
///
/// Example: `release`.
///
#[macro_export]
macro_rules! profile {
() => {

View File

@ -6,9 +6,6 @@ use std::path::PathBuf;
use serde::Deserialize;
/// Directory of config toml files. Default is config
pub const CONFIG_DIR: &str = "CONFIG_DIR";
/// Config settings.
#[derive(Debug, Deserialize, Clone)]
pub struct Config {
@ -38,14 +35,16 @@ pub struct LogFile {
pub path: String,
/// Name of log file without suffix.
pub file_name: String,
// pub do_async: bool, // is not used
/// What gets into log files.
pub level: Level,
/// Directive which sets the log level for one or more crates/modules.
pub filtering_directive: Option<String>,
// pub do_async: bool, // is not used
// pub rotation: u16,
}
/// Describes the level of verbosity of a span or event.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Copy)]
pub struct Level(pub(super) tracing::Level);
impl Level {
@ -80,6 +79,8 @@ pub struct LogConsole {
/// Log format
#[serde(default)]
pub log_format: LogFormat,
/// Directive which sets the log level for one or more crates/modules.
pub filtering_directive: Option<String>,
}
/// Telemetry / tracing.
@ -90,6 +91,10 @@ pub struct LogTelemetry {
pub enabled: bool,
/// Sampling rate for traces
pub sampling_rate: Option<f64>,
/// Base endpoint URL to send metrics and traces to. Can optionally include the port number.
pub otel_exporter_otlp_endpoint: Option<String>,
/// Timeout (in milliseconds) for sending metrics and traces.
pub otel_exporter_otlp_timeout: Option<u64>,
}
/// Telemetry / tracing.
@ -158,7 +163,8 @@ impl Config {
if let Some(explicit_config_path_val) = explicit_config_path {
config_path.push(explicit_config_path_val);
} else {
let config_directory = std::env::var(CONFIG_DIR).unwrap_or_else(|_| "config".into());
let config_directory =
std::env::var(crate::env::vars::CONFIG_DIR).unwrap_or_else(|_| "config".into());
let config_file_name = match environment {
"Production" => "Production.toml",
"Sandbox" => "Sandbox.toml",

View File

@ -5,6 +5,7 @@ impl Default for super::config::LogFile {
path: "logs".into(),
file_name: "debug.log".into(),
level: super::config::Level(tracing::Level::DEBUG),
filtering_directive: None,
}
}
}
@ -15,6 +16,7 @@ impl Default for super::config::LogConsole {
enabled: false,
level: super::config::Level(tracing::Level::INFO),
log_format: super::config::LogFormat::Json,
filtering_directive: None,
}
}
}

View File

@ -1,9 +1,7 @@
//!
//! Setup logging subsystem.
//!
use std::{path::PathBuf, time::Duration};
use once_cell::sync::Lazy;
use std::{collections::HashSet, time::Duration};
use opentelemetry::{
global, runtime,
sdk::{
@ -12,108 +10,99 @@ use opentelemetry::{
propagation::TraceContextPropagator,
trace, Resource,
},
trace::TraceError,
KeyValue,
};
use opentelemetry_otlp::WithExportConfig;
use opentelemetry_otlp::{TonicExporterBuilder, WithExportConfig};
use tracing_appender::non_blocking::WorkerGuard;
use tracing_subscriber::{filter, fmt, prelude::*, util::SubscriberInitExt, EnvFilter, Layer};
use tracing_subscriber::{fmt, prelude::*, util::SubscriberInitExt, EnvFilter, Layer};
use crate::{config, FormattingLayer, Level, StorageSubscription};
use crate::{config, FormattingLayer, StorageSubscription};
/// TelemetryGuard which helps with
/// Contains guards necessary for logging and metrics collection.
#[derive(Debug)]
pub struct TelemetryGuard {
_log_guards: Vec<WorkerGuard>,
_metric_controller: Option<BasicController>,
_metrics_controller: Option<BasicController>,
}
///
/// Setup logging sub-system specifying.
/// Expects config and list of names of crates to watch.
///
pub fn setup<Str: AsRef<str>>(
conf: &config::Log,
service_name: &str,
crates_to_watch: Vec<Str>,
/// Setup logging sub-system specifying the logging configuration, service (binary) name, and a
/// list of external crates for which a more verbose logging must be enabled. All crates within the
/// current cargo workspace are automatically considered for verbose logging.
pub fn setup(
config: &config::Log,
service_name: &'static str,
crates_to_filter: impl AsRef<[&'static str]>,
) -> Result<TelemetryGuard, opentelemetry::metrics::MetricsError> {
let mut guards = Vec::new();
// Setup OpenTelemetry traces and metrics
let (telemetry_tracer, _metrics_controller) = if config.telemetry.enabled {
global::set_text_map_propagator(TraceContextPropagator::new());
let telemetry = if conf.telemetry.enabled {
let trace_config = trace::config()
.with_sampler(trace::Sampler::TraceIdRatioBased(
conf.telemetry.sampling_rate.unwrap_or(1.0),
))
.with_resource(Resource::new(vec![KeyValue::new("service.name", "router")]));
let tracer = opentelemetry_otlp::new_pipeline()
.tracing()
.with_exporter(opentelemetry_otlp::new_exporter().tonic().with_env())
.with_trace_config(trace_config)
.install_simple();
Some(tracer)
(
setup_tracing_pipeline(&config.telemetry, service_name),
setup_metrics_pipeline(&config.telemetry),
)
} else {
None
(None, None)
};
let file_writer = if conf.file.enabled {
let mut path: PathBuf = conf.file.path.clone().into();
path.push(crate::env::workspace_path());
path.push(&conf.file.path);
// println!("{:?} + {:?}", &path, &conf.file.file_name);
let file_appender = tracing_appender::rolling::hourly(&path, &conf.file.file_name);
let (file_writer, guard) = tracing_appender::non_blocking(file_appender);
guards.push(guard);
let file_filter = filter::Targets::new().with_default(conf.file.level.into_level());
let file_layer = FormattingLayer::new(service_name, file_writer).with_filter(file_filter);
Some(file_layer)
} else {
None
};
let telemetry_layer = match telemetry {
let telemetry_layer = match telemetry_tracer {
Some(Ok(ref tracer)) => Some(tracing_opentelemetry::layer().with_tracer(tracer.clone())),
_ => None,
};
// Use 'RUST_LOG' environment variable will override the config settings
// Setup file logging
let file_writer = if config.file.enabled {
let mut path = crate::env::workspace_path();
// Using an absolute path for file log path would replace workspace path with absolute path,
// which is the intended behavior for us.
path.push(&config.file.path);
let file_appender = tracing_appender::rolling::hourly(&path, &config.file.file_name);
let (file_writer, guard) = tracing_appender::non_blocking(file_appender);
guards.push(guard);
let file_filter = get_envfilter(
config.file.filtering_directive.as_ref(),
config::Level(tracing::Level::WARN),
config.file.level,
&crates_to_filter,
);
Some(FormattingLayer::new(service_name, file_writer).with_filter(file_filter))
} else {
None
};
let subscriber = tracing_subscriber::registry()
.with(telemetry_layer)
.with(StorageSubscription)
.with(file_writer)
.with(
EnvFilter::builder()
.with_default_directive(Level::TRACE.into())
.from_env_lossy(),
);
.with(file_writer);
if conf.console.enabled {
// Setup console logging
if config.console.enabled {
let (console_writer, guard) = tracing_appender::non_blocking(std::io::stdout());
guards.push(guard);
let level = conf.console.level.into_level();
let mut console_filter = filter::Targets::new().with_default(Level::WARN);
for acrate in crates_to_watch {
console_filter = console_filter.with_target(acrate.as_ref(), level);
}
let console_filter = get_envfilter(
config.console.filtering_directive.as_ref(),
config::Level(tracing::Level::WARN),
config.console.level,
&crates_to_filter,
);
match conf.console.log_format {
match config.console.log_format {
config::LogFormat::Default => {
let logging_layer = fmt::layer()
.with_timer(fmt::time::time())
.with_span_events(fmt::format::FmtSpan::NONE)
.pretty()
.with_writer(console_writer);
subscriber
.with(logging_layer.with_filter(console_filter))
.init();
.with_writer(console_writer)
.with_filter(console_filter);
subscriber.with(logging_layer).init();
}
config::LogFormat::Json => {
let logging_layer = FormattingLayer::new(service_name, console_writer);
let logging_layer =
FormattingLayer::new(service_name, console_writer).with_filter(console_filter);
subscriber.with(logging_layer).init();
}
}
@ -121,18 +110,56 @@ pub fn setup<Str: AsRef<str>>(
subscriber.init();
};
if let Some(Err(err)) = telemetry {
if let Some(Err(err)) = telemetry_tracer {
tracing::error!("Failed to create an opentelemetry_otlp tracer: {err}");
eprintln!("Failed to create an opentelemetry_otlp tracer: {err}");
}
// Returning the WorkerGuard for logs to be printed until it is dropped
// Returning the TelemetryGuard for logs to be printed and metrics to be collected until it is
// dropped
Ok(TelemetryGuard {
_log_guards: guards,
_metric_controller: setup_metrics(),
_metrics_controller,
})
}
static HISTOGRAM_BUCKETS: Lazy<[f64; 15]> = Lazy::new(|| {
/// Construct the OTLP tonic exporter builder, applying the optional endpoint and
/// timeout overrides from the telemetry configuration when they are set.
fn get_opentelemetry_exporter(config: &config::LogTelemetry) -> TonicExporterBuilder {
    let builder = opentelemetry_otlp::new_exporter().tonic();

    let builder = match config.otel_exporter_otlp_endpoint.as_deref() {
        Some(endpoint) => builder.with_endpoint(endpoint),
        None => builder,
    };

    match config.otel_exporter_otlp_timeout {
        Some(millis) => builder.with_timeout(Duration::from_millis(millis)),
        None => builder,
    }
}
/// Set up the OpenTelemetry tracing pipeline from the telemetry configuration,
/// tagging emitted traces with the given service name.
fn setup_tracing_pipeline(
    config: &config::LogTelemetry,
    service_name: &'static str,
) -> Option<Result<trace::Tracer, TraceError>> {
    // Sample every trace unless a sampling rate was configured.
    let sampler = trace::Sampler::TraceIdRatioBased(config.sampling_rate.unwrap_or(1.0));
    let resource = Resource::new(vec![KeyValue::new("service.name", service_name)]);

    Some(
        opentelemetry_otlp::new_pipeline()
            .tracing()
            .with_exporter(get_opentelemetry_exporter(config))
            .with_trace_config(trace::config().with_sampler(sampler).with_resource(resource))
            .install_simple(),
    )
}
fn setup_metrics_pipeline(config: &config::LogTelemetry) -> Option<BasicController> {
let histogram_buckets = {
let mut init = 0.01;
let mut buckets: [f64; 15] = [0.0; 15];
@ -141,21 +168,63 @@ static HISTOGRAM_BUCKETS: Lazy<[f64; 15]> = Lazy::new(|| {
*bucket = init;
}
buckets
});
};
fn setup_metrics() -> Option<BasicController> {
opentelemetry_otlp::new_pipeline()
.metrics(
simple::histogram(*HISTOGRAM_BUCKETS),
simple::histogram(histogram_buckets),
cumulative_temporality_selector(),
// This would have to be updated if a different web framework is used
runtime::TokioCurrentThread,
)
.with_exporter(
opentelemetry_otlp::new_exporter().tonic().with_env(), // can also config it using with_* functions like the tracing part above.
)
.with_exporter(get_opentelemetry_exporter(config))
.with_period(Duration::from_secs(3))
.with_timeout(Duration::from_secs(10))
.build()
.map_err(|err| eprintln!("Failed to Setup Metrics with {err:?}"))
.map_err(|err| eprintln!("Failed to setup metrics pipeline: {err:?}"))
.ok()
}
/// Build an [`EnvFilter`] for a logging layer.
///
/// If a `filtering_directive` is provided, it is parsed verbatim (panicking on an
/// invalid directive, since a user-supplied directive must not be silently ignored).
/// Otherwise, a default filter is constructed that logs all cargo workspace member
/// crates (read from the `CARGO_WORKSPACE_MEMBERS` compile-time environment variable
/// emitted by the build script) plus `crates_to_filter` at `filter_log_level`, and
/// everything else at `default_log_level`.
fn get_envfilter(
    filtering_directive: Option<&String>,
    default_log_level: config::Level,
    filter_log_level: config::Level,
    crates_to_filter: impl AsRef<[&'static str]>,
) -> EnvFilter {
    filtering_directive
        .map(|filter| {
            // Try to create target filter from specified filtering directive, if set
            // Safety: If user is overriding the default filtering directive, then we need to panic
            // for invalid directives.
            #[allow(clippy::expect_used)]
            EnvFilter::builder()
                .with_default_directive(default_log_level.into_level().into())
                .parse(filter)
                .expect("Invalid EnvFilter filtering directive")
        })
        .unwrap_or_else(|| {
            // Construct a default target filter otherwise
            // A HashSet is used so that crates listed both in the workspace and in
            // `crates_to_filter` produce a single directive.
            let mut workspace_members = std::env!("CARGO_WORKSPACE_MEMBERS")
                .split(',')
                .collect::<HashSet<_>>();
            workspace_members.extend(crates_to_filter.as_ref());

            workspace_members
                .drain()
                .zip(std::iter::repeat(filter_log_level.into_level()))
                .fold(
                    EnvFilter::default().add_directive(default_log_level.into_level().into()),
                    |env_filter, (target, level)| {
                        // Safety: This is a hardcoded basic filtering directive. If even the basic
                        // filter is wrong, it's better to panic.
                        #[allow(clippy::expect_used)]
                        env_filter.add_directive(
                            format!("{target}={level}")
                                .parse()
                                .expect("Invalid EnvFilter directive format"),
                        )
                    },
                )
        })
}

View File

@ -12,12 +12,7 @@ fn logger() -> &'static TelemetryGuard {
INSTANCE.get_or_init(|| {
let config = env::Config::new().unwrap();
env::logger::setup(
&config.log,
env::service_name!(),
vec![env::service_name!()],
)
.unwrap()
env::logger::setup(&config.log, env::service_name!(), []).unwrap()
})
}