feat(analytics): Add Clickhouse-based analytics (#2988)

Co-authored-by: harsh_sharma_juspay <harsh.sharma@juspay.in>
Co-authored-by: Ivor Dsouza <ivor.dsouza@juspay.in>
Co-authored-by: Chethan Rao <70657455+Chethan-rao@users.noreply.github.com>
Co-authored-by: nain-F49FF806 <126972030+nain-F49FF806@users.noreply.github.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: hyperswitch-bot[bot] <148525504+hyperswitch-bot[bot]@users.noreply.github.com>
Co-authored-by: akshay.s <akshay.s@juspay.in>
Co-authored-by: Gnanasundari24 <118818938+Gnanasundari24@users.noreply.github.com>
Author: Sampras Lopes
Date:   2023-11-29 17:04:53 +05:30 (committed by GitHub)
Parent: 2e57745352
Commit: 9df4e0193f
135 changed files with 12145 additions and 901 deletions

crates/analytics/src/lib.rs Normal file

@@ -0,0 +1,509 @@
mod clickhouse;
mod query;
mod sqlx;
mod types;

pub mod api_event;
pub mod core;
pub mod errors;
pub mod lambda_utils;
pub mod metrics;
pub mod payments;
pub mod refunds;
pub mod sdk_events;
pub mod utils;

use std::sync::Arc;

use api_event::metrics::{ApiEventMetric, ApiEventMetricRow};
pub use types::AnalyticsDomain;
use api_models::analytics::{
api_event::{
ApiEventDimensions, ApiEventFilters, ApiEventMetrics, ApiEventMetricsBucketIdentifier,
},
payments::{PaymentDimensions, PaymentFilters, PaymentMetrics, PaymentMetricsBucketIdentifier},
refunds::{RefundDimensions, RefundFilters, RefundMetrics, RefundMetricsBucketIdentifier},
sdk_events::{
SdkEventDimensions, SdkEventFilters, SdkEventMetrics, SdkEventMetricsBucketIdentifier,
},
Distribution, Granularity, TimeRange,
};
use clickhouse::ClickhouseClient;
pub use clickhouse::ClickhouseConfig;
use error_stack::IntoReport;
use router_env::{
logger,
tracing::{self, instrument},
};
use storage_impl::config::Database;
use self::{
payments::{
distribution::{PaymentDistribution, PaymentDistributionRow},
metrics::{PaymentMetric, PaymentMetricRow},
},
refunds::metrics::{RefundMetric, RefundMetricRow},
sdk_events::metrics::{SdkEventMetric, SdkEventMetricRow},
sqlx::SqlxClient,
types::MetricsError,
};
#[derive(Clone, Debug)]
pub enum AnalyticsProvider {
    /// Serve analytics from Postgres via sqlx only.
    Sqlx(SqlxClient),
    /// Serve analytics from Clickhouse only.
    Clickhouse(ClickhouseClient),
    /// Query both backends, log any mismatch, and return the Clickhouse result.
    CombinedCkh(SqlxClient, ClickhouseClient),
    /// Query both backends, log any mismatch, and return the sqlx (Postgres) result.
    CombinedSqlx(SqlxClient, ClickhouseClient),
}
impl Default for AnalyticsProvider {
fn default() -> Self {
Self::Sqlx(SqlxClient::default())
}
}
impl std::fmt::Display for AnalyticsProvider {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::Clickhouse(_) => "Clickhouse",
            Self::Sqlx(_) => "Sqlx",
            Self::CombinedCkh(_, _) => "CombinedCkh",
            Self::CombinedSqlx(_, _) => "CombinedSqlx",
        })
    }
}
impl AnalyticsProvider {
#[instrument(skip_all)]
pub async fn get_payment_metrics(
&self,
metric: &PaymentMetrics,
dimensions: &[PaymentDimensions],
merchant_id: &str,
filters: &PaymentFilters,
granularity: &Option<Granularity>,
time_range: &TimeRange,
) -> types::MetricsResult<Vec<(PaymentMetricsBucketIdentifier, PaymentMetricRow)>> {
// Record the time taken to fetch each payment metric
metrics::request::record_operation_time(
async {
match self {
Self::Sqlx(pool) => {
metric
.load_metrics(
dimensions,
merchant_id,
filters,
granularity,
time_range,
pool,
)
.await
}
Self::Clickhouse(pool) => {
metric
.load_metrics(
dimensions,
merchant_id,
filters,
granularity,
time_range,
pool,
)
.await
}
                    Self::CombinedCkh(sqlx_pool, ckh_pool) => {
                        let (ckh_result, sqlx_result) = tokio::join!(
                            metric.load_metrics(
                                dimensions,
                                merchant_id,
                                filters,
                                granularity,
                                time_range,
                                ckh_pool,
                            ),
                            metric.load_metrics(
                                dimensions,
                                merchant_id,
                                filters,
                                granularity,
                                time_range,
                                sqlx_pool,
                            )
                        );
                        match (&sqlx_result, &ckh_result) {
                            (Ok(ref sqlx_res), Ok(ref ckh_res)) if sqlx_res != ckh_res => {
                                logger::error!(clickhouse_result=?ckh_res, postgres_result=?sqlx_res, "Mismatch between clickhouse & postgres payments analytics metrics")
                            }
                            _ => {}
                        };
                        ckh_result
                    }
                    Self::CombinedSqlx(sqlx_pool, ckh_pool) => {
                        let (ckh_result, sqlx_result) = tokio::join!(
                            metric.load_metrics(
                                dimensions,
                                merchant_id,
                                filters,
                                granularity,
                                time_range,
                                ckh_pool,
                            ),
                            metric.load_metrics(
                                dimensions,
                                merchant_id,
                                filters,
                                granularity,
                                time_range,
                                sqlx_pool,
                            )
                        );
                        match (&sqlx_result, &ckh_result) {
                            (Ok(ref sqlx_res), Ok(ref ckh_res)) if sqlx_res != ckh_res => {
                                logger::error!(clickhouse_result=?ckh_res, postgres_result=?sqlx_res, "Mismatch between clickhouse & postgres payments analytics metrics")
                            }
                            _ => {}
                        };
                        sqlx_result
                    }
}
},
&metrics::METRIC_FETCH_TIME,
metric,
self,
)
.await
}
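    // A distilled sketch of the shadow-read pattern above (hypothetical helper,
    // not used elsewhere in this crate): query primary and shadow concurrently,
    // log any divergence between successful results, and return only the
    // primary's outcome. `CombinedCkh` treats Clickhouse as primary, while
    // `CombinedSqlx` swaps the roles, which allows a gradual cutover between
    // stores while the logs surface any drift.
    #[allow(dead_code)]
    async fn dual_read_sketch<T, E, P, S>(primary: P, shadow: S) -> Result<T, E>
    where
        T: PartialEq + std::fmt::Debug,
        E: std::fmt::Debug,
        P: std::future::Future<Output = Result<T, E>>,
        S: std::future::Future<Output = Result<T, E>>,
    {
        let (primary_res, shadow_res) = tokio::join!(primary, shadow);
        if let (Ok(primary_ok), Ok(shadow_ok)) = (&primary_res, &shadow_res) {
            if primary_ok != shadow_ok {
                logger::error!(primary_result=?primary_ok, shadow_result=?shadow_ok, "Mismatch between primary & shadow analytics backends");
            }
        }
        primary_res
    }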
pub async fn get_payment_distribution(
&self,
distribution: &Distribution,
dimensions: &[PaymentDimensions],
merchant_id: &str,
filters: &PaymentFilters,
granularity: &Option<Granularity>,
time_range: &TimeRange,
) -> types::MetricsResult<Vec<(PaymentMetricsBucketIdentifier, PaymentDistributionRow)>> {
// Record the time taken to fetch each payment distribution
metrics::request::record_operation_time(
async {
match self {
                    Self::Sqlx(pool) => {
                        distribution
                            .distribution_for
                            .load_distribution(
                                distribution,
                                dimensions,
                                merchant_id,
                                filters,
                                granularity,
                                time_range,
                                pool,
                            )
                            .await
                    }
                    Self::Clickhouse(pool) => {
                        distribution
                            .distribution_for
                            .load_distribution(
                                distribution,
                                dimensions,
                                merchant_id,
                                filters,
                                granularity,
                                time_range,
                                pool,
                            )
                            .await
                    }
                    Self::CombinedCkh(sqlx_pool, ckh_pool) => {
                        let (ckh_result, sqlx_result) = tokio::join!(
                            distribution.distribution_for.load_distribution(
                                distribution,
                                dimensions,
                                merchant_id,
                                filters,
                                granularity,
                                time_range,
                                ckh_pool,
                            ),
                            distribution.distribution_for.load_distribution(
                                distribution,
                                dimensions,
                                merchant_id,
                                filters,
                                granularity,
                                time_range,
                                sqlx_pool,
                            )
                        );
                        match (&sqlx_result, &ckh_result) {
                            (Ok(ref sqlx_res), Ok(ref ckh_res)) if sqlx_res != ckh_res => {
                                logger::error!(clickhouse_result=?ckh_res, postgres_result=?sqlx_res, "Mismatch between clickhouse & postgres payments analytics distribution")
                            }
                            _ => {}
                        };
                        ckh_result
                    }
                    Self::CombinedSqlx(sqlx_pool, ckh_pool) => {
                        let (ckh_result, sqlx_result) = tokio::join!(
                            distribution.distribution_for.load_distribution(
                                distribution,
                                dimensions,
                                merchant_id,
                                filters,
                                granularity,
                                time_range,
                                ckh_pool,
                            ),
                            distribution.distribution_for.load_distribution(
                                distribution,
                                dimensions,
                                merchant_id,
                                filters,
                                granularity,
                                time_range,
                                sqlx_pool,
                            )
                        );
                        match (&sqlx_result, &ckh_result) {
                            (Ok(ref sqlx_res), Ok(ref ckh_res)) if sqlx_res != ckh_res => {
                                logger::error!(clickhouse_result=?ckh_res, postgres_result=?sqlx_res, "Mismatch between clickhouse & postgres payments analytics distribution")
                            }
                            _ => {}
                        };
                        sqlx_result
                    }
}
},
&metrics::METRIC_FETCH_TIME,
&distribution.distribution_for,
self,
)
.await
}
pub async fn get_refund_metrics(
&self,
metric: &RefundMetrics,
dimensions: &[RefundDimensions],
merchant_id: &str,
filters: &RefundFilters,
granularity: &Option<Granularity>,
time_range: &TimeRange,
) -> types::MetricsResult<Vec<(RefundMetricsBucketIdentifier, RefundMetricRow)>> {
// Record the time taken to fetch each refund metric
metrics::request::record_operation_time(
async {
match self {
Self::Sqlx(pool) => {
metric
.load_metrics(
dimensions,
merchant_id,
filters,
granularity,
time_range,
pool,
)
.await
}
Self::Clickhouse(pool) => {
metric
.load_metrics(
dimensions,
merchant_id,
filters,
granularity,
time_range,
pool,
)
.await
}
Self::CombinedCkh(sqlx_pool, ckh_pool) => {
let (ckh_result, sqlx_result) = tokio::join!(
metric.load_metrics(
dimensions,
merchant_id,
filters,
granularity,
time_range,
ckh_pool,
),
metric.load_metrics(
dimensions,
merchant_id,
filters,
granularity,
time_range,
sqlx_pool,
)
);
match (&sqlx_result, &ckh_result) {
(Ok(ref sqlx_res), Ok(ref ckh_res)) if sqlx_res != ckh_res => {
logger::error!(clickhouse_result=?ckh_res, postgres_result=?sqlx_res, "Mismatch between clickhouse & postgres refunds analytics metrics")
}
_ => {}
};
ckh_result
}
Self::CombinedSqlx(sqlx_pool, ckh_pool) => {
let (ckh_result, sqlx_result) = tokio::join!(
metric.load_metrics(
dimensions,
merchant_id,
filters,
granularity,
time_range,
ckh_pool,
),
metric.load_metrics(
dimensions,
merchant_id,
filters,
granularity,
time_range,
sqlx_pool,
)
);
match (&sqlx_result, &ckh_result) {
(Ok(ref sqlx_res), Ok(ref ckh_res)) if sqlx_res != ckh_res => {
logger::error!(clickhouse_result=?ckh_res, postgres_result=?sqlx_res, "Mismatch between clickhouse & postgres refunds analytics metrics")
}
_ => {}
};
sqlx_result
}
}
},
&metrics::METRIC_FETCH_TIME,
metric,
self,
)
.await
}
pub async fn get_sdk_event_metrics(
&self,
metric: &SdkEventMetrics,
dimensions: &[SdkEventDimensions],
pub_key: &str,
filters: &SdkEventFilters,
granularity: &Option<Granularity>,
time_range: &TimeRange,
) -> types::MetricsResult<Vec<(SdkEventMetricsBucketIdentifier, SdkEventMetricRow)>> {
match self {
Self::Sqlx(_pool) => Err(MetricsError::NotImplemented).into_report(),
Self::Clickhouse(pool) => {
metric
.load_metrics(dimensions, pub_key, filters, granularity, time_range, pool)
.await
}
            Self::CombinedCkh(_sqlx_pool, ckh_pool) | Self::CombinedSqlx(_sqlx_pool, ckh_pool) => {
                // SDK events live only in Clickhouse, so both combined variants
                // read from the Clickhouse pool
                metric
                    .load_metrics(
                        dimensions,
                        pub_key,
                        filters,
                        granularity,
                        time_range,
                        ckh_pool,
                    )
                    .await
            }
}
}
pub async fn get_api_event_metrics(
&self,
metric: &ApiEventMetrics,
dimensions: &[ApiEventDimensions],
pub_key: &str,
filters: &ApiEventFilters,
granularity: &Option<Granularity>,
time_range: &TimeRange,
) -> types::MetricsResult<Vec<(ApiEventMetricsBucketIdentifier, ApiEventMetricRow)>> {
match self {
Self::Sqlx(_pool) => Err(MetricsError::NotImplemented).into_report(),
Self::Clickhouse(ckh_pool)
| Self::CombinedCkh(_, ckh_pool)
| Self::CombinedSqlx(_, ckh_pool) => {
// API events live only in Clickhouse, so always read from the Clickhouse pool
metric
.load_metrics(
dimensions,
pub_key,
filters,
granularity,
time_range,
ckh_pool,
)
.await
}
}
}
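    // Hypothetical caller-side sketch (not part of this crate) showing why the
    // `Err(MetricsError::NotImplemented).into_report()` form above is useful:
    // the resulting `error_stack::Report` can accumulate printable context as
    // it bubbles up, assuming the pre-1.0 error_stack API used in this file.
    #[allow(dead_code)]
    async fn sdk_metrics_with_context_sketch(
        &self,
        metric: &SdkEventMetrics,
        dimensions: &[SdkEventDimensions],
        pub_key: &str,
        filters: &SdkEventFilters,
        granularity: &Option<Granularity>,
        time_range: &TimeRange,
    ) -> types::MetricsResult<Vec<(SdkEventMetricsBucketIdentifier, SdkEventMetricRow)>> {
        use error_stack::ResultExt;
        self.get_sdk_event_metrics(metric, dimensions, pub_key, filters, granularity, time_range)
            .await
            .attach_printable("failed to load SDK event metrics")
    }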
pub async fn from_conf(config: &AnalyticsConfig) -> Self {
match config {
AnalyticsConfig::Sqlx { sqlx } => Self::Sqlx(SqlxClient::from_conf(sqlx).await),
AnalyticsConfig::Clickhouse { clickhouse } => Self::Clickhouse(ClickhouseClient {
config: Arc::new(clickhouse.clone()),
}),
AnalyticsConfig::CombinedCkh { sqlx, clickhouse } => Self::CombinedCkh(
SqlxClient::from_conf(sqlx).await,
ClickhouseClient {
config: Arc::new(clickhouse.clone()),
},
),
AnalyticsConfig::CombinedSqlx { sqlx, clickhouse } => Self::CombinedSqlx(
SqlxClient::from_conf(sqlx).await,
ClickhouseClient {
config: Arc::new(clickhouse.clone()),
},
),
}
}
}
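// Hypothetical wiring sketch (not part of this file): build a provider from a
// deserialized config. `AnalyticsConfig::default()` falls back to the sqlx
// (Postgres) source, so a missing analytics section still yields a provider.
#[allow(dead_code)]
async fn provider_from_default_config_sketch() -> AnalyticsProvider {
    let config = AnalyticsConfig::default();
    let provider = AnalyticsProvider::from_conf(&config).await;
    logger::info!("analytics provider: {}", provider.to_string());
    provider
}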
#[derive(Clone, Debug, serde::Deserialize)]
#[serde(tag = "source")]
#[serde(rename_all = "lowercase")]
pub enum AnalyticsConfig {
Sqlx {
sqlx: Database,
},
Clickhouse {
clickhouse: ClickhouseConfig,
},
CombinedCkh {
sqlx: Database,
clickhouse: ClickhouseConfig,
},
CombinedSqlx {
sqlx: Database,
clickhouse: ClickhouseConfig,
},
}
impl Default for AnalyticsConfig {
fn default() -> Self {
Self::Sqlx {
sqlx: Database::default(),
}
}
}
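// The `#[serde(tag = "source")]` and `rename_all = "lowercase"` attributes make
// `AnalyticsConfig` an internally tagged enum: the deserializer picks the
// variant from a `source` key that sits alongside the variant's own fields.
// A self-contained sketch (hypothetical test, assuming serde_json is available
// as a dev-dependency): `MiniConfig` mirrors the tagged layout with stand-in
// `String` fields, since the real field sets are defined by `Database` and
// `ClickhouseConfig` elsewhere.
#[cfg(test)]
mod source_tag_sketch {
    #[derive(Debug, serde::Deserialize)]
    #[serde(tag = "source")]
    #[serde(rename_all = "lowercase")]
    enum MiniConfig {
        Sqlx { sqlx: String },
        Clickhouse { clickhouse: String },
    }

    #[test]
    fn variant_follows_source_tag() {
        // The `source` key selects the variant; the remaining keys feed its fields.
        let raw = r#"{ "source": "clickhouse", "clickhouse": "http://localhost:8123" }"#;
        let parsed: MiniConfig = serde_json::from_str(raw).expect("valid tagged config");
        assert!(matches!(parsed, MiniConfig::Clickhouse { .. }));
    }
}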
#[derive(Clone, Debug, serde::Deserialize, Default, serde::Serialize)]
pub struct ReportConfig {
pub payment_function: String,
pub refund_function: String,
pub dispute_function: String,
pub region: String,
}