feat(webhooks): implement automatic retries for failed webhook deliveries using scheduler (#3842)

This commit is contained in:
Sanchith Hegde
2024-03-04 12:01:02 +05:30
committed by GitHub
parent fee0663d66
commit 5bb67c7dcc
27 changed files with 965 additions and 215 deletions

View File

@ -32,6 +32,7 @@ use crate::{
events::{
api_logs::ApiEvent,
outgoing_webhook_logs::{OutgoingWebhookEvent, OutgoingWebhookEventMetric},
RawEvent,
},
logger,
routes::{app::AppStateInfo, lock_utils, metrics::request::add_attributes, AppState},
@ -43,15 +44,13 @@ use crate::{
transformers::{ForeignInto, ForeignTryInto},
},
utils::{self as helper_utils, generate_id, OptionExt, ValueExt},
workflows::outgoing_webhook_retry,
};
const OUTGOING_WEBHOOK_TIMEOUT_SECS: u64 = 5;
const MERCHANT_ID: &str = "merchant_id";
pub async fn payments_incoming_webhook_flow<
W: types::OutgoingWebhookType,
Ctx: PaymentMethodRetrieve,
>(
pub async fn payments_incoming_webhook_flow<Ctx: PaymentMethodRetrieve>(
state: AppState,
merchant_account: domain::MerchantAccount,
business_profile: diesel_models::business_profile::BusinessProfile,
@ -169,7 +168,7 @@ pub async fn payments_incoming_webhook_flow<
// If event is NOT an UnsupportedEvent, trigger Outgoing Webhook
if let Some(outgoing_event_type) = event_type {
create_event_and_trigger_outgoing_webhook::<W>(
create_event_and_trigger_outgoing_webhook(
state,
merchant_account,
business_profile,
@ -196,7 +195,7 @@ pub async fn payments_incoming_webhook_flow<
#[instrument(skip_all)]
#[allow(clippy::too_many_arguments)]
pub async fn refunds_incoming_webhook_flow<W: types::OutgoingWebhookType>(
pub async fn refunds_incoming_webhook_flow(
state: AppState,
merchant_account: domain::MerchantAccount,
business_profile: diesel_models::business_profile::BusinessProfile,
@ -275,7 +274,7 @@ pub async fn refunds_incoming_webhook_flow<W: types::OutgoingWebhookType>(
if let Some(outgoing_event_type) = event_type {
let refund_response: api_models::refunds::RefundResponse =
updated_refund.clone().foreign_into();
create_event_and_trigger_outgoing_webhook::<W>(
create_event_and_trigger_outgoing_webhook(
state,
merchant_account,
business_profile,
@ -414,7 +413,7 @@ pub async fn get_or_update_dispute_object(
}
}
pub async fn mandates_incoming_webhook_flow<W: types::OutgoingWebhookType>(
pub async fn mandates_incoming_webhook_flow(
state: AppState,
merchant_account: domain::MerchantAccount,
business_profile: diesel_models::business_profile::BusinessProfile,
@ -471,7 +470,7 @@ pub async fn mandates_incoming_webhook_flow<W: types::OutgoingWebhookType>(
);
let event_type: Option<enums::EventType> = updated_mandate.mandate_status.foreign_into();
if let Some(outgoing_event_type) = event_type {
create_event_and_trigger_outgoing_webhook::<W>(
create_event_and_trigger_outgoing_webhook(
state,
merchant_account,
business_profile,
@ -496,7 +495,7 @@ pub async fn mandates_incoming_webhook_flow<W: types::OutgoingWebhookType>(
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
pub async fn disputes_incoming_webhook_flow<W: types::OutgoingWebhookType>(
pub async fn disputes_incoming_webhook_flow(
state: AppState,
merchant_account: domain::MerchantAccount,
business_profile: diesel_models::business_profile::BusinessProfile,
@ -538,7 +537,7 @@ pub async fn disputes_incoming_webhook_flow<W: types::OutgoingWebhookType>(
let disputes_response = Box::new(dispute_object.clone().foreign_into());
let event_type: enums::EventType = dispute_object.dispute_status.foreign_into();
create_event_and_trigger_outgoing_webhook::<W>(
create_event_and_trigger_outgoing_webhook(
state,
merchant_account,
business_profile,
@ -562,7 +561,7 @@ pub async fn disputes_incoming_webhook_flow<W: types::OutgoingWebhookType>(
}
}
async fn bank_transfer_webhook_flow<W: types::OutgoingWebhookType, Ctx: PaymentMethodRetrieve>(
async fn bank_transfer_webhook_flow<Ctx: PaymentMethodRetrieve>(
state: AppState,
merchant_account: domain::MerchantAccount,
business_profile: diesel_models::business_profile::BusinessProfile,
@ -624,7 +623,7 @@ async fn bank_transfer_webhook_flow<W: types::OutgoingWebhookType, Ctx: PaymentM
// If event is NOT an UnsupportedEvent, trigger Outgoing Webhook
if let Some(outgoing_event_type) = event_type {
create_event_and_trigger_outgoing_webhook::<W>(
create_event_and_trigger_outgoing_webhook(
state,
merchant_account,
business_profile,
@ -649,53 +648,7 @@ async fn bank_transfer_webhook_flow<W: types::OutgoingWebhookType, Ctx: PaymentM
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
pub async fn create_event_and_trigger_appropriate_outgoing_webhook(
state: AppState,
merchant_account: domain::MerchantAccount,
business_profile: diesel_models::business_profile::BusinessProfile,
event_type: enums::EventType,
event_class: enums::EventClass,
intent_reference_id: Option<String>,
primary_object_id: String,
primary_object_type: enums::EventObjectType,
content: api::OutgoingWebhookContent,
) -> CustomResult<(), errors::ApiErrorResponse> {
match merchant_account.get_compatible_connector() {
#[cfg(feature = "stripe")]
Some(api_models::enums::Connector::Stripe) => {
create_event_and_trigger_outgoing_webhook::<stripe_webhooks::StripeOutgoingWebhook>(
state.clone(),
merchant_account,
business_profile,
event_type,
event_class,
intent_reference_id,
primary_object_id,
primary_object_type,
content,
)
.await
}
_ => {
create_event_and_trigger_outgoing_webhook::<api_models::webhooks::OutgoingWebhook>(
state.clone(),
merchant_account,
business_profile,
event_type,
event_class,
intent_reference_id,
primary_object_id,
primary_object_type,
content,
)
.await
}
}
}
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
pub async fn create_event_and_trigger_outgoing_webhook<W: types::OutgoingWebhookType>(
pub(crate) async fn create_event_and_trigger_outgoing_webhook(
state: AppState,
merchant_account: domain::MerchantAccount,
business_profile: diesel_models::business_profile::BusinessProfile,
@ -706,6 +659,7 @@ pub async fn create_event_and_trigger_outgoing_webhook<W: types::OutgoingWebhook
primary_object_type: enums::EventObjectType,
content: api::OutgoingWebhookContent,
) -> CustomResult<(), errors::ApiErrorResponse> {
let merchant_id = business_profile.merchant_id.clone();
let event_id = format!("{primary_object_id}_{event_type}");
let new_event = storage::EventNew {
event_id: event_id.clone(),
@ -723,7 +677,7 @@ pub async fn create_event_and_trigger_outgoing_webhook<W: types::OutgoingWebhook
Ok(event) => Ok(event),
Err(error) => {
if error.current_context().is_db_unique_violation() {
logger::info!("Merchant already notified about the event {event_id}");
logger::debug!("Event `{event_id}` already exists in the database");
return Ok(());
} else {
logger::error!(event_insertion_failure=?error);
@ -736,57 +690,132 @@ pub async fn create_event_and_trigger_outgoing_webhook<W: types::OutgoingWebhook
if state.conf.webhooks.outgoing_enabled {
let outgoing_webhook = api::OutgoingWebhook {
merchant_id: merchant_account.merchant_id.clone(),
merchant_id: merchant_id.clone(),
event_id: event.event_id.clone(),
event_type: event.event_type,
content: content.clone(),
timestamp: event.created_at,
};
let state_clone = state.clone();
let process_tracker = add_outgoing_webhook_retry_task_to_process_tracker(
&*state.store,
&business_profile,
&event,
)
.await
.map_err(|error| {
logger::error!(
?error,
"Failed to add outgoing webhook retry task to process tracker"
);
error
})
.ok();
// Using a tokio spawn here and not arbiter because not all caller of this function
// may have an actix arbiter
tokio::spawn(async move {
let mut error = None;
let result =
trigger_webhook_to_merchant::<W>(business_profile, outgoing_webhook, state).await;
if let Err(e) = result {
error.replace(
serde_json::to_value(e.current_context())
.into_report()
.attach_printable("Failed to serialize json error response")
.change_context(errors::ApiErrorResponse::WebhookProcessingFailure)
.ok()
.into(),
);
logger::error!(?e);
tokio::spawn(
async move {
trigger_appropriate_webhook_and_raise_event(
state,
merchant_account,
business_profile,
outgoing_webhook,
types::WebhookDeliveryAttempt::InitialAttempt,
content,
event.event_id,
event_type,
process_tracker,
)
.await;
}
let outgoing_webhook_event_type = content.get_outgoing_webhook_event_type();
let webhook_event = OutgoingWebhookEvent::new(
merchant_account.merchant_id.clone(),
event.event_id.clone(),
event_type,
outgoing_webhook_event_type,
error,
);
match webhook_event.clone().try_into() {
Ok(event) => {
state_clone.event_handler().log_event(event);
}
Err(err) => {
logger::error!(error=?err, event=?webhook_event, "Error Logging Outgoing Webhook Event");
}
}
}.in_current_span());
.in_current_span(),
);
}
Ok(())
}
pub async fn trigger_webhook_to_merchant<W: types::OutgoingWebhookType>(
#[allow(clippy::too_many_arguments)]
pub(crate) async fn trigger_appropriate_webhook_and_raise_event(
state: AppState,
merchant_account: domain::MerchantAccount,
business_profile: diesel_models::business_profile::BusinessProfile,
outgoing_webhook: api::OutgoingWebhook,
delivery_attempt: types::WebhookDeliveryAttempt,
content: api::OutgoingWebhookContent,
event_id: String,
event_type: enums::EventType,
process_tracker: Option<storage::ProcessTracker>,
) {
match merchant_account.get_compatible_connector() {
#[cfg(feature = "stripe")]
Some(api_models::enums::Connector::Stripe) => {
trigger_webhook_and_raise_event::<stripe_webhooks::StripeOutgoingWebhook>(
state,
business_profile,
outgoing_webhook,
delivery_attempt,
content,
event_id,
event_type,
process_tracker,
)
.await
}
_ => {
trigger_webhook_and_raise_event::<api_models::webhooks::OutgoingWebhook>(
state,
business_profile,
outgoing_webhook,
delivery_attempt,
content,
event_id,
event_type,
process_tracker,
)
.await
}
}
}
#[allow(clippy::too_many_arguments)]
async fn trigger_webhook_and_raise_event<W: types::OutgoingWebhookType>(
state: AppState,
business_profile: diesel_models::business_profile::BusinessProfile,
outgoing_webhook: api::OutgoingWebhook,
delivery_attempt: types::WebhookDeliveryAttempt,
content: api::OutgoingWebhookContent,
event_id: String,
event_type: enums::EventType,
process_tracker: Option<storage::ProcessTracker>,
) {
let merchant_id = business_profile.merchant_id.clone();
let trigger_webhook_result = trigger_webhook_to_merchant::<W>(
state.clone(),
business_profile,
outgoing_webhook,
delivery_attempt,
process_tracker,
)
.await;
raise_webhooks_analytics_event(
state,
trigger_webhook_result,
content,
&merchant_id,
&event_id,
event_type,
);
}
async fn trigger_webhook_to_merchant<W: types::OutgoingWebhookType>(
state: AppState,
business_profile: diesel_models::business_profile::BusinessProfile,
webhook: api::OutgoingWebhook,
state: AppState,
delivery_attempt: types::WebhookDeliveryAttempt,
process_tracker: Option<storage::ProcessTracker>,
) -> CustomResult<(), errors::WebhooksFlowError> {
let webhook_details_json = business_profile
.webhook_details
@ -843,40 +872,135 @@ pub async fn trigger_webhook_to_merchant<W: types::OutgoingWebhookType>(
);
logger::debug!(outgoing_webhook_response=?response);
match response {
Err(e) => {
// [#217]: Schedule webhook for retry.
Err(e).change_context(errors::WebhooksFlowError::CallToMerchantFailed)?;
}
Ok(res) => {
if res.status().is_success() {
metrics::WEBHOOK_OUTGOING_RECEIVED_COUNT.add(
&metrics::CONTEXT,
1,
&[metrics::KeyValue::new(
MERCHANT_ID,
business_profile.merchant_id.clone(),
)],
);
let update_event = storage::EventUpdate::UpdateWebhookNotified {
is_webhook_notified: Some(true),
};
state
let api_client_error_handler =
|client_error: error_stack::Report<errors::ApiClientError>,
delivery_attempt: types::WebhookDeliveryAttempt| {
let error =
client_error.change_context(errors::WebhooksFlowError::CallToMerchantFailed);
logger::error!(
?error,
?delivery_attempt,
"An error occurred when sending webhook to merchant"
);
};
let success_response_handler =
|state: AppState,
merchant_id: String,
outgoing_webhook_event_id: String,
process_tracker: Option<storage::ProcessTracker>,
business_status: &'static str| async move {
metrics::WEBHOOK_OUTGOING_RECEIVED_COUNT.add(
&metrics::CONTEXT,
1,
&[metrics::KeyValue::new(MERCHANT_ID, merchant_id)],
);
let update_event = storage::EventUpdate::UpdateWebhookNotified {
is_webhook_notified: Some(true),
};
state
.store
.update_event(outgoing_webhook_event_id, update_event)
.await
.change_context(errors::WebhooksFlowError::WebhookEventUpdationFailed)?;
match process_tracker {
Some(process_tracker) => state
.store
.update_event(outgoing_webhook_event_id, update_event)
.as_scheduler()
.finish_process_with_business_status(process_tracker, business_status.into())
.await
.change_context(errors::WebhooksFlowError::WebhookEventUpdationFailed)?;
} else {
metrics::WEBHOOK_OUTGOING_NOT_RECEIVED_COUNT.add(
&metrics::CONTEXT,
1,
&[metrics::KeyValue::new(
MERCHANT_ID,
business_profile.merchant_id.clone(),
)],
);
// [#217]: Schedule webhook for retry.
Err(errors::WebhooksFlowError::NotReceivedByMerchant).into_report()?;
.change_context(
errors::WebhooksFlowError::OutgoingWebhookProcessTrackerTaskUpdateFailed,
),
None => Ok(()),
}
};
let error_response_handler = |merchant_id: String,
delivery_attempt: types::WebhookDeliveryAttempt,
status_code: u16,
log_message: &'static str| {
metrics::WEBHOOK_OUTGOING_NOT_RECEIVED_COUNT.add(
&metrics::CONTEXT,
1,
&[metrics::KeyValue::new(MERCHANT_ID, merchant_id)],
);
let error = report!(errors::WebhooksFlowError::NotReceivedByMerchant);
logger::warn!(?error, ?delivery_attempt, ?status_code, %log_message);
};
match delivery_attempt {
types::WebhookDeliveryAttempt::InitialAttempt => match response {
Err(client_error) => api_client_error_handler(client_error, delivery_attempt),
Ok(response) => {
if response.status().is_success() {
success_response_handler(
state.clone(),
business_profile.merchant_id,
outgoing_webhook_event_id,
process_tracker,
"INITIAL_DELIVERY_ATTEMPT_SUCCESSFUL",
)
.await?;
} else {
error_response_handler(
business_profile.merchant_id,
delivery_attempt,
response.status().as_u16(),
"Ignoring error when sending webhook to merchant",
);
}
}
},
types::WebhookDeliveryAttempt::AutomaticRetry => {
let process_tracker = process_tracker
.get_required_value("process_tracker")
.change_context(errors::WebhooksFlowError::OutgoingWebhookRetrySchedulingFailed)
.attach_printable("`process_tracker` is unavailable in automatic retry flow")?;
match response {
Err(client_error) => {
api_client_error_handler(client_error, delivery_attempt);
// Schedule a retry attempt for webhook delivery
outgoing_webhook_retry::retry_webhook_delivery_task(
&*state.store,
&business_profile.merchant_id,
process_tracker,
)
.await
.change_context(
errors::WebhooksFlowError::OutgoingWebhookRetrySchedulingFailed,
)?;
}
Ok(response) => {
if response.status().is_success() {
success_response_handler(
state.clone(),
business_profile.merchant_id,
outgoing_webhook_event_id,
Some(process_tracker),
"COMPLETED_BY_PT",
)
.await?;
} else {
error_response_handler(
business_profile.merchant_id.clone(),
delivery_attempt,
response.status().as_u16(),
"An error occurred when sending webhook to merchant",
);
// Schedule a retry attempt for webhook delivery
outgoing_webhook_retry::retry_webhook_delivery_task(
&*state.store,
&business_profile.merchant_id,
process_tracker,
)
.await
.change_context(
errors::WebhooksFlowError::OutgoingWebhookRetrySchedulingFailed,
)?;
}
}
}
}
}
@ -884,6 +1008,48 @@ pub async fn trigger_webhook_to_merchant<W: types::OutgoingWebhookType>(
Ok(())
}
fn raise_webhooks_analytics_event(
state: AppState,
trigger_webhook_result: CustomResult<(), errors::WebhooksFlowError>,
content: api::OutgoingWebhookContent,
merchant_id: &str,
event_id: &str,
event_type: enums::EventType,
) {
let error = if let Err(error) = trigger_webhook_result {
logger::error!(?error, "Failed to send webhook to merchant");
serde_json::to_value(error.current_context())
.into_report()
.change_context(errors::ApiErrorResponse::WebhookProcessingFailure)
.map_err(|error| {
logger::error!(?error, "Failed to serialize outgoing webhook error as JSON");
error
})
.ok()
} else {
None
};
let outgoing_webhook_event_content = content.get_outgoing_webhook_event_content();
let webhook_event = OutgoingWebhookEvent::new(
merchant_id.to_owned(),
event_id.to_owned(),
event_type,
outgoing_webhook_event_content,
error,
);
match RawEvent::try_from(webhook_event.clone()) {
Ok(event) => {
state.event_handler().log_event(event);
}
Err(error) => {
logger::error!(?error, event=?webhook_event, "Error logging outgoing webhook event");
}
}
}
pub async fn webhooks_wrapper<W: types::OutgoingWebhookType, Ctx: PaymentMethodRetrieve>(
flow: &impl router_env::types::FlowMetric,
state: AppState,
@ -1196,7 +1362,7 @@ pub async fn webhooks_core<W: types::OutgoingWebhookType, Ctx: PaymentMethodRetr
})?;
match flow_type {
api::WebhookFlow::Payment => Box::pin(payments_incoming_webhook_flow::<W, Ctx>(
api::WebhookFlow::Payment => Box::pin(payments_incoming_webhook_flow::<Ctx>(
state.clone(),
merchant_account,
business_profile,
@ -1207,7 +1373,7 @@ pub async fn webhooks_core<W: types::OutgoingWebhookType, Ctx: PaymentMethodRetr
.await
.attach_printable("Incoming webhook flow for payments failed")?,
api::WebhookFlow::Refund => Box::pin(refunds_incoming_webhook_flow::<W>(
api::WebhookFlow::Refund => Box::pin(refunds_incoming_webhook_flow(
state.clone(),
merchant_account,
business_profile,
@ -1220,7 +1386,7 @@ pub async fn webhooks_core<W: types::OutgoingWebhookType, Ctx: PaymentMethodRetr
.await
.attach_printable("Incoming webhook flow for refunds failed")?,
api::WebhookFlow::Dispute => disputes_incoming_webhook_flow::<W>(
api::WebhookFlow::Dispute => disputes_incoming_webhook_flow(
state.clone(),
merchant_account,
business_profile,
@ -1233,7 +1399,7 @@ pub async fn webhooks_core<W: types::OutgoingWebhookType, Ctx: PaymentMethodRetr
.await
.attach_printable("Incoming webhook flow for disputes failed")?,
api::WebhookFlow::BankTransfer => Box::pin(bank_transfer_webhook_flow::<W, Ctx>(
api::WebhookFlow::BankTransfer => Box::pin(bank_transfer_webhook_flow::<Ctx>(
state.clone(),
merchant_account,
business_profile,
@ -1246,7 +1412,7 @@ pub async fn webhooks_core<W: types::OutgoingWebhookType, Ctx: PaymentMethodRetr
api::WebhookFlow::ReturnResponse => WebhookResponseTracker::NoEffect,
api::WebhookFlow::Mandate => mandates_incoming_webhook_flow::<W>(
api::WebhookFlow::Mandate => mandates_incoming_webhook_flow(
state.clone(),
merchant_account,
business_profile,
@ -1380,3 +1546,68 @@ async fn fetch_optional_mca_and_connector(
Ok((None, connector))
}
}
pub async fn add_outgoing_webhook_retry_task_to_process_tracker(
db: &dyn StorageInterface,
business_profile: &diesel_models::business_profile::BusinessProfile,
event: &storage::Event,
) -> CustomResult<storage::ProcessTracker, errors::StorageError> {
let schedule_time = outgoing_webhook_retry::get_webhook_delivery_retry_schedule_time(
db,
&business_profile.merchant_id,
0,
)
.await
.ok_or(errors::StorageError::ValueNotFound(
"Process tracker schedule time".into(), // Can raise a better error here
))
.into_report()
.attach_printable("Failed to obtain initial process tracker schedule time")?;
let tracking_data = types::OutgoingWebhookTrackingData {
merchant_id: business_profile.merchant_id.clone(),
business_profile_id: business_profile.profile_id.clone(),
event_type: event.event_type,
event_class: event.event_class,
primary_object_id: event.primary_object_id.clone(),
primary_object_type: event.primary_object_type,
};
let runner = storage::ProcessTrackerRunner::OutgoingWebhookRetryWorkflow;
let task = "OUTGOING_WEBHOOK_RETRY";
let tag = ["OUTGOING_WEBHOOKS"];
let process_tracker_id = scheduler::utils::get_process_tracker_id(
runner,
task,
&event.primary_object_id,
&business_profile.merchant_id,
);
let process_tracker_entry = storage::ProcessTrackerNew::new(
process_tracker_id,
task,
runner,
tag,
tracking_data,
schedule_time,
)
.map_err(errors::StorageError::from)?;
match db.insert_process(process_tracker_entry).await {
Ok(process_tracker) => {
crate::routes::metrics::TASKS_ADDED_COUNT.add(
&metrics::CONTEXT,
1,
&[add_attributes("flow", "OutgoingWebhookRetry")],
);
Ok(process_tracker)
}
Err(error) => {
crate::routes::metrics::TASK_ADDITION_FAILURES_COUNT.add(
&metrics::CONTEXT,
1,
&[add_attributes("flow", "OutgoingWebhookRetry")],
);
Err(error)
}
}
}