mirror of
https://github.com/juspay/hyperswitch.git
synced 2025-10-28 04:04:55 +08:00
fix: auto retry once for connection closed (#3426)
Co-authored-by: venkatesh.devendran <venkatesh.devendran@juspay.in> Co-authored-by: hyperswitch-bot[bot] <148525504+hyperswitch-bot[bot]@users.noreply.github.com>
This commit is contained in:
@ -106,6 +106,7 @@ counter_metric!(APPLE_PAY_MANUAL_FLOW_FAILED_PAYMENT, GLOBAL_METER);
|
||||
counter_metric!(APPLE_PAY_SIMPLIFIED_FLOW_FAILED_PAYMENT, GLOBAL_METER);
|
||||
|
||||
// Metrics for Auto Retries
|
||||
counter_metric!(AUTO_RETRY_CONNECTION_CLOSED, GLOBAL_METER);
|
||||
counter_metric!(AUTO_RETRY_ELIGIBLE_REQUEST_COUNT, GLOBAL_METER);
|
||||
counter_metric!(AUTO_RETRY_GSM_MISS_COUNT, GLOBAL_METER);
|
||||
counter_metric!(AUTO_RETRY_GSM_FETCH_FAILURE_COUNT, GLOBAL_METER);
|
||||
|
||||
@ -561,8 +561,7 @@ pub async fn send_request(
|
||||
key: consts::METRICS_HOST_TAG_NAME.into(),
|
||||
value: url.host_str().unwrap_or_default().to_string().into(),
|
||||
};
|
||||
|
||||
let send_request = async {
|
||||
let request = {
|
||||
match request.method {
|
||||
Method::Get => client.get(url),
|
||||
Method::Post => {
|
||||
@ -616,32 +615,92 @@ pub async fn send_request(
|
||||
.timeout(Duration::from_secs(
|
||||
option_timeout_secs.unwrap_or(crate::consts::REQUEST_TIME_OUT),
|
||||
))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|error| match error {
|
||||
error if error.is_timeout() => {
|
||||
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
|
||||
errors::ApiClientError::RequestTimeoutReceived
|
||||
}
|
||||
error if is_connection_closed(&error) => {
|
||||
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
|
||||
errors::ApiClientError::ConnectionClosed
|
||||
}
|
||||
_ => errors::ApiClientError::RequestNotSent(error.to_string()),
|
||||
})
|
||||
.into_report()
|
||||
.attach_printable("Unable to send request to connector")
|
||||
};
|
||||
|
||||
metrics_request::record_operation_time(
|
||||
// We cannot clone the request type, because it has Form trait which is not clonable. So we are cloning the request builder here.
|
||||
let cloned_send_request = request.try_clone().map(|cloned_request| async {
|
||||
cloned_request
|
||||
.send()
|
||||
.await
|
||||
.map_err(|error| match error {
|
||||
error if error.is_timeout() => {
|
||||
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
|
||||
errors::ApiClientError::RequestTimeoutReceived
|
||||
}
|
||||
error if is_connection_closed_before_message_could_complete(&error) => {
|
||||
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
|
||||
errors::ApiClientError::ConnectionClosedIncompleteMessage
|
||||
}
|
||||
_ => errors::ApiClientError::RequestNotSent(error.to_string()),
|
||||
})
|
||||
.into_report()
|
||||
.attach_printable("Unable to send request to connector")
|
||||
});
|
||||
|
||||
let send_request = async {
|
||||
request
|
||||
.send()
|
||||
.await
|
||||
.map_err(|error| match error {
|
||||
error if error.is_timeout() => {
|
||||
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
|
||||
errors::ApiClientError::RequestTimeoutReceived
|
||||
}
|
||||
error if is_connection_closed_before_message_could_complete(&error) => {
|
||||
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
|
||||
errors::ApiClientError::ConnectionClosedIncompleteMessage
|
||||
}
|
||||
_ => errors::ApiClientError::RequestNotSent(error.to_string()),
|
||||
})
|
||||
.into_report()
|
||||
.attach_printable("Unable to send request to connector")
|
||||
};
|
||||
|
||||
let response = metrics_request::record_operation_time(
|
||||
send_request,
|
||||
&metrics::EXTERNAL_REQUEST_TIME,
|
||||
&[metrics_tag],
|
||||
&[metrics_tag.clone()],
|
||||
)
|
||||
.await
|
||||
.await;
|
||||
// Retry once if the response is connection closed.
|
||||
//
|
||||
// This is just due to the racy nature of networking.
|
||||
// hyper has a connection pool of idle connections, and it selected one to send your request.
|
||||
// Most of the time, hyper will receive the server’s FIN and drop the dead connection from its pool.
|
||||
// But occasionally, a connection will be selected from the pool
|
||||
// and written to at the same time the server is deciding to close the connection.
|
||||
// Since hyper already wrote some of the request,
|
||||
// it can’t really retry it automatically on a new connection, since the server may have acted already
|
||||
match response {
|
||||
Ok(response) => Ok(response),
|
||||
Err(error)
|
||||
if error.current_context()
|
||||
== &errors::ApiClientError::ConnectionClosedIncompleteMessage =>
|
||||
{
|
||||
metrics::AUTO_RETRY_CONNECTION_CLOSED.add(&metrics::CONTEXT, 1, &[]);
|
||||
match cloned_send_request {
|
||||
Some(cloned_request) => {
|
||||
logger::info!(
|
||||
"Retrying request due to connection closed before message could complete"
|
||||
);
|
||||
metrics_request::record_operation_time(
|
||||
cloned_request,
|
||||
&metrics::EXTERNAL_REQUEST_TIME,
|
||||
&[metrics_tag],
|
||||
)
|
||||
.await
|
||||
}
|
||||
None => {
|
||||
logger::info!("Retrying request due to connection closed before message could complete failed as request is not clonable");
|
||||
Err(error)
|
||||
}
|
||||
}
|
||||
}
|
||||
err @ Err(_) => err,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_connection_closed(error: &reqwest::Error) -> bool {
|
||||
fn is_connection_closed_before_message_could_complete(error: &reqwest::Error) -> bool {
|
||||
let mut source = error.source();
|
||||
while let Some(err) = source {
|
||||
if let Some(hyper_err) = err.downcast_ref::<hyper::Error>() {
|
||||
|
||||
@ -267,7 +267,7 @@ pub enum ApiClientError {
|
||||
RequestTimeoutReceived,
|
||||
|
||||
#[error("connection closed before a message could complete")]
|
||||
ConnectionClosed,
|
||||
ConnectionClosedIncompleteMessage,
|
||||
|
||||
#[error("Server responded with Internal Server Error")]
|
||||
InternalServerErrorReceived,
|
||||
@ -285,8 +285,8 @@ impl ApiClientError {
|
||||
pub fn is_upstream_timeout(&self) -> bool {
|
||||
self == &Self::RequestTimeoutReceived
|
||||
}
|
||||
pub fn is_connection_closed(&self) -> bool {
|
||||
self == &Self::ConnectionClosed
|
||||
pub fn is_connection_closed_before_message_could_complete(&self) -> bool {
|
||||
self == &Self::ConnectionClosedIncompleteMessage
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user