mirror of
https://github.com/juspay/hyperswitch.git
synced 2025-11-01 02:57:02 +08:00
feat: Add graceful shutdown in drainer (#498)
This commit is contained in:
3
Cargo.lock
generated
3
Cargo.lock
generated
@ -1018,8 +1018,11 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"serde_urlencoded",
|
"serde_urlencoded",
|
||||||
|
"signal-hook",
|
||||||
|
"signal-hook-tokio",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"time",
|
"time",
|
||||||
|
"tokio",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
@ -187,3 +187,4 @@ batch_size = 200 # Specifies the batch size the producer will push under a singl
|
|||||||
stream_name = "DRAINER_STREAM" # Specifies the stream name to be used by the drainer
|
stream_name = "DRAINER_STREAM" # Specifies the stream name to be used by the drainer
|
||||||
num_partitions = 64 # Specifies the number of partitions the stream will be divided into
|
num_partitions = 64 # Specifies the number of partitions the stream will be divided into
|
||||||
max_read_count = 100 # Specifies the maximum number of entries that would be read from redis stream in one call
|
max_read_count = 100 # Specifies the maximum number of entries that would be read from redis stream in one call
|
||||||
|
shutdown_interval = 1000 # Specifies how much time to wait, while waiting for threads to complete execution
|
||||||
|
|||||||
@ -20,6 +20,9 @@ ring = "0.16.20"
|
|||||||
serde = { version = "1.0.152", features = ["derive"] }
|
serde = { version = "1.0.152", features = ["derive"] }
|
||||||
serde_json = "1.0.91"
|
serde_json = "1.0.91"
|
||||||
serde_urlencoded = "0.7.1"
|
serde_urlencoded = "0.7.1"
|
||||||
|
signal-hook-tokio = { version = "0.3.1", features = ["futures-v0_3"] }
|
||||||
|
signal-hook = "0.3.14"
|
||||||
|
tokio = { version = "1.25.0", features = ["macros", "rt-multi-thread"] }
|
||||||
thiserror = "1.0.38"
|
thiserror = "1.0.38"
|
||||||
time = { version = "0.3.17", features = ["serde", "serde-well-known", "std"] }
|
time = { version = "0.3.17", features = ["serde", "serde-well-known", "std"] }
|
||||||
|
|
||||||
|
|||||||
@ -9,6 +9,7 @@ pub mod errors;
|
|||||||
pub mod ext_traits;
|
pub mod ext_traits;
|
||||||
pub mod fp_utils;
|
pub mod fp_utils;
|
||||||
pub mod pii;
|
pub mod pii;
|
||||||
|
pub mod signals;
|
||||||
pub mod validation;
|
pub mod validation;
|
||||||
|
|
||||||
/// Date-time utilities.
|
/// Date-time utilities.
|
||||||
|
|||||||
38
crates/common_utils/src/signals.rs
Normal file
38
crates/common_utils/src/signals.rs
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
//! Provide Interface for worker services to handle signals
|
||||||
|
|
||||||
|
use futures::StreamExt;
|
||||||
|
use router_env::logger;
|
||||||
|
pub use tokio::sync::oneshot;
|
||||||
|
|
||||||
|
///
|
||||||
|
/// This functions is meant to run in parallel to the application.
|
||||||
|
/// It will send a signal to the receiver when a SIGTERM or SIGINT is received
|
||||||
|
///
|
||||||
|
pub async fn signal_handler(mut sig: signal_hook_tokio::Signals, sender: oneshot::Sender<()>) {
|
||||||
|
if let Some(signal) = sig.next().await {
|
||||||
|
logger::info!(
|
||||||
|
"Received signal: {:?}",
|
||||||
|
signal_hook::low_level::signal_name(signal)
|
||||||
|
);
|
||||||
|
match signal {
|
||||||
|
signal_hook::consts::SIGTERM | signal_hook::consts::SIGINT => match sender.send(()) {
|
||||||
|
Ok(_) => {
|
||||||
|
logger::info!("Request for force shutdown received")
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
logger::error!(
|
||||||
|
"The receiver is closed, a termination call might already be sent"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
///
|
||||||
|
/// This function is used to generate a list of signals that the signal_handler should listen for
|
||||||
|
///
|
||||||
|
pub fn get_allowed_signals() -> Result<signal_hook_tokio::SignalsInfo, std::io::Error> {
|
||||||
|
signal_hook_tokio::Signals::new([signal_hook::consts::SIGTERM, signal_hook::consts::SIGINT])
|
||||||
|
}
|
||||||
@ -11,6 +11,10 @@ pub enum DrainerError {
|
|||||||
ConfigurationError(config::ConfigError),
|
ConfigurationError(config::ConfigError),
|
||||||
#[error("Metrics initialization error")]
|
#[error("Metrics initialization error")]
|
||||||
MetricsError,
|
MetricsError,
|
||||||
|
#[error("Error while configuring signals: {0}")]
|
||||||
|
SignalError(String),
|
||||||
|
#[error("Unexpected error occurred: {0}")]
|
||||||
|
UnexpectedError(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type DrainerResult<T> = error_stack::Result<T, DrainerError>;
|
pub type DrainerResult<T> = error_stack::Result<T, DrainerError>;
|
||||||
|
|||||||
@ -5,37 +5,90 @@ pub(crate) mod metrics;
|
|||||||
pub mod services;
|
pub mod services;
|
||||||
pub mod settings;
|
pub mod settings;
|
||||||
mod utils;
|
mod utils;
|
||||||
use std::sync::Arc;
|
use std::sync::{atomic, Arc};
|
||||||
|
|
||||||
|
use common_utils::signals::{get_allowed_signals, oneshot};
|
||||||
pub use env as logger;
|
pub use env as logger;
|
||||||
use logger::{instrument, tracing};
|
use error_stack::{IntoReport, ResultExt};
|
||||||
use storage_models::kv;
|
use storage_models::kv;
|
||||||
|
|
||||||
use crate::{connection::pg_connection, services::Store};
|
use crate::{connection::pg_connection, services::Store};
|
||||||
|
|
||||||
#[instrument(skip(store))]
|
|
||||||
pub async fn start_drainer(
|
pub async fn start_drainer(
|
||||||
store: Arc<Store>,
|
store: Arc<Store>,
|
||||||
number_of_streams: u8,
|
number_of_streams: u8,
|
||||||
max_read_count: u64,
|
max_read_count: u64,
|
||||||
|
shutdown_interval: u32,
|
||||||
) -> errors::DrainerResult<()> {
|
) -> errors::DrainerResult<()> {
|
||||||
let mut stream_index: u8 = 0;
|
let mut stream_index: u8 = 0;
|
||||||
let mut jobs_picked: u8 = 0;
|
let mut jobs_picked: u8 = 0;
|
||||||
loop {
|
|
||||||
|
let mut shutdown_interval =
|
||||||
|
tokio::time::interval(std::time::Duration::from_millis(shutdown_interval.into()));
|
||||||
|
|
||||||
|
let signal =
|
||||||
|
get_allowed_signals()
|
||||||
|
.into_report()
|
||||||
|
.change_context(errors::DrainerError::SignalError(
|
||||||
|
"Failed while getting allowed signals".to_string(),
|
||||||
|
))?;
|
||||||
|
let (tx, mut rx) = oneshot::channel();
|
||||||
|
let handle = signal.handle();
|
||||||
|
let task_handle = tokio::spawn(common_utils::signals::signal_handler(signal, tx));
|
||||||
|
|
||||||
|
let active_tasks = Arc::new(atomic::AtomicU64::new(0));
|
||||||
|
'event: loop {
|
||||||
|
match rx.try_recv() {
|
||||||
|
Err(oneshot::error::TryRecvError::Empty) => {
|
||||||
if utils::is_stream_available(stream_index, store.clone()).await {
|
if utils::is_stream_available(stream_index, store.clone()).await {
|
||||||
tokio::spawn(drainer_handler(store.clone(), stream_index, max_read_count));
|
tokio::spawn(drainer_handler(
|
||||||
|
store.clone(),
|
||||||
|
stream_index,
|
||||||
|
max_read_count,
|
||||||
|
active_tasks.clone(),
|
||||||
|
));
|
||||||
jobs_picked += 1;
|
jobs_picked += 1;
|
||||||
}
|
}
|
||||||
(stream_index, jobs_picked) =
|
(stream_index, jobs_picked) =
|
||||||
utils::increment_stream_index((stream_index, jobs_picked), number_of_streams);
|
utils::increment_stream_index((stream_index, jobs_picked), number_of_streams);
|
||||||
}
|
}
|
||||||
|
Ok(()) | Err(oneshot::error::TryRecvError::Closed) => {
|
||||||
|
logger::info!("Awaiting shutdown!");
|
||||||
|
metrics::SHUTDOWN_SIGNAL_RECEIVED.add(&metrics::CONTEXT, 1, &[]);
|
||||||
|
let shutdown_started = tokio::time::Instant::now();
|
||||||
|
loop {
|
||||||
|
if active_tasks.load(atomic::Ordering::Acquire) == 0 {
|
||||||
|
logger::info!("Terminating drainer");
|
||||||
|
metrics::SUCCESSFUL_SHUTDOWN.add(&metrics::CONTEXT, 1, &[]);
|
||||||
|
let shutdown_ended = shutdown_started.elapsed().as_secs_f64() * 1000f64;
|
||||||
|
metrics::CLEANUP_TIME.record(&metrics::CONTEXT, shutdown_ended, &[]);
|
||||||
|
|
||||||
|
break 'event;
|
||||||
|
}
|
||||||
|
shutdown_interval.tick().await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
handle.close();
|
||||||
|
task_handle
|
||||||
|
.await
|
||||||
|
.into_report()
|
||||||
|
.change_context(errors::DrainerError::UnexpectedError(
|
||||||
|
"Failed while joining signal handler".to_string(),
|
||||||
|
))?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn drainer_handler(
|
async fn drainer_handler(
|
||||||
store: Arc<Store>,
|
store: Arc<Store>,
|
||||||
stream_index: u8,
|
stream_index: u8,
|
||||||
max_read_count: u64,
|
max_read_count: u64,
|
||||||
|
active_tasks: Arc<atomic::AtomicU64>,
|
||||||
) -> errors::DrainerResult<()> {
|
) -> errors::DrainerResult<()> {
|
||||||
|
active_tasks.fetch_add(1, atomic::Ordering::Release);
|
||||||
|
|
||||||
let stream_name = utils::get_drainer_stream_name(store.clone(), stream_index);
|
let stream_name = utils::get_drainer_stream_name(store.clone(), stream_index);
|
||||||
let drainer_result = drainer(store.clone(), max_read_count, stream_name.as_str()).await;
|
let drainer_result = drainer(store.clone(), max_read_count, stream_name.as_str()).await;
|
||||||
|
|
||||||
@ -45,7 +98,10 @@ async fn drainer_handler(
|
|||||||
|
|
||||||
let flag_stream_name = utils::get_stream_key_flag(store.clone(), stream_index);
|
let flag_stream_name = utils::get_stream_key_flag(store.clone(), stream_index);
|
||||||
//TODO: USE THE RESULT FOR LOGGING
|
//TODO: USE THE RESULT FOR LOGGING
|
||||||
utils::make_stream_available(flag_stream_name.as_str(), store.redis_conn.as_ref()).await
|
let output =
|
||||||
|
utils::make_stream_available(flag_stream_name.as_str(), store.redis_conn.as_ref()).await;
|
||||||
|
active_tasks.fetch_sub(1, atomic::Ordering::Release);
|
||||||
|
output
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn drainer(
|
async fn drainer(
|
||||||
|
|||||||
@ -18,12 +18,19 @@ async fn main() -> DrainerResult<()> {
|
|||||||
|
|
||||||
let number_of_streams = store.config.drainer_num_partitions;
|
let number_of_streams = store.config.drainer_num_partitions;
|
||||||
let max_read_count = conf.drainer.max_read_count;
|
let max_read_count = conf.drainer.max_read_count;
|
||||||
|
let shutdown_intervals = conf.drainer.shutdown_interval;
|
||||||
|
|
||||||
let _guard = logger::setup(&conf.log).change_context(errors::DrainerError::MetricsError)?;
|
let _guard = logger::setup(&conf.log).change_context(errors::DrainerError::MetricsError)?;
|
||||||
|
|
||||||
logger::info!("Drainer started [{:?}] [{:?}]", conf.drainer, conf.log);
|
logger::info!("Drainer started [{:?}] [{:?}]", conf.drainer, conf.log);
|
||||||
|
|
||||||
start_drainer(store.clone(), number_of_streams, max_read_count).await?;
|
start_drainer(
|
||||||
|
store.clone(),
|
||||||
|
number_of_streams,
|
||||||
|
max_read_count,
|
||||||
|
shutdown_intervals,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
store.close().await;
|
store.close().await;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@ -48,12 +48,12 @@ pub(crate) static REDIS_STREAM_READ_TIME: Lazy<Histogram<f64>> =
|
|||||||
pub(crate) static REDIS_STREAM_TRIM_TIME: Lazy<Histogram<f64>> =
|
pub(crate) static REDIS_STREAM_TRIM_TIME: Lazy<Histogram<f64>> =
|
||||||
Lazy::new(|| DRAINER_METER.f64_histogram("REDIS_STREAM_TRIM_TIME").init());
|
Lazy::new(|| DRAINER_METER.f64_histogram("REDIS_STREAM_TRIM_TIME").init());
|
||||||
|
|
||||||
pub(crate) static _SHUTDOWN_SIGNAL_RECEIVED: Lazy<Counter<u64>> =
|
pub(crate) static SHUTDOWN_SIGNAL_RECEIVED: Lazy<Counter<u64>> =
|
||||||
Lazy::new(|| DRAINER_METER.u64_counter("SHUTDOWN_SIGNAL_RECEIVED").init());
|
Lazy::new(|| DRAINER_METER.u64_counter("SHUTDOWN_SIGNAL_RECEIVED").init());
|
||||||
|
|
||||||
pub(crate) static _SUCCESSFUL_SHUTDOWN: Lazy<Counter<u64>> =
|
pub(crate) static SUCCESSFUL_SHUTDOWN: Lazy<Counter<u64>> =
|
||||||
Lazy::new(|| DRAINER_METER.u64_counter("SUCCESSFUL_SHUTDOWN").init());
|
Lazy::new(|| DRAINER_METER.u64_counter("SUCCESSFUL_SHUTDOWN").init());
|
||||||
|
|
||||||
// Time in (ms) milliseconds
|
// Time in (ms) milliseconds
|
||||||
pub(crate) static _CLEANUP_TIME: Lazy<Histogram<f64>> =
|
pub(crate) static CLEANUP_TIME: Lazy<Histogram<f64>> =
|
||||||
Lazy::new(|| DRAINER_METER.f64_histogram("CLEANUP_TIME").init());
|
Lazy::new(|| DRAINER_METER.f64_histogram("CLEANUP_TIME").init());
|
||||||
|
|||||||
@ -44,6 +44,7 @@ pub struct DrainerSettings {
|
|||||||
pub stream_name: String,
|
pub stream_name: String,
|
||||||
pub num_partitions: u8,
|
pub num_partitions: u8,
|
||||||
pub max_read_count: u64,
|
pub max_read_count: u64,
|
||||||
|
pub shutdown_interval: u32, // in milliseconds
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Database {
|
impl Default for Database {
|
||||||
@ -65,6 +66,7 @@ impl Default for DrainerSettings {
|
|||||||
stream_name: "DRAINER_STREAM".into(),
|
stream_name: "DRAINER_STREAM".into(),
|
||||||
num_partitions: 64,
|
num_partitions: 64,
|
||||||
max_read_count: 100,
|
max_read_count: 100,
|
||||||
|
shutdown_interval: 1000, // in milliseconds
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5,12 +5,12 @@ use std::{
|
|||||||
sync::{self, atomic},
|
sync::{self, atomic},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use common_utils::signals::{get_allowed_signals, oneshot};
|
||||||
use error_stack::{IntoReport, ResultExt};
|
use error_stack::{IntoReport, ResultExt};
|
||||||
use futures::future;
|
use futures::future;
|
||||||
use redis_interface::{RedisConnectionPool, RedisEntryId};
|
use redis_interface::{RedisConnectionPool, RedisEntryId};
|
||||||
use router_env::{instrument, tracing};
|
use router_env::{instrument, tracing};
|
||||||
use time::PrimitiveDateTime;
|
use time::PrimitiveDateTime;
|
||||||
use tokio::sync::oneshot;
|
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
@ -53,10 +53,7 @@ pub async fn start_consumer(
|
|||||||
));
|
));
|
||||||
|
|
||||||
let consumer_operation_counter = sync::Arc::new(atomic::AtomicU64::new(0));
|
let consumer_operation_counter = sync::Arc::new(atomic::AtomicU64::new(0));
|
||||||
let signal = signal_hook_tokio::Signals::new([
|
let signal = get_allowed_signals()
|
||||||
signal_hook::consts::SIGTERM,
|
|
||||||
signal_hook::consts::SIGINT,
|
|
||||||
])
|
|
||||||
.map_err(|error| {
|
.map_err(|error| {
|
||||||
logger::error!("Signal Handler Error: {:?}", error);
|
logger::error!("Signal Handler Error: {:?}", error);
|
||||||
errors::ProcessTrackerError::ConfigurationError
|
errors::ProcessTrackerError::ConfigurationError
|
||||||
@ -65,7 +62,7 @@ pub async fn start_consumer(
|
|||||||
.attach_printable("Failed while creating a signals handler")?;
|
.attach_printable("Failed while creating a signals handler")?;
|
||||||
let (sx, mut rx) = oneshot::channel();
|
let (sx, mut rx) = oneshot::channel();
|
||||||
let handle = signal.handle();
|
let handle = signal.handle();
|
||||||
let task_handle = tokio::spawn(pt_utils::signal_handler(signal, sx));
|
let task_handle = tokio::spawn(common_utils::signals::signal_handler(signal, sx));
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
match rx.try_recv() {
|
match rx.try_recv() {
|
||||||
|
|||||||
Reference in New Issue
Block a user