-
Notifications
You must be signed in to change notification settings - Fork 90
Handle OS signals (SIGTERM/SIGINT) for graceful pipeline shutdown #2325
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -64,7 +64,7 @@ use otap_df_engine::ReceivedAtNode; | |
| use otap_df_engine::Unwindable; | ||
| use otap_df_engine::context::{ControllerContext, PipelineContext}; | ||
| use otap_df_engine::control::{ | ||
| PipelineCtrlMsgReceiver, PipelineCtrlMsgSender, pipeline_ctrl_msg_channel, | ||
| PipelineAdminSender, PipelineCtrlMsgReceiver, PipelineCtrlMsgSender, pipeline_ctrl_msg_channel, | ||
| }; | ||
| use otap_df_engine::entity_context::{ | ||
| node_entity_key, pipeline_entity_key, set_pipeline_entity_key, | ||
|
|
@@ -1256,21 +1256,19 @@ impl<PData: 'static + Clone + Send + Sync + std::fmt::Debug + ReceivedAtNode + U | |
| // Drop the original metrics sender so only pipeline threads hold references | ||
| drop(metrics_reporter); | ||
|
|
||
| // Convert the concrete senders to trait objects once, shared by both the | ||
| // admin HTTP server and the OS-signal handler for graceful shutdown. | ||
| let admin_senders: Vec<Arc<dyn PipelineAdminSender>> = ctrl_msg_senders | ||
| .into_iter() | ||
| .map(|sender| Arc::new(sender) as Arc<dyn PipelineAdminSender>) | ||
| .collect(); | ||
| let signal_senders = admin_senders.clone(); | ||
|
|
||
| // Start the admin HTTP server | ||
| let admin_server_handle = spawn_thread_local_task( | ||
| "http-admin", | ||
| admin_tracing_setup, | ||
| move |cancellation_token| { | ||
| // Convert the concrete senders to trait objects for the admin crate | ||
| let admin_senders: Vec<Arc<dyn otap_df_engine::control::PipelineAdminSender>> = | ||
| ctrl_msg_senders | ||
| .into_iter() | ||
| .map(|sender| { | ||
| Arc::new(sender) | ||
| as Arc<dyn otap_df_engine::control::PipelineAdminSender> | ||
| }) | ||
| .collect(); | ||
|
|
||
| otap_df_admin::run( | ||
| admin_settings, | ||
| obs_state_handle, | ||
|
|
@@ -1281,6 +1279,15 @@ impl<PData: 'static + Clone + Send + Sync + std::fmt::Debug + ReceivedAtNode + U | |
| }, | ||
| )?; | ||
|
|
||
| // Start the OS-signal listener on a dedicated background thread so it | ||
| // runs concurrently with the pipeline join loop below. When the first | ||
| // SIGINT/SIGTERM arrives it sends shutdown messages to all pipelines, | ||
| // which causes them to drain and their threads to exit, unblocking the | ||
| // join loop. | ||
| if run_mode == RunMode::ParkMainThread { | ||
| Self::spawn_shutdown_signal_listener(signal_senders); | ||
| } | ||
|
|
||
| // Wait for all pipeline threads to finish and collect their results | ||
| let mut results: Vec<Result<(), Error>> = Vec::with_capacity(threads.len()); | ||
| for (thread_name, thread_id, pipeline_key, handle) in threads { | ||
|
|
@@ -1325,11 +1332,6 @@ impl<PData: 'static + Clone + Send + Sync + std::fmt::Debug + ReceivedAtNode + U | |
| return Err(err); | ||
| } | ||
|
|
||
| // In standard engine mode we keep the main thread parked after startup. | ||
| if run_mode == RunMode::ParkMainThread { | ||
| thread::park(); | ||
| } | ||
|
|
||
| // All pipelines have finished; shut down the admin HTTP server and metric aggregator gracefully. | ||
| engine_metrics_handle.shutdown_and_join()?; | ||
| admin_server_handle.shutdown_and_join()?; | ||
|
|
@@ -1343,6 +1345,118 @@ impl<PData: 'static + Clone + Send + Sync + std::fmt::Debug + ReceivedAtNode + U | |
| Ok(()) | ||
| } | ||
|
|
||
| /// Spawns a dedicated background thread that listens for OS termination | ||
| /// signals (SIGINT / SIGTERM on Unix, Ctrl-C on all platforms) and sends | ||
| /// graceful shutdown messages to every pipeline. | ||
| /// | ||
| /// Follows the same double-signal convention used by the OpenTelemetry | ||
| /// Collector (Go): | ||
| /// - **First signal** → initiates graceful shutdown (drain pipelines). | ||
| /// - **Second signal** → forces immediate process exit (`std::process::exit(1)`). | ||
| /// | ||
| /// This allows Kubernetes (or any process manager that sends SIGTERM) to | ||
| /// trigger an orderly drain of in-flight telemetry data before the pod is | ||
| /// killed. If the graceful drain hangs, a second signal provides an escape | ||
| /// hatch. | ||
| /// | ||
| /// The spawned thread is intentionally detached — it will be cleaned up | ||
| /// when the process exits normally or is force-killed by the second signal. | ||
| fn spawn_shutdown_signal_listener(senders: Vec<Arc<dyn PipelineAdminSender>>) { | ||
| // The JoinHandle is intentionally dropped — the thread is detached and | ||
| // will be cleaned up when the process exits. | ||
| drop( | ||
| thread::Builder::new() | ||
| .name("signal-handler".into()) | ||
| .spawn(move || { | ||
| // Build a lightweight single-threaded runtime for signal handling. | ||
| let rt = tokio::runtime::Builder::new_current_thread() | ||
| .enable_all() | ||
| .build() | ||
| .expect("failed to create signal-handler runtime"); | ||
|
|
||
| rt.block_on(async { | ||
| // ── First signal: graceful shutdown ───────────────── | ||
| let signal_name = Self::recv_termination_signal().await; | ||
|
|
||
| otel_info!( | ||
| "shutdown.signal_received", | ||
| signal = signal_name, | ||
| message = "OS termination signal received, initiating graceful \ | ||
| shutdown. Send the signal again to force immediate exit." | ||
| ); | ||
|
|
||
| // Give pipelines a generous deadline to drain (60 s by default — | ||
| // matches the default Kubernetes terminationGracePeriodSeconds). | ||
| let deadline = | ||
| std::time::Instant::now() + std::time::Duration::from_secs(60); | ||
|
|
||
| let mut errors = Vec::new(); | ||
| for sender in &senders { | ||
| if let Err(e) = sender.try_send_shutdown( | ||
| deadline, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One correctness issue here: For this PR, I think the smallest fix could be a bounded retry inside the existing As a follow-up, we can either add a proper Longer term, a dedicated out-of-band shutdown signal (e.g. watch channel per pipeline) also gives us a clean reusable
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1, separate channel for control signals makes sense. |
||
| format!("Shutdown requested via OS signal ({signal_name})."), | ||
| ) { | ||
| errors.push(e.to_string()); | ||
| } | ||
| } | ||
|
|
||
| if errors.is_empty() { | ||
| otel_info!( | ||
| "shutdown.signal_dispatched", | ||
| pipeline_count = senders.len(), | ||
| message = "Shutdown message sent to all pipelines" | ||
| ); | ||
| } else { | ||
| otel_error!( | ||
| "shutdown.signal_dispatch_failed", | ||
| error_count = errors.len(), | ||
| message = "Some pipelines could not be notified of shutdown" | ||
| ); | ||
| } | ||
|
|
||
| // ── Second signal: force exit ─────────────────────── | ||
| let signal_name = Self::recv_termination_signal().await; | ||
| otel_error!( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently it seems like it is sync, but if this ever uses a bufferred writer (which I doubt it will) this message may not get printed before exit. Maybe an eprintln! right under this for good measure?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. agree. Will swap to eprintln.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the purpose of raw_error! |
||
| "shutdown.force_exit", | ||
| signal = signal_name, | ||
| message = "Second termination signal received, forcing immediate exit" | ||
| ); | ||
| std::process::exit(1); | ||
| }); | ||
| }) | ||
| .expect("failed to spawn signal-handler thread"), | ||
| ); | ||
| } | ||
|
|
||
| /// Awaits the first OS termination signal and returns its name. | ||
| /// | ||
| /// On Unix this listens for both SIGINT and SIGTERM. | ||
| /// On other platforms only Ctrl-C (SIGINT equivalent) is supported. | ||
| async fn recv_termination_signal() -> &'static str { | ||
| #[cfg(unix)] | ||
| { | ||
| use tokio::signal::unix::{SignalKind, signal}; | ||
|
|
||
| let mut sigterm = | ||
| signal(SignalKind::terminate()).expect("failed to register SIGTERM handler"); | ||
| let mut sigint = | ||
| signal(SignalKind::interrupt()).expect("failed to register SIGINT handler"); | ||
|
|
||
| tokio::select! { | ||
| _ = sigterm.recv() => "SIGTERM", | ||
| _ = sigint.recv() => "SIGINT", | ||
| } | ||
| } | ||
|
|
||
| #[cfg(not(unix))] | ||
| { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here is the windows equivalent that handles both ctrl_c, ctrl_break. }
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FYI: On windows platform Ctrl+C can't be reliably sent to a process without console handle and it will be ignored. Safest option is to use CTRL_BREAK. |
||
| tokio::signal::ctrl_c() | ||
| .await | ||
| .expect("failed to listen for Ctrl-C"); | ||
| "Ctrl-C" | ||
| } | ||
| } | ||
|
|
||
| /// Selects which CPU cores to use based on the given allocation. | ||
| fn select_cores_for_allocation( | ||
| mut available_core_ids: Vec<CoreId>, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: named constant for 60 secs?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
additional nit: should be configurable