Skip to content

Commit aa92c1d

Browse files
committed
feat: Support SSH keepalive settings in interactive mode
Fix #168: Interactive mode now properly honors SSH keepalive configuration (--server-alive-interval and --server-alive-count-max CLI options, SSH config file settings, and YAML config file settings). Changes: - Add ssh_connection_config field to InteractiveCommand struct - Create build_ssh_connection_config helper in dispatcher for code reuse - Update establish_connection to use Client::connect_with_ssh_config - Pass SshConnectionConfig to JumpHostChain for jump host connections - Add connection health check timeout to PTY session loop - Add interactive mode keepalive tests to ssh_keepalive_test.rs This fix ensures that interactive mode connections: 1. Honor CLI --server-alive-interval/--server-alive-count-max options 2. Honor SSH config file ServerAliveInterval/ServerAliveCountMax settings 3. Honor YAML config file server_alive_interval/server_alive_count_max settings 4. Apply keepalive settings to both direct and jump host connections 5. Have periodic health checks to detect dead connections
1 parent b2be72c commit aa92c1d

7 files changed

Lines changed: 343 additions & 44 deletions

File tree

src/app/dispatcher.rs

Lines changed: 60 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,51 @@ use super::initialization::determine_use_keychain;
3838
use super::initialization::{determine_ssh_key_path, AppContext};
3939
use super::utils::format_duration;
4040

41+
/// Build SSH connection config with keepalive settings.
42+
/// Precedence: CLI > SSH config > YAML config > defaults
43+
fn build_ssh_connection_config(
44+
cli: &Cli,
45+
ctx: &AppContext,
46+
hostname: Option<&str>,
47+
cluster_name: Option<&str>,
48+
) -> SshConnectionConfig {
49+
let keepalive_interval = cli
50+
.server_alive_interval
51+
.or_else(|| {
52+
ctx.ssh_config
53+
.get_int_option(hostname, "serveraliveinterval")
54+
.map(|v| v as u64)
55+
})
56+
.or_else(|| ctx.config.get_server_alive_interval(cluster_name))
57+
.unwrap_or(DEFAULT_KEEPALIVE_INTERVAL);
58+
59+
let keepalive_max = cli
60+
.server_alive_count_max
61+
.or_else(|| {
62+
ctx.ssh_config
63+
.get_int_option(hostname, "serveralivecountmax")
64+
.map(|v| v as usize)
65+
})
66+
.or_else(|| ctx.config.get_server_alive_count_max(cluster_name))
67+
.unwrap_or(DEFAULT_KEEPALIVE_MAX);
68+
69+
let ssh_connection_config = SshConnectionConfig::new()
70+
.with_keepalive_interval(if keepalive_interval == 0 {
71+
None
72+
} else {
73+
Some(keepalive_interval)
74+
})
75+
.with_keepalive_max(keepalive_max);
76+
77+
tracing::debug!(
78+
"SSH keepalive config: interval={:?}s, max={}",
79+
ssh_connection_config.keepalive_interval,
80+
ssh_connection_config.keepalive_max
81+
);
82+
83+
ssh_connection_config
84+
}
85+
4186
/// Dispatch commands to their appropriate handlers
4287
pub async fn dispatch_command(cli: &Cli, ctx: &AppContext) -> Result<()> {
4388
// Get command to execute
@@ -277,6 +322,11 @@ async fn handle_interactive_command(
277322
.get_cluster_jump_host(ctx.cluster_name.as_deref().or(cli.cluster.as_deref()))
278323
});
279324

325+
// Build SSH connection config with keepalive settings for interactive mode
326+
let effective_cluster_name = ctx.cluster_name.as_deref().or(cli.cluster.as_deref());
327+
let ssh_connection_config =
328+
build_ssh_connection_config(cli, ctx, hostname.as_deref(), effective_cluster_name);
329+
280330
let interactive_cmd = InteractiveCommand {
281331
single_node: merged_mode.0,
282332
multiplex: merged_mode.1,
@@ -296,6 +346,7 @@ async fn handle_interactive_command(
296346
jump_hosts,
297347
pty_config,
298348
use_pty,
349+
ssh_connection_config,
299350
};
300351

301352
let result = interactive_cmd.execute().await?;
@@ -345,6 +396,11 @@ async fn handle_exec_command(cli: &Cli, ctx: &AppContext, command: &str) -> Resu
345396
.get_cluster_jump_host(ctx.cluster_name.as_deref().or(cli.cluster.as_deref()))
346397
});
347398

399+
// Build SSH connection config with keepalive settings for SSH mode interactive session
400+
let effective_cluster_name = ctx.cluster_name.as_deref().or(cli.cluster.as_deref());
401+
let ssh_connection_config =
402+
build_ssh_connection_config(cli, ctx, hostname.as_deref(), effective_cluster_name);
403+
348404
let interactive_cmd = InteractiveCommand {
349405
single_node: true,
350406
multiplex: false,
@@ -364,6 +420,7 @@ async fn handle_exec_command(cli: &Cli, ctx: &AppContext, command: &str) -> Resu
364420
jump_hosts,
365421
pty_config,
366422
use_pty,
423+
ssh_connection_config,
367424
};
368425

369426
let result = interactive_cmd.execute().await?;
@@ -434,43 +491,9 @@ async fn handle_exec_command(cli: &Cli, ctx: &AppContext, command: &str) -> Resu
434491
tracing::info!("Using jump host: {}", jh);
435492
}
436493

437-
// Build SSH connection config with precedence: CLI > SSH config > YAML config > defaults
438-
let keepalive_interval = cli
439-
.server_alive_interval
440-
.or_else(|| {
441-
ctx.ssh_config
442-
.get_int_option(hostname.as_deref(), "serveraliveinterval")
443-
.map(|v| v as u64)
444-
})
445-
.or_else(|| ctx.config.get_server_alive_interval(effective_cluster_name))
446-
.unwrap_or(DEFAULT_KEEPALIVE_INTERVAL);
447-
448-
let keepalive_max = cli
449-
.server_alive_count_max
450-
.or_else(|| {
451-
ctx.ssh_config
452-
.get_int_option(hostname.as_deref(), "serveralivecountmax")
453-
.map(|v| v as usize)
454-
})
455-
.or_else(|| {
456-
ctx.config
457-
.get_server_alive_count_max(effective_cluster_name)
458-
})
459-
.unwrap_or(DEFAULT_KEEPALIVE_MAX);
460-
461-
let ssh_connection_config = SshConnectionConfig::new()
462-
.with_keepalive_interval(if keepalive_interval == 0 {
463-
None
464-
} else {
465-
Some(keepalive_interval)
466-
})
467-
.with_keepalive_max(keepalive_max);
468-
469-
tracing::debug!(
470-
"SSH keepalive config: interval={:?}s, max={}",
471-
ssh_connection_config.keepalive_interval,
472-
ssh_connection_config.keepalive_max
473-
);
494+
// Build SSH connection config with keepalive settings for exec mode
495+
let ssh_connection_config =
496+
build_ssh_connection_config(cli, ctx, hostname.as_deref(), effective_cluster_name);
474497

475498
let params = ExecuteCommandParams {
476499
nodes: ctx.nodes.clone(),

src/commands/interactive/connection.rs

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use crate::jump::{parse_jump_hosts, JumpHostChain};
2626
use crate::node::Node;
2727
use crate::ssh::{
2828
known_hosts::get_check_method,
29-
tokio_client::{AuthMethod, Client, Error as SshError, ServerCheckMethod},
29+
tokio_client::{AuthMethod, Client, Error as SshError, ServerCheckMethod, SshConnectionConfig},
3030
};
3131

3232
use super::types::{InteractiveCommand, NodeSession};
@@ -37,6 +37,8 @@ impl InteractiveCommand {
3737
///
3838
/// If `allow_password_fallback` is true and key authentication fails, it will prompt for password
3939
/// and retry with password authentication (matching OpenSSH behavior).
40+
///
41+
/// The `ssh_config` parameter allows configuring SSH connection settings like keepalive intervals.
4042
async fn establish_connection(
4143
addr: (&str, u16),
4244
username: &str,
@@ -45,6 +47,7 @@ impl InteractiveCommand {
4547
host: &str,
4648
port: u16,
4749
allow_password_fallback: bool,
50+
ssh_config: &SshConnectionConfig,
4851
) -> Result<Client> {
4952
const SSH_CONNECT_TIMEOUT_SECS: u64 = 30;
5053
let connect_timeout = Duration::from_secs(SSH_CONNECT_TIMEOUT_SECS);
@@ -59,9 +62,10 @@ impl InteractiveCommand {
5962
// SECURITY: Capture start time for timing attack mitigation
6063
let start_time = std::time::Instant::now();
6164

65+
// Use connect_with_ssh_config to properly apply keepalive settings
6266
let result = timeout(
6367
connect_timeout,
64-
Client::connect(addr, username, auth_method, check_method.clone()),
68+
Client::connect_with_ssh_config(addr, username, auth_method, check_method.clone(), ssh_config),
6569
)
6670
.await
6771
.with_context(|| {
@@ -93,9 +97,10 @@ impl InteractiveCommand {
9397
// Small delay before retry to prevent rapid attempts
9498
tokio::time::sleep(Duration::from_millis(500)).await;
9599

100+
// Use connect_with_ssh_config for password retry as well
96101
timeout(
97102
connect_timeout,
98-
Client::connect(addr, username, password_auth, check_method),
103+
Client::connect_with_ssh_config(addr, username, password_auth, check_method, ssh_config),
99104
)
100105
.await
101106
.with_context(|| {
@@ -231,6 +236,7 @@ impl InteractiveCommand {
231236
&node.host,
232237
node.port,
233238
!self.use_password, // Allow fallback unless explicit password mode
239+
&self.ssh_connection_config,
234240
)
235241
.await?
236242
} else {
@@ -255,9 +261,11 @@ impl InteractiveCommand {
255261
.min(MAX_TIMEOUT_SECS),
256262
);
257263

264+
// Pass SSH connection config to jump host chain for keepalive settings
258265
let chain = JumpHostChain::new(jump_hosts)
259266
.with_connect_timeout(adjusted_timeout)
260-
.with_command_timeout(Duration::from_secs(300));
267+
.with_command_timeout(Duration::from_secs(300))
268+
.with_ssh_connection_config(self.ssh_connection_config.clone());
261269

262270
// Connect through the chain
263271
let connection = timeout(
@@ -308,6 +316,7 @@ impl InteractiveCommand {
308316
&node.host,
309317
node.port,
310318
!self.use_password, // Allow fallback unless explicit password mode
319+
&self.ssh_connection_config,
311320
)
312321
.await?
313322
};
@@ -371,6 +380,7 @@ impl InteractiveCommand {
371380
&node.host,
372381
node.port,
373382
!self.use_password, // Allow fallback unless explicit password mode
383+
&self.ssh_connection_config,
374384
)
375385
.await?
376386
} else {
@@ -395,9 +405,11 @@ impl InteractiveCommand {
395405
.min(MAX_TIMEOUT_SECS),
396406
);
397407

408+
// Pass SSH connection config to jump host chain for keepalive settings
398409
let chain = JumpHostChain::new(jump_hosts)
399410
.with_connect_timeout(adjusted_timeout)
400-
.with_command_timeout(Duration::from_secs(300));
411+
.with_command_timeout(Duration::from_secs(300))
412+
.with_ssh_connection_config(self.ssh_connection_config.clone());
401413

402414
// Connect through the chain
403415
let connection = timeout(
@@ -448,6 +460,7 @@ impl InteractiveCommand {
448460
&node.host,
449461
node.port,
450462
!self.use_password, // Allow fallback unless explicit password mode
463+
&self.ssh_connection_config,
451464
)
452465
.await?
453466
};

src/commands/interactive/types.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use crate::config::{Config, InteractiveConfig};
2424
use crate::node::Node;
2525
use crate::pty::PtyConfig;
2626
use crate::ssh::known_hosts::StrictHostKeyChecking;
27-
use crate::ssh::tokio_client::Client;
27+
use crate::ssh::tokio_client::{Client, SshConnectionConfig};
2828

2929
/// SSH output polling interval for responsive display
3030
/// - 10ms provides very responsive output display
@@ -61,6 +61,8 @@ pub struct InteractiveCommand {
6161
// PTY configuration
6262
pub pty_config: PtyConfig,
6363
pub use_pty: Option<bool>, // None = auto-detect, Some(true) = force, Some(false) = disable
64+
// SSH connection configuration (keepalive settings)
65+
pub ssh_connection_config: SshConnectionConfig,
6466
}
6567

6668
/// Result of an interactive session

src/commands/interactive/utils.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ mod tests {
7171
use crate::config::{Config, InteractiveConfig};
7272
use crate::pty::PtyConfig;
7373
use crate::ssh::known_hosts::StrictHostKeyChecking;
74+
use crate::ssh::tokio_client::SshConnectionConfig;
7475

7576
#[test]
7677
fn test_expand_path_with_tilde() {
@@ -93,6 +94,7 @@ mod tests {
9394
jump_hosts: None,
9495
pty_config: PtyConfig::default(),
9596
use_pty: None,
97+
ssh_connection_config: SshConnectionConfig::default(),
9698
};
9799

98100
let path = PathBuf::from("~/test/file.txt");
@@ -126,6 +128,7 @@ mod tests {
126128
jump_hosts: None,
127129
pty_config: PtyConfig::default(),
128130
use_pty: None,
131+
ssh_connection_config: SshConnectionConfig::default(),
129132
};
130133

131134
let node = Node::new(String::from("example.com"), 22, String::from("alice"));

src/pty/session/constants.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,19 @@ pub const INPUT_POLL_TIMEOUT_MS: u64 = 500;
6161
/// - Tasks should check cancellation signal frequently (10-50ms intervals)
6262
pub const TASK_CLEANUP_TIMEOUT_MS: u64 = 100;
6363

64+
/// Connection health check interval for PTY sessions
65+
/// - 30 seconds provides periodic checks without excessive overhead
66+
/// - Detects dead connections even when SSH keepalive is disabled
67+
/// - Works alongside SSH-level keepalive for defense in depth
68+
/// - Short enough to detect issues before users get frustrated
69+
pub const CONNECTION_HEALTH_CHECK_INTERVAL_SECS: u64 = 30;
70+
71+
/// Maximum idle time before considering connection potentially dead
72+
/// - 300 seconds (5 minutes) is a reasonable threshold for interactive sessions
73+
/// - If no data received within this time, trigger a health check warning
74+
/// - This is a secondary mechanism to SSH-level keepalive
75+
pub const MAX_IDLE_TIME_BEFORE_WARNING_SECS: u64 = 300;
76+
6477
// Const arrays for frequently used key sequences to avoid repeated allocations.
6578
/// Control key sequences - frequently used in terminal input
6679
pub const CTRL_C_SEQUENCE: &[u8] = &[0x03]; // Ctrl+C (SIGINT)

src/pty/session/session_manager.rs

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,10 +299,21 @@ impl PtySession {
299299
let mut should_terminate = false;
300300
let mut cancel_rx = self.cancel_rx.clone();
301301

302+
// Track last activity time for connection health monitoring
303+
let mut last_activity = std::time::Instant::now();
304+
let health_check_interval =
305+
Duration::from_secs(CONNECTION_HEALTH_CHECK_INTERVAL_SECS);
306+
let max_idle_time = Duration::from_secs(MAX_IDLE_TIME_BEFORE_WARNING_SECS);
307+
let mut idle_warning_shown = false;
308+
302309
while !should_terminate {
303310
tokio::select! {
304311
// Handle SSH channel messages
305312
msg = self.channel.wait() => {
313+
// Reset activity timer on any channel activity
314+
last_activity = std::time::Instant::now();
315+
idle_warning_shown = false;
316+
306317
match msg {
307318
Some(ChannelMsg::Data { ref data }) => {
308319
// Filter terminal escape sequence responses before display
@@ -342,18 +353,29 @@ impl PtySession {
342353
// Handle other channel messages if needed
343354
}
344355
None => {
345-
// Channel ended
356+
// Channel ended - connection is dead
357+
tracing::warn!(
358+
"SSH channel returned None - connection may have dropped"
359+
);
346360
should_terminate = true;
347361
}
348362
}
349363
}
350364

351365
// Handle local messages (input, resize, etc.)
352366
message = msg_rx.recv() => {
367+
// Reset activity timer for local input (user is active)
368+
if matches!(message, Some(PtyMessage::LocalInput(_))) {
369+
last_activity = std::time::Instant::now();
370+
idle_warning_shown = false;
371+
}
372+
353373
match message {
354374
Some(PtyMessage::LocalInput(data)) => {
355375
if let Err(e) = self.channel.data(data.as_slice()).await {
356376
tracing::error!("Failed to send data to SSH channel: {e}");
377+
// Connection likely dead - terminate gracefully
378+
eprintln!("\r\n[bssh] Connection lost: failed to send data to remote host\r");
357379
should_terminate = true;
358380
}
359381
}
@@ -400,6 +422,30 @@ impl PtySession {
400422
should_terminate = true;
401423
}
402424
}
425+
426+
// Periodic health check to detect dead connections
427+
_ = tokio::time::sleep(health_check_interval) => {
428+
let idle_duration = last_activity.elapsed();
429+
430+
// Check if the session has been idle for too long
431+
if idle_duration > max_idle_time && !idle_warning_shown {
432+
tracing::debug!(
433+
"PTY session {} idle for {:?}, connection may be stale",
434+
self.session_id,
435+
idle_duration
436+
);
437+
// Don't terminate, but log for debugging
438+
// SSH keepalive should handle actual connection detection
439+
idle_warning_shown = true;
440+
}
441+
442+
// Periodic trace logging for debugging long sessions
443+
tracing::trace!(
444+
"PTY session {} health check: idle for {:?}",
445+
self.session_id,
446+
idle_duration
447+
);
448+
}
403449
}
404450
}
405451

0 commit comments

Comments
 (0)