Skip to content

Commit 30d27c2

Browse files
Test checking close waits
1 parent d6f00b9 commit 30d27c2

File tree

2 files changed

+47
-0
lines changed

2 files changed

+47
-0
lines changed

shotover-proxy/tests/kafka_int_tests/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,11 @@ async fn cluster_1_rack_single_shotover_broker_idle_timeout(#[case] driver: Kafk
350350
// So instead we rely on a test case that hits the timeout with plenty of buffer to avoid the race condition.
351351
test_cases::test_broker_idle_timeout(&connection_builder).await;
352352

353+
//we had an issue where shotover was not acknowledging the closed connections from Kafka
354+
//leaving connections lingering in the CLOSE_WAIT state. This step ensures no such connections are left behind
355+
//by the shotover process (identified by its PID)
356+
test_cases::assert_no_close_wait_connections(shotover.pid());
357+
353358
tokio::time::timeout(
354359
Duration::from_secs(10),
355360
shotover.shutdown_and_then_consume_events(&[]),

shotover-proxy/tests/kafka_int_tests/test_cases.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2100,3 +2100,45 @@ pub async fn test_no_out_of_rack_request(connection_builder: &KafkaConnectionBui
21002100
0
21012101
);
21022102
}
2103+
2104+
/// Counts CLOSE_WAIT connections for a given PID using lsof
2105+
pub fn count_close_wait_connections_for_process(pid: i32) -> anyhow::Result<usize> {
2106+
use test_helpers::run_command;
2107+
2108+
// Use lsof to get network connections for the specific PID
2109+
// -i: network files
2110+
// -P: don't resolve port numbers to names
2111+
// -n: don't resolve hostnames
2112+
// -a: AND the conditions together
2113+
let output = run_command(
2114+
"lsof",
2115+
&["-i", "-P", "-n", "-a", "-p", &pid.to_string()],
2116+
)?;
2117+
2118+
let mut count = 0;
2119+
2120+
for line in output.lines() {
2121+
// lsof output format (columns vary, but typically):
2122+
// COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
2123+
// The NAME column contains the connection info like "127.0.0.1:54321->172.16.1.2:9092 (CLOSE_WAIT)"
2124+
2125+
// Check if line contains CLOSE_WAIT state
2126+
if line.contains("CLOSE_WAIT") {
2127+
count += 1;
2128+
}
2129+
}
2130+
Ok(count)
2131+
}
2132+
2133+
/// Asserts that there are no CLOSE_WAIT connections for the given PID and destination port
2134+
pub fn assert_no_close_wait_connections(pid: i32) {
2135+
let count = count_close_wait_connections_for_process(pid)
2136+
.expect("Failed to check for CLOSE_WAIT connections");
2137+
2138+
assert_eq!(
2139+
count, 0,
2140+
"Found {} CLOSE_WAIT connection(s) for PID {}. \
2141+
This indicates connections that were closed by the remote peer but not yet closed by the local process.",
2142+
count, pid
2143+
);
2144+
}

0 commit comments

Comments
 (0)