Skip to content

Commit b94e87d

Browse files
authored
fix: don't seal empty segments on finalize (#127)
When `finalize()` is called right after a rotation, the current segment contains only a file header and SegmentMetadata — no real trace events. Previously this empty segment was sealed as a `.bin` file, picked up by the background worker, and uploaded to S3. This caused the `stress_test_all_segments_uploaded_and_valid` test to flake (~1hr to reproduce) when it asserted every uploaded segment has events. The fix tracks whether any real events have been written to the current segment (`has_real_events`). On `finalize()`, if no real events exist, the `.active` file is deleted instead of sealed. This also avoids wasting bandwidth uploading empty segments in production. Also adds an optional `test_filter` input to the stress test workflow so individual tests can be targeted for validation.
1 parent 78fed99 commit b94e87d

2 files changed

Lines changed: 70 additions & 6 deletions

File tree

.github/workflows/stress-test.yml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ on:
77
required: false
88
default: "60"
99
type: string
10+
test_filter:
11+
description: "Optional nextest filter expression (e.g. test name) to focus the stress test"
12+
required: false
13+
default: ""
14+
type: string
1015
schedule:
1116
- cron: "0 6 * * *"
1217
permissions:
@@ -33,4 +38,9 @@ jobs:
3338
- name: Run stress test
3439
run: |
3540
sudo prlimit --pid $$ --memlock=unlimited:unlimited
36-
cargo nextest run --all-features --stress-duration '${{ inputs.duration_minutes || '60' }}m'
41+
FILTER="${{ inputs.test_filter }}"
42+
if [ -n "$FILTER" ]; then
43+
cargo nextest run --all-features --stress-duration '${{ inputs.duration_minutes || '60' }}m' -E "test($FILTER)"
44+
else
45+
cargo nextest run --all-features --stress-duration '${{ inputs.duration_minutes || '60' }}m'
46+
fi

dial9-tokio-telemetry/src/telemetry/writer.rs

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ pub struct RotatingWriter {
9292
formatted_locations: HashMap<std::panic::Location<'static>, String>,
9393
/// Events silently dropped because the writer was finished/stopped.
9494
dropped_events: usize,
95+
/// Whether any real (non-metadata) events have been written to the current segment.
96+
/// Reset on rotation; used by `finalize()` to avoid sealing empty segments.
97+
has_real_events: bool,
9598
}
9699

97100
// the write side is obviously marge larger than the `Finished` size so clippy warns on this
@@ -152,6 +155,7 @@ impl RotatingWriter {
152155
segment_metadata,
153156
formatted_locations: HashMap::new(),
154157
dropped_events: 0,
158+
has_real_events: false,
155159
};
156160
w.write_segment_metadata()?;
157161
Ok(w)
@@ -181,6 +185,7 @@ impl RotatingWriter {
181185
segment_metadata: None,
182186
formatted_locations: HashMap::new(),
183187
dropped_events: 0,
188+
has_real_events: false,
184189
};
185190
w.write_segment_metadata()?;
186191
Ok(w)
@@ -240,6 +245,7 @@ impl RotatingWriter {
240245
self.state = WriterState::Active(Encoder::new_to(writer)?);
241246
self.active_path = new_path;
242247
self.did_rotate = true;
248+
self.has_real_events = false;
243249
self.write_segment_metadata()?;
244250

245251
tracing::info!(
@@ -408,6 +414,7 @@ impl RotatingWriter {
408414
})
409415
}
410416
}?;
417+
self.has_real_events = true;
411418
Ok(())
412419
}
413420

@@ -458,9 +465,23 @@ impl TraceWriter for RotatingWriter {
458465
.extension()
459466
.is_some_and(|ext| ext == "active")
460467
{
461-
let sealed = Self::file_path(&self.base_path, self.next_index - 1);
462-
fs::rename(&self.active_path, &sealed)?;
463-
self.active_path = sealed;
468+
if self.has_real_events {
469+
let sealed = Self::file_path(&self.base_path, self.next_index - 1);
470+
fs::rename(&self.active_path, &sealed)?;
471+
self.active_path = sealed;
472+
} else {
473+
// No real events — just header + metadata. Remove instead of
474+
// sealing so the background worker doesn't upload an empty segment.
475+
tracing::debug!(
476+
"removing empty final segment {}",
477+
self.active_path.display()
478+
);
479+
if let Err(e) = fs::remove_file(&self.active_path)
480+
&& e.kind() != std::io::ErrorKind::NotFound
481+
{
482+
return Err(e);
483+
}
484+
}
464485
}
465486
self.state = WriterState::Finished;
466487
Ok(())
@@ -597,8 +618,15 @@ mod tests {
597618
let mut writer = RotatingWriter::new(&base, 1024, 4096).unwrap();
598619
writer.finalize().unwrap();
599620

600-
let events = read_trace_events(&rotating_file(&base, 0));
601-
assert_eq!(events.len(), 0);
621+
// No real events were written, so finalize removes the empty segment.
622+
assert!(
623+
!dir.path().join("trace.0.bin").exists(),
624+
"empty segment should not be sealed"
625+
);
626+
assert!(
627+
!dir.path().join("trace.0.bin.active").exists(),
628+
"active file should be removed"
629+
);
602630
}
603631

604632
#[test]
@@ -953,6 +981,32 @@ mod tests {
953981
);
954982
}
955983

984+
#[test]
985+
fn test_finalize_removes_empty_segment_after_rotation() {
986+
let dir = TempDir::new().unwrap();
987+
let base = dir.path().join("trace");
988+
// Small max_file_size so one event triggers rotation.
989+
let mut writer = RotatingWriter::new(&base, 1, 100_000).unwrap();
990+
// Write an event — this fills segment 0 and triggers rotation to segment 1.
991+
writer.write_event(&park_event()).unwrap();
992+
// Segment 0 is sealed, segment 1 is active with only header + metadata.
993+
assert!(dir.path().join("trace.0.bin").exists());
994+
assert!(dir.path().join("trace.1.bin.active").exists());
995+
996+
// Finalize should remove the empty segment 1 instead of sealing it.
997+
writer.finalize().unwrap();
998+
assert!(
999+
!dir.path().join("trace.1.bin").exists(),
1000+
"empty segment should not be sealed"
1001+
);
1002+
assert!(
1003+
!dir.path().join("trace.1.bin.active").exists(),
1004+
"empty active file should be removed"
1005+
);
1006+
// Segment 0 should still exist.
1007+
assert!(dir.path().join("trace.0.bin").exists());
1008+
}
1009+
9561010
#[test]
9571011
fn test_single_file_no_active_suffix() {
9581012
let dir = TempDir::new().unwrap();

0 commit comments

Comments
 (0)