Skip to content

Commit c7c8d04

Browse files
chantrafacebook-github-bot
authored andcommitted
[antlir2_vm] dump eth0 traffic when running in test mode
Summary: This diff provides a mechanism to dump eth0 traffic when running an antlir VM. Essentially, this adds a new option to `antlir2_vm run` called `--eth0-output-file`. When it is set, qemu will be configured to dump the traffic of `net0` interface to that file. By default, nothing is dumped. To plug this into the testing framework, when `antlir2_vm` test is called, it creates the blob artifacts that will be uploaded to TPX and passes the resulting file to `antlir2_vm run` via `--eth0-output-file`. Test Plan: ``` $ buck2 run fbcode//antlir/antlir2/antlir2_vm:antlir2_vm -- run -h Buck UI: https://www.internalfb.com/buck2/64d2ce42-afe3-42cf-9970-76387fcbb5a8 Network: Up: 0B Down: 0B Jobs completed: 6. Time elapsed: 0.0s. BUILD SUCCEEDED Run the VM. Must be executed inside container Usage: antlir2_vm run [OPTIONS] --machine-spec <MACHINE_SPEC> [COMMAND]... Arguments: [COMMAND]... Execute command through ssh inside VM Options: --machine-spec <MACHINE_SPEC> Json-encoded file for VM machine configuration --expect-failure Expects the VM to timeout or terminate early --postmortem The command should be run after VM termination. Console log will be available at env $CONSOLE_OUTPUT --timeout-secs <TIMEOUT_SECS> Timeout in seconds before VM will be terminated. None disables the timeout, which should only be used for interactive shells for development --console-output-file <CONSOLE_OUTPUT_FILE> Redirect console output to file. By default it's suppressed --output-dirs <OUTPUT_DIRS> Output directories that need to be available inside VM --command-envs <COMMAND_ENVS> Environment variables for the command --first-boot-command <FIRST_BOOT_COMMAND> Command requires first boot --eth0-output-file <ETH0_OUTPUT_FILE> Dump network traffic on eth0 to output to file. By default it is not dumped --console Drop into console prompt. 
This also enables console output on screen, unless `--console-output-file` is specified --container Drop into container shell outside VM -h, --help Print help ``` and ran: ``` buck2 test $(kerctl vmtest-config -e everstore:GICWmAACj33BEzsFAElJ4glD5YhXbuYfAAAf) fbcode//kernel/vmtest/uname_test:uname_test-6.9-local ``` https://www.internalfb.com/intern/testinfra/testconsole/testrun/6755399674977058/ leads to https://www.internalfb.com/intern/testinfra/diagnostics/6755399674977058.562950104331417.1727383743/ which has an "Artifacts" section with eth0.pcap Reviewed By: wujj123456 Differential Revision: D63487026 fbshipit-source-id: f6e63d2202a08d7b324364f7b492818de3224230
1 parent b3d0e2d commit c7c8d04

File tree

3 files changed

+56
-9
lines changed

3 files changed

+56
-9
lines changed

antlir/antlir2/antlir2_vm/src/main.rs

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ use crate::share::VirtiofsShare;
4444
use crate::types::MachineOpts;
4545
use crate::types::MountPlatformDecision;
4646
use crate::types::VMArgs;
47+
use crate::utils::create_tpx_blobs;
4748
use crate::utils::create_tpx_logs;
4849
use crate::utils::env_names_to_kvpairs;
4950
use crate::utils::log_command;
@@ -97,6 +98,9 @@ struct IsolateCmdArgs {
9798
/// Extra RW bind-mount into the VM for debugging purpose
9899
#[arg(long)]
99100
scratch_dir: Option<PathBuf>,
101+
/// Whether or not to dump the VM's eth0 traffic to a file. When running the test command, this will set eth0_output_file to a file that will be uploaded to tpx.
102+
#[arg(long, default_value_t = false)]
103+
dump_eth0_traffic: bool,
100104
/// Args for run command
101105
#[clap(flatten)]
102106
run_cmd_args: RunCmdArgs,
@@ -260,7 +264,11 @@ fn record_envs(_envs: &[KvPair]) -> Result<()> {
260264

261265
/// Further validate `VMArgs` parsed by clap and generate a new `VMArgs` with
262266
/// content specific to test execution.
263-
fn get_test_vm_args(orig_args: &VMArgs, cli_envs: Vec<String>) -> Result<ValidatedVMArgs> {
267+
fn get_test_vm_args(
268+
orig_args: &VMArgs,
269+
cli_envs: Vec<String>,
270+
dump_eth0_traffic: bool,
271+
) -> Result<ValidatedVMArgs> {
264272
if orig_args.timeout_secs.is_none() {
265273
return Err(anyhow!("Test command must specify --timeout-secs."));
266274
}
@@ -302,6 +310,9 @@ fn get_test_vm_args(orig_args: &VMArgs, cli_envs: Vec<String>) -> Result<Validat
302310
vm_args.mode.command = Some(test_args.test.into_inner_cmd());
303311
vm_args.command_envs = envs;
304312
vm_args.console_output_file = create_tpx_logs("console.txt", "console logs")?;
313+
if dump_eth0_traffic {
314+
vm_args.eth0_output_file = create_tpx_blobs("eth0.pcap", "eth0 traffic")?;
315+
}
305316
Ok(ValidatedVMArgs {
306317
inner: vm_args,
307318
is_list,
@@ -381,7 +392,11 @@ fn test(args: &IsolateCmdArgs) -> Result<()> {
381392
// It may then decide whether to use host's platform for the actual test.
382393
Platform::set(&MountPlatformDecision(true))?;
383394

384-
let validated_args = get_test_vm_args(&args.run_cmd_args.vm_args, args.passenv.clone())?;
395+
let validated_args = get_test_vm_args(
396+
&args.run_cmd_args.vm_args,
397+
args.passenv.clone(),
398+
args.dump_eth0_traffic,
399+
)?;
385400
antlir2_rootless::unshare_new_userns()?;
386401
antlir2_isolate::unshare_and_privatize_mount_ns().context("while isolating mount ns")?;
387402
let mut command = if validated_args.is_list {
@@ -441,27 +456,28 @@ mod test {
441456
};
442457
let mut expected = valid.clone();
443458
expected.mode.command = Some(vec![OsString::from("whatever")]);
444-
let parsed = get_test_vm_args(&valid, vec![]).expect("Parsing should succeed");
459+
let parsed = get_test_vm_args(&valid, vec![], false).expect("Parsing should succeed");
445460
assert_eq!(parsed.inner.mode, expected.mode);
446461
assert!(!parsed.is_list);
447462

448463
let mut timeout = valid.clone();
449464
timeout.timeout_secs = None;
450-
assert!(get_test_vm_args(&timeout, vec![]).is_err());
465+
assert!(get_test_vm_args(&timeout, vec![], false).is_err());
451466

452467
let mut output_dirs = valid.clone();
453468
output_dirs.output_dirs = vec![PathBuf::from("/some")];
454-
assert!(get_test_vm_args(&output_dirs, vec![]).is_err());
469+
assert!(get_test_vm_args(&output_dirs, vec![], false).is_err());
455470

456471
let mut command = valid.clone();
457472
command.mode.command = None;
458-
assert!(get_test_vm_args(&command, vec![]).is_err());
473+
assert!(get_test_vm_args(&command, vec![], false).is_err());
459474
command.mode.command = Some(vec![OsString::from("invalid")]);
460-
assert!(get_test_vm_args(&command, vec![]).is_err());
475+
assert!(get_test_vm_args(&command, vec![], false).is_err());
461476

462477
let env_var_test = valid;
463478
std::env::set_var("TEST_PILOT_A", "A");
464-
let parsed = get_test_vm_args(&env_var_test, vec![]).expect("Parsing should succeed");
479+
let parsed =
480+
get_test_vm_args(&env_var_test, vec![], false).expect("Parsing should succeed");
465481
assert!(
466482
parsed
467483
.inner

antlir/antlir2/antlir2_vm/src/types.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ pub(crate) struct VMArgs {
9696
/// Command requires first boot
9797
#[clap(long)]
9898
pub(crate) first_boot_command: Option<String>,
99+
/// Dump network traffic on eth0 to output to file. By default it is not dumped.
100+
#[clap(long)]
101+
pub(crate) eth0_output_file: Option<PathBuf>,
99102
/// Operation for VM to carry out
100103
#[clap(flatten)]
101104
pub(crate) mode: VMModeArgs,
@@ -131,6 +134,10 @@ impl VMArgs {
131134
args.push("--console-output-file".into());
132135
args.push(path.into());
133136
}
137+
if let Some(path) = &self.eth0_output_file {
138+
args.push("--eth0-output-file".into());
139+
args.push(path.into());
140+
}
134141
self.command_envs.iter().for_each(|pair| {
135142
args.push("--command-envs".into());
136143
let mut kv_str = OsString::new();
@@ -176,6 +183,14 @@ impl VMArgs {
176183
outputs.insert(env::current_dir().expect("current dir must be valid"));
177184
}
178185
}
186+
// eth0 output needs to be accessible for debugging and uploading
187+
if let Some(file_path) = &self.eth0_output_file {
188+
if let Some(parent) = file_path.parent() {
189+
outputs.insert(parent.to_path_buf());
190+
} else {
191+
outputs.insert(env::current_dir().expect("current dir must be valid"));
192+
}
193+
}
179194
outputs
180195
}
181196
}

antlir/antlir2/antlir2_vm/src/vm.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use std::io::BufRead;
1313
use std::io::BufReader;
1414
use std::io::ErrorKind;
1515
use std::io::Read;
16+
use std::io::Write;
1617
use std::net::Shutdown;
1718
use std::os::unix::net::UnixStream;
1819
use std::path::Path;
@@ -31,6 +32,7 @@ use thiserror::Error;
3132
use tracing::debug;
3233
use tracing::error;
3334
use tracing::info;
35+
use tracing::warn;
3436
use uuid::Uuid;
3537

3638
use crate::disk::QCow2DiskError;
@@ -131,7 +133,20 @@ impl<S: Share> VM<S> {
131133
&state_dir,
132134
machine.mem_mib,
133135
)?;
134-
let nics = VirtualNICs::new(machine.num_nics, machine.max_combined_channels)?;
136+
let mut nics = VirtualNICs::new(machine.num_nics, machine.max_combined_channels)?;
137+
if nics.len() > 0 {
138+
if let Err(e) = nics[0].try_dump_file(args.eth0_output_file.clone()) {
139+
let err = format!("Failed to set eth0 dump file: {:?}", e);
140+
warn!(err);
141+
// Leave a hint that we could not set the dump file by writing a textual error in the .pcap file.
142+
// This will generate a corrupted .pcap file that an operator can look into to debug and understand what went wrong.
143+
if let Some(filename) = args.eth0_output_file.as_ref() {
144+
// If any part of this fails, we don't want to fail the VM creation.
145+
let _ =
146+
fs::File::create(filename).and_then(|mut f| f.write_all(err.as_bytes()));
147+
}
148+
}
149+
}
135150
let tpm = match machine.use_tpm {
136151
true => Some(TPMDevice::new(&state_dir)?),
137152
false => None,
@@ -391,6 +406,7 @@ impl<S: Share> VM<S> {
391406
if let Some(tpm) = &self.tpm {
392407
args.extend(tpm.qemu_args());
393408
}
409+
394410
let mut command = Command::new(match self.machine.arch {
395411
CpuIsa::AARCH64 => "qemu-system-aarch64",
396412
CpuIsa::X86_64 => "qemu-system-x86_64",

0 commit comments

Comments
 (0)