Skip to content

Commit

Permalink
[antlir2_vm] dump eth0 traffic when running in test mode
Browse files Browse the repository at this point in the history
Summary:
This diff provides a mechanism to dump eth0 traffic when running an
antlir VM.
Essentially, this adds a new option to `antlir2_vm run` called `--eth0-output-file`.

When it is set, qemu will be configured to dump the traffic of the `net0` interface to
that file. By default, nothing is dumped.

To plug this into the testing framework, when `antlir2_vm test` is called with
`--dump-eth0-traffic`, it creates a blob artifact that will be uploaded to TPX and
passes the resulting file path to `antlir2_vm run` via `--eth0-output-file`.

Test Plan:
```
 $ buck2 run fbcode//antlir/antlir2/antlir2_vm:antlir2_vm -- run -h
Buck UI: https://www.internalfb.com/buck2/64d2ce42-afe3-42cf-9970-76387fcbb5a8
Network: Up: 0B  Down: 0B
Jobs completed: 6. Time elapsed: 0.0s.
BUILD SUCCEEDED
Run the VM. Must be executed inside container

Usage: antlir2_vm run [OPTIONS] --machine-spec <MACHINE_SPEC> [COMMAND]...

Arguments:
  [COMMAND]...  Execute command through ssh inside VM

Options:
      --machine-spec <MACHINE_SPEC>                Json-encoded file for VM machine configuration
      --expect-failure                             Expects the VM to timeout or terminate early
      --postmortem                                 The command should be run after VM termination. Console log will be available at env $CONSOLE_OUTPUT
      --timeout-secs <TIMEOUT_SECS>                Timeout in seconds before VM will be terminated. None disables the timeout, which should only be used for interactive shells for development
      --console-output-file <CONSOLE_OUTPUT_FILE>  Redirect console output to file. By default it's suppressed
      --output-dirs <OUTPUT_DIRS>                  Output directories that need to be available inside VM
      --command-envs <COMMAND_ENVS>                Environment variables for the command
      --first-boot-command <FIRST_BOOT_COMMAND>    Command requires first boot
      --eth0-output-file <ETH0_OUTPUT_FILE>        Dump network traffic on eth0 to output to file. By default it is not dumped
      --console                                    Drop into console prompt. This also enables console output on screen, unless `--console-output-file` is specified
      --container                                  Drop into container shell outside VM
  -h, --help                                       Print help
```

and ran:
```
buck2 test  $(kerctl vmtest-config -e everstore:GICWmAACj33BEzsFAElJ4glD5YhXbuYfAAAf) fbcode//kernel/vmtest/uname_test:uname_test-6.9-local
```

https://www.internalfb.com/intern/testinfra/testconsole/testrun/6755399674977058/
leads to
https://www.internalfb.com/intern/testinfra/diagnostics/6755399674977058.562950104331417.1727383743/
which has an "Artifacts" section with eth0.pcap

Reviewed By: wujj123456

Differential Revision: D63487026

fbshipit-source-id: f6e63d2202a08d7b324364f7b492818de3224230
  • Loading branch information
chantra authored and facebook-github-bot committed Sep 30, 2024
1 parent b3d0e2d commit c7c8d04
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 9 deletions.
32 changes: 24 additions & 8 deletions antlir/antlir2/antlir2_vm/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ use crate::share::VirtiofsShare;
use crate::types::MachineOpts;
use crate::types::MountPlatformDecision;
use crate::types::VMArgs;
use crate::utils::create_tpx_blobs;
use crate::utils::create_tpx_logs;
use crate::utils::env_names_to_kvpairs;
use crate::utils::log_command;
Expand Down Expand Up @@ -97,6 +98,9 @@ struct IsolateCmdArgs {
/// Extra RW bind-mount into the VM for debugging purpose
#[arg(long)]
scratch_dir: Option<PathBuf>,
/// Whether or not to dump the VM's eth0 traffic to a file. When running the test command, this will set eth0_output_file to a file that will be uploaded to tpx.
#[arg(long, default_value_t = false)]
dump_eth0_traffic: bool,
/// Args for run command
#[clap(flatten)]
run_cmd_args: RunCmdArgs,
Expand Down Expand Up @@ -260,7 +264,11 @@ fn record_envs(_envs: &[KvPair]) -> Result<()> {

/// Further validate `VMArgs` parsed by clap and generate a new `VMArgs` with
/// content specific to test execution.
fn get_test_vm_args(orig_args: &VMArgs, cli_envs: Vec<String>) -> Result<ValidatedVMArgs> {
fn get_test_vm_args(
orig_args: &VMArgs,
cli_envs: Vec<String>,
dump_eth0_traffic: bool,
) -> Result<ValidatedVMArgs> {
if orig_args.timeout_secs.is_none() {
return Err(anyhow!("Test command must specify --timeout-secs."));
}
Expand Down Expand Up @@ -302,6 +310,9 @@ fn get_test_vm_args(orig_args: &VMArgs, cli_envs: Vec<String>) -> Result<Validat
vm_args.mode.command = Some(test_args.test.into_inner_cmd());
vm_args.command_envs = envs;
vm_args.console_output_file = create_tpx_logs("console.txt", "console logs")?;
if dump_eth0_traffic {
vm_args.eth0_output_file = create_tpx_blobs("eth0.pcap", "eth0 traffic")?;
}
Ok(ValidatedVMArgs {
inner: vm_args,
is_list,
Expand Down Expand Up @@ -381,7 +392,11 @@ fn test(args: &IsolateCmdArgs) -> Result<()> {
// It may then decide whether to use host's platform for the actual test.
Platform::set(&MountPlatformDecision(true))?;

let validated_args = get_test_vm_args(&args.run_cmd_args.vm_args, args.passenv.clone())?;
let validated_args = get_test_vm_args(
&args.run_cmd_args.vm_args,
args.passenv.clone(),
args.dump_eth0_traffic,
)?;
antlir2_rootless::unshare_new_userns()?;
antlir2_isolate::unshare_and_privatize_mount_ns().context("while isolating mount ns")?;
let mut command = if validated_args.is_list {
Expand Down Expand Up @@ -441,27 +456,28 @@ mod test {
};
let mut expected = valid.clone();
expected.mode.command = Some(vec![OsString::from("whatever")]);
let parsed = get_test_vm_args(&valid, vec![]).expect("Parsing should succeed");
let parsed = get_test_vm_args(&valid, vec![], false).expect("Parsing should succeed");
assert_eq!(parsed.inner.mode, expected.mode);
assert!(!parsed.is_list);

let mut timeout = valid.clone();
timeout.timeout_secs = None;
assert!(get_test_vm_args(&timeout, vec![]).is_err());
assert!(get_test_vm_args(&timeout, vec![], false).is_err());

let mut output_dirs = valid.clone();
output_dirs.output_dirs = vec![PathBuf::from("/some")];
assert!(get_test_vm_args(&output_dirs, vec![]).is_err());
assert!(get_test_vm_args(&output_dirs, vec![], false).is_err());

let mut command = valid.clone();
command.mode.command = None;
assert!(get_test_vm_args(&command, vec![]).is_err());
assert!(get_test_vm_args(&command, vec![], false).is_err());
command.mode.command = Some(vec![OsString::from("invalid")]);
assert!(get_test_vm_args(&command, vec![]).is_err());
assert!(get_test_vm_args(&command, vec![], false).is_err());

let env_var_test = valid;
std::env::set_var("TEST_PILOT_A", "A");
let parsed = get_test_vm_args(&env_var_test, vec![]).expect("Parsing should succeed");
let parsed =
get_test_vm_args(&env_var_test, vec![], false).expect("Parsing should succeed");
assert!(
parsed
.inner
Expand Down
15 changes: 15 additions & 0 deletions antlir/antlir2/antlir2_vm/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ pub(crate) struct VMArgs {
/// Command requires first boot
#[clap(long)]
pub(crate) first_boot_command: Option<String>,
/// Dump network traffic on eth0 to output to file. By default it is not dumped.
#[clap(long)]
pub(crate) eth0_output_file: Option<PathBuf>,
/// Operation for VM to carry out
#[clap(flatten)]
pub(crate) mode: VMModeArgs,
Expand Down Expand Up @@ -131,6 +134,10 @@ impl VMArgs {
args.push("--console-output-file".into());
args.push(path.into());
}
if let Some(path) = &self.eth0_output_file {
args.push("--eth0-output-file".into());
args.push(path.into());
}
self.command_envs.iter().for_each(|pair| {
args.push("--command-envs".into());
let mut kv_str = OsString::new();
Expand Down Expand Up @@ -176,6 +183,14 @@ impl VMArgs {
outputs.insert(env::current_dir().expect("current dir must be valid"));
}
}
// eth0 output needs to be accessible for debugging and uploading
if let Some(file_path) = &self.eth0_output_file {
if let Some(parent) = file_path.parent() {
outputs.insert(parent.to_path_buf());
} else {
outputs.insert(env::current_dir().expect("current dir must be valid"));
}
}
outputs
}
}
Expand Down
18 changes: 17 additions & 1 deletion antlir/antlir2/antlir2_vm/src/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use std::io::BufRead;
use std::io::BufReader;
use std::io::ErrorKind;
use std::io::Read;
use std::io::Write;
use std::net::Shutdown;
use std::os::unix::net::UnixStream;
use std::path::Path;
Expand All @@ -31,6 +32,7 @@ use thiserror::Error;
use tracing::debug;
use tracing::error;
use tracing::info;
use tracing::warn;
use uuid::Uuid;

use crate::disk::QCow2DiskError;
Expand Down Expand Up @@ -131,7 +133,20 @@ impl<S: Share> VM<S> {
&state_dir,
machine.mem_mib,
)?;
let nics = VirtualNICs::new(machine.num_nics, machine.max_combined_channels)?;
let mut nics = VirtualNICs::new(machine.num_nics, machine.max_combined_channels)?;
if nics.len() > 0 {
if let Err(e) = nics[0].try_dump_file(args.eth0_output_file.clone()) {
let err = format!("Failed to set eth0 dump file: {:?}", e);
warn!(err);
// Leave a hint that we could not set the dump file by writing a textual error in the .pcap file.
// This will generate a corrupted .pcap file that an operator can look into to debug and understand what went wrong.
if let Some(filename) = args.eth0_output_file.as_ref() {
// If any part of this fails, we don't want to fail the VM creation.
let _ =
fs::File::create(filename).and_then(|mut f| f.write_all(err.as_bytes()));
}
}
}
let tpm = match machine.use_tpm {
true => Some(TPMDevice::new(&state_dir)?),
false => None,
Expand Down Expand Up @@ -391,6 +406,7 @@ impl<S: Share> VM<S> {
if let Some(tpm) = &self.tpm {
args.extend(tpm.qemu_args());
}

let mut command = Command::new(match self.machine.arch {
CpuIsa::AARCH64 => "qemu-system-aarch64",
CpuIsa::X86_64 => "qemu-system-x86_64",
Expand Down

0 comments on commit c7c8d04

Please sign in to comment.