diff --git a/lib/wasix/src/bin_factory/binary_package.rs b/lib/wasix/src/bin_factory/binary_package.rs
index c41db6680d8..a20bc0d126b 100644
--- a/lib/wasix/src/bin_factory/binary_package.rs
+++ b/lib/wasix/src/bin_factory/binary_package.rs
@@ -19,6 +19,20 @@ use crate::{
 };
 use wasmer_types::ModuleHash;
 
+/// Returns `true` if `s` is safe to embed as a single filesystem path component.
+///
+/// Rejects empty strings, the special entries `.` and `..`, and any string
+/// containing a path separator (`/`, `\`) or a null byte, all of which could
+/// be used to escape the intended `/bin/.__atoms/…` subtree.
+fn is_safe_path_component(s: &str) -> bool {
+    !s.is_empty()
+        && s != "."
+        && s != ".."
+        && !s.contains('/')
+        && !s.contains('\\')
+        && !s.contains('\0')
+}
+
 #[derive(derive_more::Debug, Clone)]
 pub struct BinaryPackageCommand {
     name: String,
@@ -103,6 +117,43 @@ impl BinaryPackageCommand {
         // If no annotations were found, return None
         None
     }
+
+    /// Returns the VFS path at which this command's atom is stored so that a
+    /// process can re-exec via `argv[0]` without triggering command-level
+    /// metadata such as `main_args`.
+    ///
+    /// The path encodes both the origin package and the atom name to avoid
+    /// collisions between atoms with the same name coming from different
+    /// packages. Returns `None` if there is no atom annotation or a name is unsafe.
+    pub fn atom_vfs_path(&self) -> Option<String> {
+        let atom_name = self.metadata().atom().ok().flatten().map(|a| a.name)?;
+        let pkg_segment = match self.origin_package.as_named() {
+            Some(named) => named.full_name.clone(),
+            None => self.origin_package.to_string(),
+        };
+
+        // pkg_segment may legitimately contain '/' (e.g. "namespace/package"),
+        // so validate each component individually. atom_name must be a single
+        // component with no separator. Reject anything that could be used for
+        // path traversal before embedding it into the guest filesystem path.
+        if !pkg_segment.split('/').all(is_safe_path_component) {
+            tracing::warn!(
+                origin_package = %self.origin_package,
+                "Refusing to mount atom: origin package contains unsafe path components",
+            );
+            return None;
+        }
+        if !is_safe_path_component(&atom_name) {
+            tracing::warn!(
+                origin_package = %self.origin_package,
+                atom_name = %atom_name,
+                "Refusing to mount atom: atom name contains unsafe path components",
+            );
+            return None;
+        }
+
+        Some(format!("/bin/.__atoms/{pkg_segment}/{atom_name}"))
+    }
 }
 
 /// A WebAssembly package that has been loaded into memory.
diff --git a/lib/wasix/src/runners/wasi.rs b/lib/wasix/src/runners/wasi.rs
index c416457e951..ec0733374b7 100644
--- a/lib/wasix/src/runners/wasi.rs
+++ b/lib/wasix/src/runners/wasi.rs
@@ -462,16 +462,21 @@ impl WasiRunner {
             .annotation("wasi")?
             .unwrap_or_else(|| Wasi::new(command_name));
 
-        let exec_name = if let Some(exec_name) = wasi.exec_name.as_ref() {
-            exec_name
-        } else {
-            command_name
-        };
+        // Prefer an explicit exec_name from the annotation; otherwise use the
+        // atom's canonical VFS path (e.g. "/bin/.__atoms/wasmer/php/php") so
+        // that re-exec via argv[0] finds the raw wasm without triggering
+        // prepare_spawn again. Fall back to the command name for packages
+        // that carry no atom annotation.
+        let exec_name: String = wasi
+            .exec_name
+            .clone()
+            .or_else(|| cmd.atom_vfs_path())
+            .unwrap_or_else(|| command_name.to_owned());
 
         #[allow(unused_mut)]
         let mut builder = self
             .prepare_webc_env(
-                exec_name,
+                &exec_name,
                 &wasi,
                 PackageOrHash::Package(pkg),
                 runtime_or_engine,
diff --git a/lib/wasix/src/state/env.rs b/lib/wasix/src/state/env.rs
index 783291f0be1..ac5fc5bab08 100644
--- a/lib/wasix/src/state/env.rs
+++ b/lib/wasix/src/state/env.rs
@@ -1134,20 +1134,20 @@ impl WasiEnv {
                 let atom = command.atom();
 
                 if let Err(err) = write_readonly_buffer_to_fs(root_fs, path, &atom).await {
-                    tracing::debug!(
-                        "failed to add package [{}] command [{}] - {}",
-                        pkg.id,
-                        command.name(),
-                        err
+                    tracing::warn!(
+                        package=%pkg.id,
+                        command_name=command.name(),
+                        error=%err,
+                        "Failed to mount command into the filesystem",
                     );
                     continue;
                 }
                 if let Err(err) = write_readonly_buffer_to_fs(root_fs, path2, &atom).await {
-                    tracing::debug!(
-                        "failed to add package [{}] command [{}] - {}",
-                        pkg.id,
-                        command.name(),
-                        err
+                    tracing::warn!(
+                        package=%pkg.id,
+                        command_name=command.name(),
+                        error=%err,
+                        "Failed to mount command into the filesystem",
                     );
                     continue;
                 }
@@ -1167,6 +1167,50 @@ impl WasiEnv {
                     "Injected a command into the filesystem",
                 );
             }
+
+            // Mount each atom under /bin/.__atoms/{package}/{atom} so that a
+            // process re-execing via argv[0] (set to that path) finds the raw
+            // wasm bytes without command metadata. We intentionally do NOT
+            // register these paths in bin_factory so they are loaded as plain
+            // wasm executables, bypassing prepare_spawn and preventing
+            // main_args from being re-injected.
+            //
+            // The path encodes the origin package so atoms with the same name
+            // from different packages are kept separate. An atom referenced by
+            // multiple commands is only written once (deduplicated by path).
+            //
+            // Record a path only after its write succeeds, so a failed write
+            // is retried if a later command references the same atom. This set
+            // only deduplicates writes; argv[0] selection does not consult it.
+            let mut mounted_atoms: std::collections::HashSet<String> =
+                std::collections::HashSet::new();
+            for command in &pkg.commands {
+                if let Some(atom_path) = command.atom_vfs_path() {
+                    if mounted_atoms.contains(&atom_path) {
+                        continue;
+                    }
+                    let atom = command.atom();
+                    if let Err(err) =
+                        write_readonly_buffer_to_fs(root_fs, Path::new(&atom_path), &atom).await
+                    {
+                        tracing::warn!(
+                            package=%pkg.id,
+                            origin_package=%command.origin_package(),
+                            atom_path=%atom_path,
+                            error=%err,
+                            "Failed to mount atom into the filesystem; re-exec via argv[0] may not work",
+                        );
+                        continue;
+                    }
+                    mounted_atoms.insert(atom_path.clone());
+                    tracing::debug!(
+                        package=%pkg.id,
+                        origin_package=%command.origin_package(),
+                        atom_path=%atom_path,
+                        "Injected atom into the filesystem",
+                    );
+                }
+            }
         }
 
         Ok(())
@@ -1342,8 +1386,13 @@ impl WasiEnv {
                 args.splice(1..1, main_args);
             }
 
-            if let Some(exec_name) = exec_name {
-                self.state.args.lock().unwrap()[0] = exec_name;
+            // Prefer an explicit exec_name from the annotation; otherwise use
+            // the atom's canonical VFS path (e.g.
+            // "/bin/.__atoms/wasmer/php/php") so that re-exec via argv[0] finds
+            // the raw wasm without triggering prepare_spawn again.
+            let argv0 = exec_name.or_else(|| cmd.atom_vfs_path());
+            if let Some(argv0) = argv0 {
+                self.state.args.lock().unwrap()[0] = argv0;
             }
         }
     }
diff --git a/lib/wasix/tests/wasm_tests/mod.rs b/lib/wasix/tests/wasm_tests/mod.rs
index 90d12df9458..b534977f280 100644
--- a/lib/wasix/tests/wasm_tests/mod.rs
+++ b/lib/wasix/tests/wasm_tests/mod.rs
@@ -20,9 +20,12 @@ mod lifecycle_tests;
 mod longjmp_tests;
 mod path_tests;
 mod poll_tests;
+mod proc_exec;
+mod proc_exec2;
 mod proc_exec3;
 mod proc_exec3_empty_argv;
 mod proc_exec3_errors;
+mod proc_exec_command_argv0;
 mod reflect_signature;
 mod reflection_tests;
 mod sched_yield;
diff --git a/lib/wasix/tests/wasm_tests/proc_exec_command_argv0.rs b/lib/wasix/tests/wasm_tests/proc_exec_command_argv0.rs
new file mode 100644
index 00000000000..eaa4c35752a
--- /dev/null
+++ b/lib/wasix/tests/wasm_tests/proc_exec_command_argv0.rs
@@ -0,0 +1,77 @@
+use std::sync::Arc;
+
+use tempfile::TempDir;
+use wasmer_wasix::{
+    PluggableRuntime, Runtime,
+    bin_factory::BinaryPackage,
+    runners::wasi::{RuntimeOrEngine, WasiRunner},
+    runtime::{
+        module_cache::{FileSystemCache, ModuleCache, SharedCache},
+        package_loader::BuiltinPackageLoader,
+        task_manager::tokio::TokioTaskManager,
+    },
+};
+
+use super::run_build_script;
+
+/// Verify that a process re-exec'd via argv[0] does not inherit the command's
+/// main_args a second time. The C program re-execs itself with a "child"
+/// marker; in child mode it asserts argc == 2 (only argv[0] and the marker).
+/// Before the fix, the command's main_args would be re-injected, causing
+/// argc > 2 and a test failure.
+#[cfg_attr(
+    not(feature = "sys-thread"),
+    ignore = "The tokio task manager isn't available on this platform"
+)]
+#[tokio::test(flavor = "multi_thread")]
+async fn test_proc_exec_command_argv0() {
+    let wasm = run_build_script(file!(), ".").unwrap();
+    let wasm_bytes = std::fs::read(&wasm).unwrap();
+
+    // Create a temp dir with a wasmer.toml that has:
+    //   - atom "inner" (the compiled wasm)
+    //   - command "outer" using atom "inner" with extra main_args
+    // The command name and atom name differ on purpose so we can verify that
+    // argv[0] is set to the atom VFS path (for example
+    // /bin/.__atoms/{package}/{atom}), not the command name, after the fix.
+    let temp = TempDir::new().unwrap();
+    let wasmer_toml = r#"
+[package]
+name = "test/command-argv0"
+version = "0.0.0"
+description = "test package"
+
+[[module]]
+name = "inner"
+source = "inner.wasm"
+abi = "wasi"
+
+[[command]]
+name = "outer"
+module = "inner"
+main_args = "--extra-arg"
+"#;
+    std::fs::write(temp.path().join("wasmer.toml"), wasmer_toml).unwrap();
+    std::fs::write(temp.path().join("inner.wasm"), &wasm_bytes).unwrap();
+
+    let tasks = Arc::new(TokioTaskManager::new(tokio::runtime::Handle::current()));
+    let mut rt = PluggableRuntime::new(Arc::clone(&tasks) as Arc<_>);
+    let cache = SharedCache::default().with_fallback(FileSystemCache::new(
+        temp.path().join("module-cache"),
+        tasks.clone(),
+    ));
+    rt.set_module_cache(cache)
+        .set_package_loader(BuiltinPackageLoader::new());
+
+    let pkg = BinaryPackage::from_dir(temp.path(), &rt).await.unwrap();
+    let rt: Arc<dyn Runtime + Send + Sync> = Arc::new(rt);
+
+    let result = std::thread::spawn(move || {
+        let _guard = tasks.runtime_handle().enter();
+        WasiRunner::new().run_command("outer", &pkg, RuntimeOrEngine::Runtime(Arc::clone(&rt)))
+    })
+    .join()
+    .unwrap();
+
+    result.unwrap();
+}
diff --git a/lib/wasix/tests/wasm_tests/proc_exec_command_argv0/build.sh b/lib/wasix/tests/wasm_tests/proc_exec_command_argv0/build.sh
new file mode 100644
index 00000000000..3b729681c10
--- /dev/null
+++ b/lib/wasix/tests/wasm_tests/proc_exec_command_argv0/build.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+$CC main.c -o main
diff --git a/lib/wasix/tests/wasm_tests/proc_exec_command_argv0/main.c b/lib/wasix/tests/wasm_tests/proc_exec_command_argv0/main.c
new file mode 100644
index 00000000000..59373c7d838
--- /dev/null
+++ b/lib/wasix/tests/wasm_tests/proc_exec_command_argv0/main.c
@@ -0,0 +1,51 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#define CHILD_MARKER "child"
+#define EXPECTED_MAIN_ARG "--extra-arg"
+
+int main(int argc, char* argv[]) {
+  /* Child mode: invoked via execvp(argv[0], ...) from the parent.
+   * Verify that the command's main_args are NOT re-injected a second time. */
+  for (int i = 1; i < argc; i++) {
+    if (strcmp(argv[i], CHILD_MARKER) == 0) {
+      if (argc != 2) {
+        fprintf(stderr, "FAIL: child expected argc=2 but got %d\n", argc);
+        for (int j = 0; j < argc; j++) {
+          fprintf(stderr, "  argv[%d] = %s\n", j, argv[j]);
+        }
+        return 1;
+      }
+      /* Success: main_args were not re-injected into the re-exec'd process */
+      return 0;
+    }
+  }
+
+  /* Parent mode: verify that the command's main_args were injected for us */
+  int found_main_arg = 0;
+  for (int i = 1; i < argc; i++) {
+    if (strcmp(argv[i], EXPECTED_MAIN_ARG) == 0) {
+      found_main_arg = 1;
+      break;
+    }
+  }
+  if (!found_main_arg) {
+    fprintf(stderr, "FAIL: parent expected '%s' in argv but did not find it\n",
+            EXPECTED_MAIN_ARG);
+    for (int j = 0; j < argc; j++) {
+      fprintf(stderr, "  argv[%d] = %s\n", j, argv[j]);
+    }
+    return 1;
+  }
+
+  /* Re-exec ourselves via argv[0] with the child marker.
+   * After the fix, argv[0] is the canonical atom VFS path, so execvp will
+   * find the raw atom binary without command metadata, preventing
+   * main_args from being re-injected. */
+  char* new_argv[] = {argv[0], CHILD_MARKER, NULL};
+  execvp(argv[0], new_argv);
+  perror("execvp failed");
+  return 1;
+}