Skip to content

Commit cec24cc

Browse files
committed
tests(js_repl): stabilize CI runtime test execution
git-stack-id: fjord/js_repl_seq---4htl2cund94dlu git-stack-title: tests(js_repl): stabilize CI runtime test execution
1 parent e7b6f38 commit cec24cc

File tree

7 files changed

+309
-264
lines changed

7 files changed

+309
-264
lines changed

.github/workflows/bazel.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,16 @@ jobs:
4747
steps:
4848
- uses: actions/checkout@v6
4949

50+
- name: Set up Node.js for js_repl tests
51+
uses: actions/setup-node@v6
52+
with:
53+
node-version-file: codex-rs/node-version.txt
54+
check-latest: true
55+
56+
- name: Make Node.js available in PATH (Unix)
57+
if: runner.os != 'Windows'
58+
run: cp "$(which node)" /usr/local/bin
59+
5060
# Some integration tests rely on DotSlash being installed.
5161
# See https://github.com/openai/codex/pull/7617.
5262
- name: Install DotSlash
@@ -117,6 +127,12 @@ jobs:
117127
--build_metadata=VISIBILITY=PUBLIC
118128
)
119129
130+
if [[ "${RUNNER_OS:-}" != "Windows" ]]; then
131+
# Bazel test sandboxes on macOS may resolve an older Homebrew `node`
132+
# before the `actions/setup-node` runtime on PATH.
133+
bazel_args+=(--test_env=CODEX_JS_REPL_NODE_PATH=/usr/local/bin/node)
134+
fi
135+
120136
if [[ -n "${BUILDBUDDY_API_KEY:-}" ]]; then
121137
echo "BuildBuddy API key is available; using remote Bazel configuration."
122138
bazel $BAZEL_STARTUP_ARGS \

.github/workflows/rust-ci.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,11 @@ jobs:
499499

500500
steps:
501501
- uses: actions/checkout@v6
502+
- name: Set up Node.js for js_repl tests
503+
uses: actions/setup-node@v6
504+
with:
505+
node-version-file: codex-rs/node-version.txt
506+
check-latest: true
502507
- name: Install Linux build dependencies
503508
if: ${{ runner.os == 'Linux' }}
504509
shell: bash

codex-rs/.config/nextest.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,13 @@ slow-timeout = { period = "1m", terminate-after = 4 }
1111
[[profile.default.overrides]]
1212
filter = 'test(approval_matrix_covers_all_modes)'
1313
slow-timeout = { period = "30s", terminate-after = 2 }
14+
15+
[[profile.default.overrides]]
16+
# js_repl runtime tests each spawn a Node kernel; on Windows x64 CI they can
17+
# time out when nextest launches many of them concurrently. This covers both
18+
# unit tests and tool-level integration tests that exercise js_repl.
19+
filter = 'package(codex-core) and (test(tools::js_repl::tests::js_repl_) or test(suite::js_repl::js_repl_) or test(suite::view_image::js_repl_))'
20+
test-group = "js-repl-runtime-serial"
21+
22+
[test-groups.js-repl-runtime-serial]
23+
max-threads = 1

codex-rs/core/src/tools/handlers/js_repl.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,6 @@ mod tests {
288288
use std::time::Duration;
289289

290290
use super::parse_freeform_args;
291-
use crate::codex::make_session_and_context_with_rx;
292291
use crate::protocol::EventMsg;
293292
use crate::protocol::ExecCommandSource;
294293
use pretty_assertions::assert_eq;
@@ -339,7 +338,7 @@ mod tests {
339338

340339
#[tokio::test]
341340
async fn emit_js_repl_exec_end_sends_event() {
342-
let (session, turn, rx) = make_session_and_context_with_rx().await;
341+
let (session, turn, rx) = crate::codex::make_session_and_context_with_rx().await;
343342
super::emit_js_repl_exec_end(
344343
session.as_ref(),
345344
turn.as_ref(),

codex-rs/core/src/tools/js_repl/mod.rs

Lines changed: 11 additions & 262 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,18 +1287,25 @@ pub(crate) fn resolve_node(config_path: Option<&Path>) -> Option<PathBuf> {
12871287
#[cfg(test)]
12881288
mod tests {
12891289
use super::*;
1290-
use crate::codex::make_session_and_context;
12911290
use crate::protocol::AskForApproval;
12921291
use crate::protocol::SandboxPolicy;
12931292
use crate::turn_diff_tracker::TurnDiffTracker;
1294-
use codex_protocol::models::ContentItem;
1295-
use codex_protocol::models::ResponseInputItem;
1296-
use codex_protocol::openai_models::InputModality;
12971293
use pretty_assertions::assert_eq;
12981294
use std::fs;
12991295
use std::path::Path;
13001296
use tempfile::tempdir;
13011297

1298+
fn configure_js_repl_test_sandbox(turn: &mut crate::codex::TurnContext) {
1299+
// Manager-level runtime tests don't need to exercise Linux arg0 sandbox dispatch.
1300+
turn.sandbox_policy = SandboxPolicy::DangerFullAccess;
1301+
}
1302+
1303+
async fn make_session_and_context() -> (crate::codex::Session, crate::codex::TurnContext) {
1304+
let (session, mut turn) = crate::codex::make_session_and_context().await;
1305+
configure_js_repl_test_sandbox(&mut turn);
1306+
(session, turn)
1307+
}
1308+
13021309
#[test]
13031310
fn node_version_parses_v_prefix_and_suffix() {
13041311
let version = NodeVersion::parse("v25.1.0-nightly.2024").unwrap();
@@ -1606,47 +1613,6 @@ mod tests {
16061613
Ok(())
16071614
}
16081615

1609-
#[tokio::test]
1610-
async fn js_repl_persists_top_level_bindings_and_supports_tla() -> anyhow::Result<()> {
1611-
if !can_run_js_repl_runtime_tests().await {
1612-
return Ok(());
1613-
}
1614-
1615-
let (session, turn) = make_session_and_context().await;
1616-
let session = Arc::new(session);
1617-
let turn = Arc::new(turn);
1618-
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default()));
1619-
let manager = turn.js_repl.manager().await?;
1620-
1621-
let first = manager
1622-
.execute(
1623-
Arc::clone(&session),
1624-
Arc::clone(&turn),
1625-
Arc::clone(&tracker),
1626-
JsReplArgs {
1627-
code: "let x = await Promise.resolve(41); console.log(x);".to_string(),
1628-
timeout_ms: Some(10_000),
1629-
},
1630-
)
1631-
.await?;
1632-
assert!(first.output.contains("41"));
1633-
1634-
let second = manager
1635-
.execute(
1636-
Arc::clone(&session),
1637-
Arc::clone(&turn),
1638-
Arc::clone(&tracker),
1639-
JsReplArgs {
1640-
code: "console.log(x + 1);".to_string(),
1641-
timeout_ms: Some(10_000),
1642-
},
1643-
)
1644-
.await?;
1645-
1646-
assert!(second.output.contains("42"));
1647-
Ok(())
1648-
}
1649-
16501616
#[tokio::test]
16511617
async fn js_repl_timeout_does_not_deadlock() -> anyhow::Result<()> {
16521618
if !can_run_js_repl_runtime_tests().await {
@@ -1793,97 +1759,6 @@ mod tests {
17931759
Ok(())
17941760
}
17951761

1796-
#[tokio::test]
1797-
async fn js_repl_can_call_tools() -> anyhow::Result<()> {
1798-
if !can_run_js_repl_runtime_tests().await {
1799-
return Ok(());
1800-
}
1801-
1802-
let (session, mut turn) = make_session_and_context().await;
1803-
turn.approval_policy = AskForApproval::Never;
1804-
turn.sandbox_policy = SandboxPolicy::DangerFullAccess;
1805-
1806-
let session = Arc::new(session);
1807-
let turn = Arc::new(turn);
1808-
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default()));
1809-
let manager = turn.js_repl.manager().await?;
1810-
1811-
let shell = manager
1812-
.execute(
1813-
Arc::clone(&session),
1814-
Arc::clone(&turn),
1815-
Arc::clone(&tracker),
1816-
JsReplArgs {
1817-
code: "const shellOut = await codex.tool(\"shell_command\", { command: \"printf js_repl_shell_ok\" }); console.log(JSON.stringify(shellOut));".to_string(),
1818-
timeout_ms: Some(15_000),
1819-
},
1820-
)
1821-
.await?;
1822-
assert!(shell.output.contains("js_repl_shell_ok"));
1823-
1824-
let tool = manager
1825-
.execute(
1826-
Arc::clone(&session),
1827-
Arc::clone(&turn),
1828-
Arc::clone(&tracker),
1829-
JsReplArgs {
1830-
code: "const toolOut = await codex.tool(\"list_mcp_resources\", {}); console.log(toolOut.type);".to_string(),
1831-
timeout_ms: Some(15_000),
1832-
},
1833-
)
1834-
.await?;
1835-
assert!(tool.output.contains("function_call_output"));
1836-
Ok(())
1837-
}
1838-
1839-
#[tokio::test]
1840-
async fn js_repl_tool_call_rejects_recursive_js_repl_invocation() -> anyhow::Result<()> {
1841-
if !can_run_js_repl_runtime_tests().await {
1842-
return Ok(());
1843-
}
1844-
1845-
let (session, mut turn) = make_session_and_context().await;
1846-
turn.approval_policy = AskForApproval::Never;
1847-
turn.sandbox_policy = SandboxPolicy::DangerFullAccess;
1848-
1849-
let session = Arc::new(session);
1850-
let turn = Arc::new(turn);
1851-
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default()));
1852-
let manager = turn.js_repl.manager().await?;
1853-
1854-
let result = manager
1855-
.execute(
1856-
session,
1857-
turn,
1858-
tracker,
1859-
JsReplArgs {
1860-
code: r#"
1861-
try {
1862-
await codex.tool("js_repl", "console.log('recursive')");
1863-
console.log("unexpected-success");
1864-
} catch (err) {
1865-
console.log(String(err));
1866-
}
1867-
"#
1868-
.to_string(),
1869-
timeout_ms: Some(15_000),
1870-
},
1871-
)
1872-
.await?;
1873-
1874-
assert!(
1875-
result.output.contains("js_repl cannot invoke itself"),
1876-
"expected recursion guard message, got output: {}",
1877-
result.output
1878-
);
1879-
assert!(
1880-
!result.output.contains("unexpected-success"),
1881-
"recursive js_repl tool call unexpectedly succeeded: {}",
1882-
result.output
1883-
);
1884-
Ok(())
1885-
}
1886-
18871762
#[tokio::test]
18881763
async fn js_repl_waits_for_unawaited_tool_calls_before_completion() -> anyhow::Result<()> {
18891764
if !can_run_js_repl_runtime_tests().await || cfg!(windows) {
@@ -1927,132 +1802,6 @@ console.log("cell-complete");
19271802
Ok(())
19281803
}
19291804

1930-
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
1931-
async fn js_repl_can_attach_image_via_view_image_tool() -> anyhow::Result<()> {
1932-
if !can_run_js_repl_runtime_tests().await {
1933-
return Ok(());
1934-
}
1935-
1936-
let (session, mut turn) = make_session_and_context().await;
1937-
if !turn
1938-
.model_info
1939-
.input_modalities
1940-
.contains(&InputModality::Image)
1941-
{
1942-
return Ok(());
1943-
}
1944-
turn.approval_policy = AskForApproval::Never;
1945-
turn.sandbox_policy = SandboxPolicy::DangerFullAccess;
1946-
1947-
let session = Arc::new(session);
1948-
let turn = Arc::new(turn);
1949-
*session.active_turn.lock().await = Some(crate::state::ActiveTurn::default());
1950-
1951-
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default()));
1952-
let manager = turn.js_repl.manager().await?;
1953-
let code = r#"
1954-
const fs = await import("node:fs/promises");
1955-
const path = await import("node:path");
1956-
const imagePath = path.join(codex.tmpDir, "js-repl-view-image.png");
1957-
const png = Buffer.from(
1958-
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==",
1959-
"base64"
1960-
);
1961-
await fs.writeFile(imagePath, png);
1962-
const out = await codex.tool("view_image", { path: imagePath });
1963-
console.log(out.type);
1964-
console.log(out.output?.body?.text ?? "");
1965-
"#;
1966-
1967-
let result = manager
1968-
.execute(
1969-
Arc::clone(&session),
1970-
turn,
1971-
tracker,
1972-
JsReplArgs {
1973-
code: code.to_string(),
1974-
timeout_ms: Some(15_000),
1975-
},
1976-
)
1977-
.await?;
1978-
assert!(result.output.contains("function_call_output"));
1979-
1980-
let pending_input = session.get_pending_input().await;
1981-
let image_url = pending_input
1982-
.iter()
1983-
.find_map(|item| match item {
1984-
ResponseInputItem::Message { content, .. } => {
1985-
content.iter().find_map(|content_item| match content_item {
1986-
ContentItem::InputImage { image_url } => Some(image_url.as_str()),
1987-
_ => None,
1988-
})
1989-
}
1990-
_ => None,
1991-
})
1992-
.expect("view_image should inject an input_image message for the active turn");
1993-
assert!(image_url.starts_with("data:image/png;base64,"));
1994-
1995-
Ok(())
1996-
}
1997-
1998-
#[tokio::test]
1999-
async fn js_repl_does_not_expose_process_global() -> anyhow::Result<()> {
2000-
if !can_run_js_repl_runtime_tests().await {
2001-
return Ok(());
2002-
}
2003-
2004-
let (session, turn) = make_session_and_context().await;
2005-
let session = Arc::new(session);
2006-
let turn = Arc::new(turn);
2007-
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default()));
2008-
let manager = turn.js_repl.manager().await?;
2009-
2010-
let result = manager
2011-
.execute(
2012-
session,
2013-
turn,
2014-
tracker,
2015-
JsReplArgs {
2016-
code: "console.log(typeof process);".to_string(),
2017-
timeout_ms: Some(10_000),
2018-
},
2019-
)
2020-
.await?;
2021-
assert!(result.output.contains("undefined"));
2022-
Ok(())
2023-
}
2024-
2025-
#[tokio::test]
2026-
async fn js_repl_blocks_sensitive_builtin_imports() -> anyhow::Result<()> {
2027-
if !can_run_js_repl_runtime_tests().await {
2028-
return Ok(());
2029-
}
2030-
2031-
let (session, turn) = make_session_and_context().await;
2032-
let session = Arc::new(session);
2033-
let turn = Arc::new(turn);
2034-
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default()));
2035-
let manager = turn.js_repl.manager().await?;
2036-
2037-
let err = manager
2038-
.execute(
2039-
session,
2040-
turn,
2041-
tracker,
2042-
JsReplArgs {
2043-
code: "await import(\"node:process\");".to_string(),
2044-
timeout_ms: Some(10_000),
2045-
},
2046-
)
2047-
.await
2048-
.expect_err("node:process import should be blocked");
2049-
assert!(
2050-
err.to_string()
2051-
.contains("Importing module \"node:process\" is not allowed in js_repl")
2052-
);
2053-
Ok(())
2054-
}
2055-
20561805
#[tokio::test]
20571806
async fn js_repl_prefers_env_node_module_dirs_over_config() -> anyhow::Result<()> {
20581807
if !can_run_js_repl_runtime_tests().await {

0 commit comments

Comments
 (0)