Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/actions/test-template/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,15 @@ runs:
${{ github.workspace }}/${{ github.run_id }}/${{steps.uuid.outputs.id }}/nemo-rl/tests/.coverage
include-hidden-files: true

- name: Upload nemo_gym actual test data
uses: actions/upload-artifact@v6
if: always()
with:
name: actual_test_nemo_gym_sanity-${{ github.run_id }}
path: |
${{ github.workspace }}/${{ github.run_id }}/${{steps.uuid.outputs.id }}/nemo-rl/tests/unit/environments/nemo_gym_test_data/actual_test_nemo_gym_sanity.json
if-no-files-found: ignore

- name: Container shutdown
if: always()
shell: bash
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ coverage.json
unit_results.json
unit_results/
test_assets/
actual_test_nemo_gym_sanity.json
.nrl_remote_map.json
.nrl_remote_state.json

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,7 @@ markers = [
"automodel: marks tests that require the automodel extra",
"vllm: marks tests that require the vllm extra",
"sglang: marks tests that require the sglang extra",
"nemo_gym: marks tests that require the nemo_gym extra",
]

[tool.pyrefly]
Expand Down
8 changes: 8 additions & 0 deletions tests/unit/L0_Unit_Tests_Other.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,14 @@ else
uv run --extra sglang bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --sglang-only
fi

# Check and run nemo_gym tests
exit_code=$(cd ${PROJECT_ROOT}/tests && uv run --extra nemo_gym pytest "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --collect-only --nemo-gym-only -q >/dev/null 2>&1; echo $?)
if [[ $exit_code -eq 5 ]]; then
echo "No nemo_gym tests to run"
else
uv run --extra nemo_gym bash -x ./tests/run_unit.sh "${TEST_PATHS[@]}" "${IGNORE[@]}" "${EXCLUDED_UNIT_TESTS[@]}" --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --nemo-gym-only -vv
fi

# Skip research tests in fast mode
if [[ "${FAST:-0}" != "1" ]]; then
for i in research/*/tests/unit; do
Expand Down
28 changes: 27 additions & 1 deletion tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ def pytest_addoption(parser):
default=False,
help="Run ONLY sglang tests",
)
parser.addoption(
"--nemo-gym-only",
action="store_true",
default=False,
help="Run ONLY nemo_gym tests",
)


def pytest_collection_modifyitems(config, items):
Expand All @@ -72,17 +78,19 @@ def pytest_collection_modifyitems(config, items):
run_automodel_only = config.getoption("--automodel-only")
run_vllm_only = config.getoption("--vllm-only")
run_sglang_only = config.getoption("--sglang-only")
run_nemo_gym_only = config.getoption("--nemo-gym-only")

# Check for mutually exclusive options
exclusive_options = [
run_mcore_only,
run_automodel_only,
run_vllm_only,
run_sglang_only,
run_nemo_gym_only,
]
if sum(exclusive_options) > 1:
raise ValueError(
"--mcore-only, --automodel-only, --vllm-only, and --sglang-only are mutually exclusive"
"--mcore-only, --automodel-only, --vllm-only, --sglang-only, and --nemo-gym-only are mutually exclusive"
)

marker_expr = config.getoption("-m", default="")
Expand Down Expand Up @@ -170,6 +178,24 @@ def pytest_collection_modifyitems(config, items):
item for item in new_items if not item.get_closest_marker("sglang")
]

# Filter by nemo_gym marker
if run_nemo_gym_only:
# Validate that nemo_gym is available
try:
from nemo_gym import config_types # noqa: F401
except ImportError:
raise ImportError(
"Cannot run nemo_gym tests: nemo_gym is not available.\n"
"Please run tests with: uv run --extra nemo_gym --group test pytest ..."
)
# Include only nemo_gym tests
new_items = [item for item in new_items if item.get_closest_marker("nemo_gym")]
else:
# Exclude nemo_gym tests by default
new_items = [
item for item in new_items if not item.get_closest_marker("nemo_gym")
]

# Ensure run_first tests are prioritized
new_items.sort(key=lambda item: 0 if item.get_closest_marker("run_first") else 1)

Expand Down

Large diffs are not rendered by default.

64 changes: 48 additions & 16 deletions tests/unit/environments/test_nemo_gym.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import pytest
import ray
import torch
from yaml import safe_load

from nemo_rl.distributed.ray_actor_environment_registry import (
Expand All @@ -35,20 +36,11 @@
tokenizer as nemo_gym_tokenizer, # noqa: F401
)

try:
from nemo_gym import config_types # noqa: F401

NEMO_GYM_INSTALLED = True
except ImportError:
nemo_gym = None
NEMO_GYM_INSTALLED = False


@pytest.mark.skipif(
not NEMO_GYM_INSTALLED,
reason="Skipping NeMo-Gym test since NeMo-Gym is not installed!",
)
@pytest.mark.nemo_gym
def test_nemo_gym_stub_module():
from nemo_gym import config_types

print(
f"NeMo-Gym test successfully run! NeMo-Gym config_types module: {config_types}"
)
Expand Down Expand Up @@ -141,10 +133,43 @@ def nemo_gym_sanity_test_data():
return data


@pytest.mark.skipif(
not NEMO_GYM_INSTALLED,
reason="Skipping NeMo-Gym test since NeMo-Gym is not installed!",
)
def _write_actual_test_data(original_input: list, actual_result: list):
    """Write actual rollout results to actual_test_nemo_gym_sanity.json.

    This makes it easy to update the expected output after a Gym commit bump:
        cp nemo_gym_test_data/actual_test_nemo_gym_sanity.json nemo_gym_test_data/test_nemo_gym_sanity.json

    Args:
        original_input: the test inputs as they were before the rollout mutated them.
        actual_result: rollout result dicts; entries may contain torch tensors and
            a non-serializable "full_result" entry, both handled below.
    """

    def _convert(obj):
        """Recursively convert torch tensors to Python lists for JSON serialization."""
        if isinstance(obj, torch.Tensor):
            return obj.tolist()
        if isinstance(obj, dict):
            return {k: _convert(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [_convert(v) for v in obj]
        return obj

    # Work on a copy so the caller's result objects are left untouched, and strip
    # data that is not reproducible across runs: the raw rollout payload and the
    # per-message token ids / logprobs of every message after the first.
    cleaned = deepcopy(actual_result)
    for r in cleaned:
        r.pop("full_result", None)
        for msg in r.get("message_log", [])[1:]:
            if "token_ids" in msg:
                msg["token_ids"] = []
            if "generation_logprobs" in msg:
                msg["generation_logprobs"] = []

    output_path = (
        Path(__file__).parent / "nemo_gym_test_data/actual_test_nemo_gym_sanity.json"
    )
    # The output file is gitignored; make sure its directory exists so the dump
    # does not fail on a checkout where the data directory is absent.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    data = _convert({"input": original_input, "expected_output": cleaned})
    with open(output_path, "w") as f:
        json.dump(data, f)
        f.write("\n")
    print(f"Wrote updated test data to {output_path}")


@pytest.mark.nemo_gym
def test_nemo_gym_sanity(
nemo_gym,
nemo_gym_sanity_test_data,
Expand All @@ -153,6 +178,9 @@ def test_nemo_gym_sanity(
):
"""Test basic functionality of MathEnvironment step with simple messages."""

# Save original input before mutation for writing the actual test data file
original_input = deepcopy(nemo_gym_sanity_test_data["input"])

# We need to match NeMo RL generation config params before sending to NeMo-Gym
generation_config = nemo_gym_vllm_generation.cfg
examples = nemo_gym_sanity_test_data["input"]
Expand All @@ -178,6 +206,10 @@ def test_nemo_gym_sanity(
# for message in d["message_log"][:1]:
# message["token_ids"] = message["token_ids"].tolist()

# Write the actual result to a file so it can be used to update the expected output.
# To update: cp actual_test_nemo_gym_sanity.json test_nemo_gym_sanity.json
_write_actual_test_data(original_input, actual_result)

def _standardize_single_result(d: dict):
d = deepcopy(d)
d.pop("full_result", None)
Expand Down
20 changes: 14 additions & 6 deletions tests/unit/experience/test_rollouts.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@

# These are all fixtures
from tests.unit.environments.test_nemo_gym import (
NEMO_GYM_INSTALLED,
cluster, # noqa: F401
nemo_gym, # noqa: F401
nemo_gym_sanity_test_data, # noqa: F401
Expand Down Expand Up @@ -787,10 +786,7 @@ def test_run_sliding_puzzle_vllm(sliding_puzzle_setup_vllm):
print("\nSliding Puzzle VLLM Test assertions passed.")


@pytest.mark.skipif(
not NEMO_GYM_INSTALLED,
reason="Skipping NeMo-Gym test since NeMo-Gym is not installed!",
)
@pytest.mark.nemo_gym
def test_run_async_nemo_gym_rollout(
nemo_gym, # noqa: F811
nemo_gym_vllm_generation, # noqa: F811
Expand Down Expand Up @@ -827,9 +823,20 @@ def test_run_async_nemo_gym_rollout(

expected_result = {
"final_batch": {
"length": torch.tensor([3088, 3056]),
"agent_ref": [
{
"name": "example_multi_step_simple_agent",
"type": "responses_api_agents",
},
{
"name": "example_multi_step_simple_agent",
"type": "responses_api_agents",
},
],
"length": torch.tensor([3080, 3048]),
"loss_multiplier": torch.tensor([1.0, 1.0]),
"total_reward": torch.tensor([0.0, 0.0]),
"truncated": torch.tensor([False, False]),
},
"rollout_metrics": {
# core metrics
Expand Down Expand Up @@ -909,6 +916,7 @@ def _standardize(d: dict) -> dict:
final_batch["total_reward"] = final_batch["total_reward"].tolist()
final_batch["loss_multiplier"] = final_batch["loss_multiplier"].tolist()
final_batch["length"] = final_batch["length"].tolist()
final_batch["truncated"] = final_batch["truncated"].tolist()

for key in d["rollout_metrics"]:
# We remove these fields from comparison since we cannot guarantee exact generation reproducibility
Expand Down
Loading