diff --git a/cluster_configs/example-local.yaml b/cluster_configs/example-local.yaml index ffa5e8e268..a2c976dd73 100644 --- a/cluster_configs/example-local.yaml +++ b/cluster_configs/example-local.yaml @@ -23,7 +23,8 @@ containers: sandbox: dockerfile:dockerfiles/Dockerfile.sandbox nemo-skills: dockerfile:dockerfiles/Dockerfile.nemo-skills verl: dockerfile:dockerfiles/Dockerfile.verl - nemo-rl: dockerfile:dockerfiles/Dockerfile.nemo-rl + # nemo-rl: use NGC image with commit-based tag; replace with desired tag e.g. nvcr.io/nvidian/nemo-rl: + nemo-rl: nvcr.io/nvidian/nemo-rl:9148186-44694499 # add required mounts for models/data here # the code is mounted automatically inside /nemo_run/code diff --git a/dockerfiles/Dockerfile.nemo-rl b/dockerfiles/Dockerfile.nemo-rl deleted file mode 100644 index 9b66bf7f44..0000000000 --- a/dockerfiles/Dockerfile.nemo-rl +++ /dev/null @@ -1,149 +0,0 @@ -# syntax=docker/dockerfile:1 -# copied and edited from https://github.com/NVIDIA/NeMo-RL/blob/main/docker/Dockerfile -# TODO: from next update try to re-use their dockerfile as is as they support specifying the commit - -ARG BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base:25.05-cuda12.9-devel-ubuntu24.04 - -FROM scratch AS nemo-rl - -ARG NEMO_RL_COMMIT=${NEMO_RL_COMMIT:-e95efb912a6909b5da91ffeb197debe91fd480d8} -ADD --keep-git-dir=true https://github.com/NVIDIA-NeMo/RL.git#${NEMO_RL_COMMIT} / - - -FROM ${BASE_IMAGE} AS base -# An environment variable to indicate that we are in a container. -ENV NRL_CONTAINER=1 - -# It is more convenient for users to run as root -USER root - -RUN <<"EOF" bash -exu -o pipefail -export DEBIAN_FRONTEND=noninteractive -export TZ=America/Los_Angeles - -apt-get update -apt-get install -y --no-install-recommends \ - jq \ - curl \ - git \ - rsync \ - wget \ - less \ - vim \ - -# Nsight -apt install -y --no-install-recommends gnupg -echo "deb http://developer.download.nvidia.com/devtools/repos/ubuntu$(source /etc/lsb-release; echo "$DISTRIB_RELEASE" | tr -d .)/$(dpkg --print-architecture) /" | tee /etc/apt/sources.list.d/nvidia-devtools.list -apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub -apt update -apt install -y nsight-systems-cli - -# To fix CVE-2025-68973 -apt install -y --only-upgrade gnupg - -apt-get clean -rm -rf /var/lib/apt/lists/* -EOF - -# Install uv and python -ARG UV_VERSION=0.9.7 -ARG PYTHON_VERSION=3.12 -ENV PATH="/root/.local/bin:$PATH" -RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh && \ - uv python install ${PYTHON_VERSION} - -# Disable usage stats by default for users who are sensitive to sharing usage. -# Users are encouraged to enable if the wish. -ENV RAY_USAGE_STATS_ENABLED=0 -# After ray>=2.47, this feature is enabled by default which creates uv venvs for any py_executable starting with `uv run`. -# There is severe contention and performance issues with this enabled considering our dependencies are so large and occasionally -# need to be compiled, so NeMo RL has an implementation in nemo_rl/utils/venv.py that does it once per node as opposed to once per task. -ENV RAY_ENABLE_UV_RUN_RUNTIME_ENV=0 -ENV NEMO_RL_VENV_DIR=/opt/ray_venvs - - -FROM base AS hermetic - -WORKDIR /opt/NeMo-RL - -# Variables to control the build of TE. If there are issues with parallelization, consider -# setting these to 1. -ARG MAX_JOBS -ARG NVTE_BUILD_THREADS_PER_JOB -# Only use for custom vllm installs. 
Learn more at https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/use-custom-vllm.md -ARG BUILD_CUSTOM_VLLM - -ENV UV_PROJECT_ENVIRONMENT=/opt/nemo_rl_venv -ENV UV_LINK_MODE=copy - -# Ensure DeepEP is built for H100 and B200 (also mcore inference unified memory API now invokes a torch API that requires these to be set) -ENV TORCH_CUDA_ARCH_LIST="9.0 10.0" - -# First copy only the dependency files -COPY --from=nemo-rl pyproject.toml uv.lock ./ -# Copy in the top level __init__.py/package_info.py since build-custom-vllm.sh needs the nemo_rl package to exist. -COPY --from=nemo-rl nemo_rl/__init__.py nemo_rl/package_info.py ./nemo_rl/ -COPY --from=nemo-rl tools/build-custom-vllm.sh ./tools/build-custom-vllm.sh -COPY --from=nemo-rl --link research/ ./research/ -COPY --from=nemo-rl --link 3rdparty/ ./3rdparty/ - -RUN --mount=type=ssh <<"EOF" bash -exu -uv venv --seed -if [[ -n "${BUILD_CUSTOM_VLLM:-}" ]]; then - bash tools/build-custom-vllm.sh - source 3rdparty/vllm/nemo-rl.env -fi -# uv sync has a more reliable resolver than simple uv pip install which can fail - -# Sync each training + inference backend one at a time (since they may conflict) -# to warm the uv cache, then at the end just sync the default dependencies. -# Do everything in one layer to prevent large layers. - -# The venv is symlinked to avoid bloating the layer size -uv sync --link-mode symlink --locked --no-install-project -uv sync --link-mode symlink --locked --extra vllm --no-install-project -uv sync --link-mode symlink --locked --extra mcore --no-install-project -uv sync --link-mode symlink --locked --extra automodel --no-install-project -uv sync --link-mode symlink --locked --all-groups --no-install-project - -# Remove the aiohttp in this uv cache dir to fully address CVE GHSA-mqqc-3gqh-h2x8 -# The ray install will include the older aiohttp version in its cache -find /root/.cache/uv -type d -path "*ray/_private/runtime_env/agent/thirdparty_files/aiohttp*" -exec rm -rf {} + -EOF - -ENV PATH="/opt/nemo_rl_venv/bin:$PATH" -ENV NEMO_RL_VENV_DIR=/opt/ray_venvs - -WORKDIR /opt/NeMo-RL - -FROM hermetic AS release - -ARG NVIDIA_BUILD_ID -ARG NVIDIA_BUILD_REF -ARG RC_DATE=00.00 -ARG TARGETARCH -ENV NVIDIA_BUILD_ID=${NVIDIA_BUILD_ID:-} -ENV NVIDIA_BUILD_REF=${NVIDIA_BUILD_REF:-} -LABEL com.nvidia.build.id="${NVIDIA_BUILD_ID}" -LABEL com.nvidia.build.ref="${NVIDIA_BUILD_REF}" - -ENV NEMO_RL_VENV_DIR=/opt/ray_venvs - -# Copy in source from build context (defaults to cloned repo, can be overridden) -# Exclude pyproject.toml and uv.lock since those may be altered by build-custom-vllm.sh -COPY --from=nemo-rl --exclude=pyproject.toml --exclude=uv.lock . /opt/NeMo-RL -# Unshallow the repo to get the full history (in the case it was from the scratch layer). -# Potentially not necessary if the repo is passed in as a complete repository (w/ full git history), -# so do a quick check before trying to unshallow. 
-RUN git rev-parse --is-shallow-repository | grep -q true && git fetch --unshallow || true -RUN UV_LINK_MODE=symlink uv run nemo_rl/utils/prefetch_venvs.py - -# Generate container fingerprint for frozen environment support -# Store outside /opt/NeMo-RL to avoid being overwritten by user mounts -RUN python tools/generate_fingerprint.py > /opt/nemo_rl_container_fingerprint - -# NOTICES.txt file points to where the OSS source code is archived -RUN echo "This distribution includes open source which is archived at the following URL: https://opensource.nvidia.com/oss/teams/nvidia/nemo-rl/${RC_DATE}:linux-${TARGETARCH}/index.html" > NOTICES.txt && \ - echo "For further inquiries or assistance, contact us at oss-requests@nvidia.com" >> NOTICES.txt - -RUN git clone https://github.com/NVIDIA-NeMo/Skills.git /opt/NeMo-Skills && cd /opt/NeMo-Skills && uv pip install . diff --git a/dockerfiles/README.md b/dockerfiles/README.md index f7f0218f3b..4c9f8ba38a 100644 --- a/dockerfiles/README.md +++ b/dockerfiles/README.md @@ -33,3 +33,11 @@ We directly use official `nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc1` image. ## Building sglang image We directly use official `lmsysorg/sglang:v0.5.8` image. + +## Building vllm image + +We use official `vllm/vllm-openai:v0.14.1` image with the additional `vllm[audio]` dependencies. + +## nemo-rl image + +We do not ship a Dockerfile for nemo-rl. Use NVIDIA's pre-built image from NGC with a commit-based tag, e.g. `nvcr.io/nvidian/nemo-rl:9148186-44694499`. Set this in your cluster config under `containers.nemo-rl` (see `cluster_configs/example-local.yaml`). Replace the tag with the desired commit/build id. diff --git a/nemo_skills/__init__.py b/nemo_skills/__init__.py index 6b72d83364..4f427dc146 100644 --- a/nemo_skills/__init__.py +++ b/nemo_skills/__init__.py @@ -23,5 +23,6 @@ "sandbox": "dockerfile:dockerfiles/Dockerfile.sandbox", "nemo-skills": "dockerfile:dockerfiles/Dockerfile.nemo-skills", "verl": "dockerfile:dockerfiles/Dockerfile.verl", - "nemo-rl": "dockerfile:dockerfiles/Dockerfile.nemo-rl", + # Use NGC image with commit-based tag (e.g. 9148186-44694499). No local Dockerfile needed. 
+ "nemo-rl": "nvcr.io/nvidian/nemo-rl:9148186-44694499", } diff --git a/nemo_skills/pipeline/nemo_rl/grpo.py b/nemo_skills/pipeline/nemo_rl/grpo.py index aff0891d3d..b828dfddcd 100644 --- a/nemo_skills/pipeline/nemo_rl/grpo.py +++ b/nemo_skills/pipeline/nemo_rl/grpo.py @@ -136,8 +136,8 @@ def get_cmd(self): self.logging_params = self.format_wandb_args() nsight_cmd = get_nsight_cmd(self.profile_step_range) cmd = ( - f"export PYTHONPATH=$PYTHONPATH:/nemo_run/code:/opt/NeMo-RL && " - f"export UV_PROJECT=/opt/NeMo-RL && " + f"export PYTHONPATH=$PYTHONPATH:/nemo_run/code:/opt/nemo-rl && " + f"export UV_PROJECT=/opt/nemo-rl && " f"{nsight_cmd}" f"echo 'Starting training' && " f"uv run --active python /nemo_run/code/nemo_skills/training/nemo_rl/start_grpo.py " diff --git a/nemo_skills/pipeline/nemo_rl/sft.py b/nemo_skills/pipeline/nemo_rl/sft.py index 69a3e98408..1fc9fac768 100644 --- a/nemo_skills/pipeline/nemo_rl/sft.py +++ b/nemo_skills/pipeline/nemo_rl/sft.py @@ -116,11 +116,12 @@ def get_cmd(self): nsight_cmd = get_nsight_cmd(self.profile_step_range) cmd = ( - "export PYTHONPATH=$PYTHONPATH:/nemo_run/code:/opt/NeMo-RL && " - "export UV_PROJECT=/opt/NeMo-RL && " + "export PYTHONPATH=$PYTHONPATH:/nemo_run/code:/opt/nemo-rl && " + "export UV_PROJECT=/opt/nemo-rl && " f"{nsight_cmd}" "echo 'Starting training' && " - "NRL_FORCE_REBUILD_VENVS=true uv run --active " + # "NRL_FORCE_REBUILD_VENVS=true uv run --active " + "uv run --active " "python /nemo_run/code/nemo_skills/training/nemo_rl/start_sft.py " f"{self.format_train_args()} {self.format_data_args()} " f"{self.logging_params} {self.extra_arguments}" diff --git a/nemo_skills/training/nemo_rl/configs/grpo.yaml b/nemo_skills/training/nemo_rl/configs/grpo.yaml index 9ec2af3358..d2d4608ee1 100644 --- a/nemo_skills/training/nemo_rl/configs/grpo.yaml +++ b/nemo_skills/training/nemo_rl/configs/grpo.yaml @@ -10,6 +10,7 @@ grpo: use_leave_one_out_baseline: true val_period: 0 # disabled val_at_start: false + val_at_end: false overlong_filtering: false max_val_samples: 256 val_batch_size: 256 @@ -22,13 +23,20 @@ grpo: overlong_buffer_length: 128 overlong_buffer_penalty: 1 max_response_length: ${policy.max_total_sequence_length} + stop_properly_penalty_coef: null reward_scaling: enabled: false source_min: 0.0 source_max: 1.0 target_min: 0.0 target_max: 1.0 - + seq_logprob_error_threshold: null + adv_estimator: + name: "grpo" # Use "reinforce_plus_plus" for Reinforce++ estimator + normalize_rewards: true + use_leave_one_out_baseline: false + minus_baseline: true + # Reinforce++-baseline specific: subtract per-prompt mean baseline async_grpo: enabled: false # Set to true to enable async training mode # Max age (in training steps) for trajectories used in training @@ -52,9 +60,12 @@ loss_fn: # Set to true when async_grpo.enabled is true use_importance_sampling_correction: false truncated_importance_sampling_ratio: null + truncated_importance_sampling_ratio_min: null # Lower bound for ICE-POP + truncated_importance_sampling_type: tis # "tis" (clamp to max) or "icepop" (filter outside [min, max]) sequence_level_importance_ratios: false token_level_loss: true force_on_policy_ratio: false # Set to true to force ratio=1.0 (requires train_global_batch_size == num_prompts_per_step * num_generations_per_prompt) + use_kl_in_reward: false # Reinforce++: add KL penalty to reward instead of loss checkpointing: enabled: true @@ -74,9 +85,9 @@ policy: chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true 
hf_config_overrides: {} train_global_batch_size: 512 - train_micro_batch_size: 4 + train_micro_batch_size: 1 generation_batch_size: 32 # Only used when generating using HF backend - logprob_batch_size: 4 + logprob_batch_size: 1 max_total_sequence_length: 512 precision: "bfloat16" tensor_model_parallel_size: 1 @@ -123,6 +134,9 @@ policy: bias_activation_fusion: True defer_fp32_logits: False moe_per_layer_logging: False + moe_enable_deepep: false + moe_token_dispatcher_type: "allgather" + moe_shared_expert_overlap: false optimizer: optimizer: "adam" diff --git a/nemo_skills/training/nemo_rl/configs/sft.yaml b/nemo_skills/training/nemo_rl/configs/sft.yaml index 0c6d470e82..c6feda7b76 100644 --- a/nemo_skills/training/nemo_rl/configs/sft.yaml +++ b/nemo_skills/training/nemo_rl/configs/sft.yaml @@ -11,6 +11,7 @@ sft: val_global_batch_size: 32 val_micro_batch_size: 1 val_at_start: false + val_at_end: false seed: 42 checkpointing: @@ -82,7 +83,7 @@ policy: sequence_parallel: ${policy.sequence_parallel} freeze_moe_router: false moe_router_dtype: null - moe_router_load_balancing_type: "aux_loss" + moe_router_load_balancing_type: none moe_router_bias_update_rate: 1e-3 moe_permute_fusion: false #gives ~20% training perf speedup with sequence packing @@ -92,6 +93,9 @@ policy: layernorm_epsilon: 1e-6 defer_fp32_logits: False moe_per_layer_logging: False + moe_enable_deepep: false + moe_token_dispatcher_type: "allgather" + moe_shared_expert_overlap: false peft: enabled: false @@ -163,7 +167,7 @@ policy: # makes the training sequence length divisible by the tensor parallel size # this is useful for sequence parallel training make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} - max_grad_norm: 0.0 # megatron: Zero means no clipping, FSDP: null means no clipping + max_grad_norm: 1.0 # megatron: Zero means no clipping, FSDP: null means no clipping optimizer: name: "torch.optim.AdamW" diff --git a/nemo_skills/training/nemo_rl/start_sft.py b/nemo_skills/training/nemo_rl/start_sft.py index d6eb5e4e79..a91abfeb10 100644 --- a/nemo_skills/training/nemo_rl/start_sft.py +++ b/nemo_skills/training/nemo_rl/start_sft.py @@ -368,7 +368,6 @@ def main(): loss_fn, master_config, logger, - sft_task_spec, checkpointer, sft_save_state, ) diff --git a/tests/gpu-tests/test-local.yaml b/tests/gpu-tests/test-local.yaml index 59860356c3..a3f299b9c9 100644 --- a/tests/gpu-tests/test-local.yaml +++ b/tests/gpu-tests/test-local.yaml @@ -22,7 +22,7 @@ containers: nemo-skills: dockerfile:dockerfiles/Dockerfile.nemo-skills megatron: dockerfile:dockerfiles/Dockerfile.megatron verl: dockerfile:dockerfiles/Dockerfile.verl - nemo-rl: dockerfile:dockerfiles/Dockerfile.nemo-rl + nemo-rl: nvcr.io/nvidian/nemo-rl:9148186-44694499 mounts: - /tmp:/tmp diff --git a/tests/gpu-tests/test_eval.py b/tests/gpu-tests/test_eval.py index 73cb085a01..be2228850c 100644 --- a/tests/gpu-tests/test_eval.py +++ b/tests/gpu-tests/test_eval.py @@ -87,8 +87,8 @@ def test_aaa_prepare_and_eval_all_datasets(): judge_datasets = [] for dataset in dataset_names: dataset_module = import_module(f"nemo_skills.dataset.{dataset}") - # Check if JUDGE_PIPELINE_ARGS exists (even if empty dict, which is falsy) - if hasattr(dataset_module, "JUDGE_PIPELINE_ARGS"): + # Check if JUDGE_PIPELINE_ARGS or JUDGE_ARGS exists (either means judge is required) + if hasattr(dataset_module, "JUDGE_PIPELINE_ARGS") or hasattr(dataset_module, "JUDGE_ARGS"): judge_datasets.append(dataset) non_judge_datasets = [dataset for dataset in dataset_names if dataset not in 
judge_datasets]
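
Note: the `adv_estimator` block added to `nemo_skills/training/nemo_rl/configs/grpo.yaml` above defaults to `"grpo"`, and the inline comments point to `"reinforce_plus_plus"` as the alternative. A minimal sketch of selecting that estimator, assuming the keys behave as the patch's own comments describe; pairing it with `use_kl_in_reward: true` is an assumption for illustration, not something this patch enforces:

```yaml
# Sketch (assumption, not part of the patch): switching the new advantage
# estimator in configs/grpo.yaml to Reinforce++ using the keys added above.
grpo:
  adv_estimator:
    name: "reinforce_plus_plus"      # patch default is "grpo"
    normalize_rewards: true
    use_leave_one_out_baseline: false
    minus_baseline: true             # Reinforce++-baseline: subtract per-prompt mean baseline

loss_fn:
  use_kl_in_reward: true             # Reinforce++: add KL penalty to the reward instead of the loss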
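
Similarly, the new `loss_fn` keys distinguish plain truncated importance sampling ("tis", clamp to a max ratio) from ICE-POP-style filtering outside a [min, max] range. A sketch with illustrative bounds, assuming the semantics stated in the inline comments; the numeric values are placeholders, not recommendations:

```yaml
# Sketch (assumption): enabling the ICE-POP variant via the loss_fn keys this patch adds.
loss_fn:
  use_importance_sampling_correction: true      # per the config comment, typically set with async_grpo.enabled
  truncated_importance_sampling_type: icepop    # "tis" clamps to max; "icepop" filters outside [min, max]
  truncated_importance_sampling_ratio: 2.0      # upper bound (illustrative)
  truncated_importance_sampling_ratio_min: 0.5  # lower bound for ICE-POP (illustrative)
```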