149 changes: 5 additions & 144 deletions dockerfiles/Dockerfile.nemo-rl
@@ -1,149 +1,10 @@
# syntax=docker/dockerfile:1
# copied and edited from https://github.com/NVIDIA/NeMo-RL/blob/main/docker/Dockerfile
# TODO: with the next update, try to re-use their Dockerfile as-is, since it supports specifying the commit
# Lightweight Dockerfile: use pre-built nvcr.io/nvidian/nemo-rl:nightly and only add NeMo-Skills.
# To use the image without building at all, set containers.nemo-rl to nvcr.io/nvidian/nemo-rl:nightly
# in your cluster config (see cluster_configs/example-local.yaml).

ARG BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base:25.05-cuda12.9-devel-ubuntu24.04
ARG NEMO_RL_IMAGE=nvcr.io/nvidian/nemo-rl:nightly

FROM scratch AS nemo-rl
FROM ${NEMO_RL_IMAGE}

ARG NEMO_RL_COMMIT=${NEMO_RL_COMMIT:-e95efb912a6909b5da91ffeb197debe91fd480d8}
ADD --keep-git-dir=true https://github.com/NVIDIA-NeMo/RL.git#${NEMO_RL_COMMIT} /


FROM ${BASE_IMAGE} AS base
# An environment variable to indicate that we are in a container.
ENV NRL_CONTAINER=1

# It is more convenient for users to run as root
USER root

RUN <<"EOF" bash -exu -o pipefail
export DEBIAN_FRONTEND=noninteractive
export TZ=America/Los_Angeles

apt-get update
apt-get install -y --no-install-recommends \
jq \
curl \
git \
rsync \
wget \
less \
vim

# Nsight
apt install -y --no-install-recommends gnupg
echo "deb http://developer.download.nvidia.com/devtools/repos/ubuntu$(source /etc/lsb-release; echo "$DISTRIB_RELEASE" | tr -d .)/$(dpkg --print-architecture) /" | tee /etc/apt/sources.list.d/nvidia-devtools.list
apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
apt update
apt install -y nsight-systems-cli

# To fix CVE-2025-68973
apt install -y --only-upgrade gnupg

apt-get clean
rm -rf /var/lib/apt/lists/*
EOF

# Install uv and python
ARG UV_VERSION=0.9.7
ARG PYTHON_VERSION=3.12
ENV PATH="/root/.local/bin:$PATH"
RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh && \
uv python install ${PYTHON_VERSION}

# Disable usage stats by default for users who are sensitive to sharing usage.
# Users are encouraged to enable it if they wish.
ENV RAY_USAGE_STATS_ENABLED=0
# After ray>=2.47, this feature is enabled by default; it creates uv venvs for any py_executable starting with `uv run`.
# With our large dependencies (which occasionally need to be compiled), this causes severe contention and performance issues,
# so NeMo RL has an implementation in nemo_rl/utils/venv.py that does it once per node as opposed to once per task.
ENV RAY_ENABLE_UV_RUN_RUNTIME_ENV=0
ENV NEMO_RL_VENV_DIR=/opt/ray_venvs


FROM base AS hermetic

WORKDIR /opt/NeMo-RL

# Variables to control the build of TE. If there are issues with parallelization, consider
# setting these to 1.
ARG MAX_JOBS
ARG NVTE_BUILD_THREADS_PER_JOB
# Only use for custom vllm installs. Learn more at https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/use-custom-vllm.md
ARG BUILD_CUSTOM_VLLM

ENV UV_PROJECT_ENVIRONMENT=/opt/nemo_rl_venv
ENV UV_LINK_MODE=copy

# Ensure DeepEP is built for H100 and B200 (also mcore inference unified memory API now invokes a torch API that requires these to be set)
ENV TORCH_CUDA_ARCH_LIST="9.0 10.0"

# First copy only the dependency files
COPY --from=nemo-rl pyproject.toml uv.lock ./
# Copy in the top level __init__.py/package_info.py since build-custom-vllm.sh needs the nemo_rl package to exist.
COPY --from=nemo-rl nemo_rl/__init__.py nemo_rl/package_info.py ./nemo_rl/
COPY --from=nemo-rl tools/build-custom-vllm.sh ./tools/build-custom-vllm.sh
COPY --from=nemo-rl --link research/ ./research/
COPY --from=nemo-rl --link 3rdparty/ ./3rdparty/

RUN --mount=type=ssh <<"EOF" bash -exu
uv venv --seed
if [[ -n "${BUILD_CUSTOM_VLLM:-}" ]]; then
bash tools/build-custom-vllm.sh
source 3rdparty/vllm/nemo-rl.env
fi
# uv sync has a more reliable resolver than a plain uv pip install, which can fail

# Sync each training + inference backend one at a time (since they may conflict)
# to warm the uv cache, then at the end just sync the default dependencies.
# Do everything in one layer to prevent large layers.

# The venv is symlinked to avoid bloating the layer size
uv sync --link-mode symlink --locked --no-install-project
uv sync --link-mode symlink --locked --extra vllm --no-install-project
uv sync --link-mode symlink --locked --extra mcore --no-install-project
uv sync --link-mode symlink --locked --extra automodel --no-install-project
uv sync --link-mode symlink --locked --all-groups --no-install-project

# Remove the aiohttp in this uv cache dir to fully address CVE GHSA-mqqc-3gqh-h2x8
# The ray install will include the older aiohttp version in its cache
find /root/.cache/uv -type d -path "*ray/_private/runtime_env/agent/thirdparty_files/aiohttp*" -exec rm -rf {} +
EOF

ENV PATH="/opt/nemo_rl_venv/bin:$PATH"
ENV NEMO_RL_VENV_DIR=/opt/ray_venvs

WORKDIR /opt/NeMo-RL

FROM hermetic AS release

ARG NVIDIA_BUILD_ID
ARG NVIDIA_BUILD_REF
ARG RC_DATE=00.00
ARG TARGETARCH
ENV NVIDIA_BUILD_ID=${NVIDIA_BUILD_ID:-<unknown>}
ENV NVIDIA_BUILD_REF=${NVIDIA_BUILD_REF:-<unknown>}
LABEL com.nvidia.build.id="${NVIDIA_BUILD_ID}"
LABEL com.nvidia.build.ref="${NVIDIA_BUILD_REF}"

ENV NEMO_RL_VENV_DIR=/opt/ray_venvs

# Copy in source from build context (defaults to cloned repo, can be overridden)
# Exclude pyproject.toml and uv.lock since those may be altered by build-custom-vllm.sh
COPY --from=nemo-rl --exclude=pyproject.toml --exclude=uv.lock . /opt/NeMo-RL
# Unshallow the repo to get the full history (in case it came from the scratch layer).
# This may be unnecessary if the repo was passed in as a complete repository (with full git history),
# so do a quick check before trying to unshallow.
RUN git rev-parse --is-shallow-repository | grep -q true && git fetch --unshallow || true
RUN UV_LINK_MODE=symlink uv run nemo_rl/utils/prefetch_venvs.py

# Generate container fingerprint for frozen environment support
# Store outside /opt/NeMo-RL to avoid being overwritten by user mounts
RUN python tools/generate_fingerprint.py > /opt/nemo_rl_container_fingerprint

# NOTICES.txt file points to where the OSS source code is archived
RUN echo "This distribution includes open source which is archived at the following URL: https://opensource.nvidia.com/oss/teams/nvidia/nemo-rl/${RC_DATE}:linux-${TARGETARCH}/index.html" > NOTICES.txt && \
echo "For further inquiries or assistance, contact us at oss-requests@nvidia.com" >> NOTICES.txt

RUN git clone https://github.com/NVIDIA-NeMo/Skills.git /opt/NeMo-Skills && cd /opt/NeMo-Skills && uv pip install .
4 changes: 2 additions & 2 deletions nemo_skills/pipeline/nemo_rl/grpo.py
@@ -133,8 +133,8 @@ def get_cmd(self):
self.logging_params = self.format_wandb_args()
nsight_cmd = get_nsight_cmd(self.profile_step_range)
cmd = (
f"export PYTHONPATH=$PYTHONPATH:/nemo_run/code:/opt/NeMo-RL && "
f"export UV_PROJECT=/opt/NeMo-RL && "
f"export PYTHONPATH=$PYTHONPATH:/nemo_run/code:/opt/nemo-rl && "
f"export UV_PROJECT=/opt/nemo-rl && "
f"{nsight_cmd}"
f"echo 'Starting training' && "
f"uv run --active python /nemo_run/code/nemo_skills/training/nemo_rl/start_grpo.py "
7 changes: 4 additions & 3 deletions nemo_skills/pipeline/nemo_rl/sft.py
@@ -116,11 +116,12 @@ def get_cmd(self):

nsight_cmd = get_nsight_cmd(self.profile_step_range)
cmd = (
"export PYTHONPATH=$PYTHONPATH:/nemo_run/code:/opt/NeMo-RL && "
"export UV_PROJECT=/opt/NeMo-RL && "
"export PYTHONPATH=$PYTHONPATH:/nemo_run/code:/opt/nemo-rl && "
"export UV_PROJECT=/opt/nemo-rl && "
f"{nsight_cmd}"
"echo 'Starting training' && "
"NRL_FORCE_REBUILD_VENVS=true uv run --active "
# "NRL_FORCE_REBUILD_VENVS=true uv run --active "
"uv run --active "
"python /nemo_run/code/nemo_skills/training/nemo_rl/start_sft.py "
f"{self.format_train_args()} {self.format_data_args()} "
f"{self.logging_params} {self.extra_arguments}"
20 changes: 17 additions & 3 deletions nemo_skills/training/nemo_rl/configs/grpo.yaml
@@ -10,6 +10,7 @@ grpo:
use_leave_one_out_baseline: true
val_period: 0 # disabled
val_at_start: false
val_at_end: false
overlong_filtering: false
max_val_samples: 256
val_batch_size: 256
@@ -22,13 +23,20 @@
overlong_buffer_length: 128
overlong_buffer_penalty: 1
max_response_length: ${policy.max_total_sequence_length}
stop_properly_penalty_coef: null
reward_scaling:
enabled: false
source_min: 0.0
source_max: 1.0
target_min: 0.0
target_max: 1.0

seq_logprob_error_threshold: null
adv_estimator:
name: "grpo" # Use "reinforce_plus_plus" for Reinforce++ estimator
normalize_rewards: true
use_leave_one_out_baseline: false
minus_baseline: true
# Reinforce++-baseline specific: subtract per-prompt mean baseline
async_grpo:
enabled: false # Set to true to enable async training mode
# Max age (in training steps) for trajectories used in training
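
Note on the new `reward_scaling` and `adv_estimator` fields above: the config suggests a linear min-max remap of rewards followed by a leave-one-out baseline over each prompt's generations. A minimal sketch of that arithmetic, assuming the conventional semantics of these fields (helper names are hypothetical; the actual implementation lives in NeMo-RL and may differ):

```python
import numpy as np

def scale_reward(r, source_min=0.0, source_max=1.0, target_min=0.0, target_max=1.0):
    # Linear remap from [source_min, source_max] to [target_min, target_max],
    # as the reward_scaling fields suggest (assumed semantics).
    t = (r - source_min) / (source_max - source_min)
    return target_min + t * (target_max - target_min)

def leave_one_out_advantages(rewards):
    # rewards: rewards for all generations of one prompt, shape (num_generations,).
    # With use_leave_one_out_baseline, sample i's baseline is the mean of the
    # other samples' rewards; the advantage subtracts that baseline.
    rewards = np.asarray(rewards, dtype=np.float64)
    n = rewards.size
    baseline = (rewards.sum() - rewards) / (n - 1)
    return rewards - baseline
```
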
@@ -52,9 +60,12 @@ loss_fn:
# Set to true when async_grpo.enabled is true
use_importance_sampling_correction: false
truncated_importance_sampling_ratio: null
truncated_importance_sampling_ratio_min: null # Lower bound for ICE-POP
truncated_importance_sampling_type: tis # "tis" (clamp to max) or "icepop" (filter outside [min, max])
sequence_level_importance_ratios: false
token_level_loss: true
force_on_policy_ratio: false # Set to true to force ratio=1.0 (requires train_global_batch_size == num_prompts_per_step * num_generations_per_prompt)
use_kl_in_reward: false # Reinforce++: add KL penalty to reward instead of loss

checkpointing:
enabled: true
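
On the `truncated_importance_sampling_*` options added to `loss_fn` above: going only by the inline comments ("tis" clamps to the max, "icepop" filters ratios outside [min, max]), a hypothetical sketch of the two behaviors on token-level importance ratios (function names are illustrative, not NeMo-RL API):

```python
import torch

def apply_tis(ratios: torch.Tensor, max_ratio: float) -> torch.Tensor:
    # "tis": clamp importance ratios from above.
    return ratios.clamp(max=max_ratio)

def apply_icepop(ratios: torch.Tensor, min_ratio: float, max_ratio: float) -> torch.Tensor:
    # "icepop": zero out tokens whose ratio falls outside [min_ratio, max_ratio]
    # instead of clamping them, effectively dropping those tokens from the loss.
    keep = (ratios >= min_ratio) & (ratios <= max_ratio)
    return ratios * keep
```
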
@@ -74,9 +85,9 @@ policy:
chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true
hf_config_overrides: {}
train_global_batch_size: 512
train_micro_batch_size: 4
train_micro_batch_size: 1
generation_batch_size: 32 # Only used when generating using HF backend
logprob_batch_size: 4
logprob_batch_size: 1
max_total_sequence_length: 512
precision: "bfloat16"
tensor_model_parallel_size: 1
@@ -123,6 +134,9 @@ policy:
bias_activation_fusion: True
defer_fp32_logits: False
moe_per_layer_logging: False
moe_enable_deepep: false
moe_token_dispatcher_type: "allgather"
moe_shared_expert_overlap: false

optimizer:
optimizer: "adam"
8 changes: 6 additions & 2 deletions nemo_skills/training/nemo_rl/configs/sft.yaml
@@ -11,6 +11,7 @@ sft:
val_global_batch_size: 32
val_micro_batch_size: 1
val_at_start: false
val_at_end: false
seed: 42

checkpointing:
@@ -82,7 +83,7 @@ policy:
sequence_parallel: ${policy.sequence_parallel}
freeze_moe_router: false
moe_router_dtype: null
moe_router_load_balancing_type: "aux_loss"
moe_router_load_balancing_type: none
moe_router_bias_update_rate: 1e-3
moe_permute_fusion: false
#gives ~20% training perf speedup with sequence packing
@@ -92,6 +93,9 @@
layernorm_epsilon: 1e-6
defer_fp32_logits: False
moe_per_layer_logging: False
moe_enable_deepep: false
moe_token_dispatcher_type: "allgather"
moe_shared_expert_overlap: false

peft:
enabled: false
@@ -163,7 +167,7 @@ policy:
# makes the training sequence length divisible by the tensor parallel size
# this is useful for sequence parallel training
make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size}
max_grad_norm: 0.0 # megatron: Zero means no clipping, FSDP: null means no clipping
max_grad_norm: 1.0 # megatron: Zero means no clipping, FSDP: null means no clipping

optimizer:
name: "torch.optim.AdamW"
1 change: 0 additions & 1 deletion nemo_skills/training/nemo_rl/start_sft.py
@@ -368,7 +368,6 @@ def main():
loss_fn,
master_config,
logger,
sft_task_spec,
checkpointer,
sft_save_state,
)
3 changes: 2 additions & 1 deletion tests/gpu-tests/test-local.yaml
@@ -22,13 +22,14 @@ containers:
nemo-skills: dockerfile:dockerfiles/Dockerfile.nemo-skills
megatron: dockerfile:dockerfiles/Dockerfile.megatron
verl: dockerfile:dockerfiles/Dockerfile.verl
nemo-rl: dockerfile:dockerfiles/Dockerfile.nemo-rl
nemo-rl: gitlab-master.nvidia.com/dl/ai-services/docker-images/igitman/nemo-skills-nemo-rl:latest

mounts:
- /tmp:/tmp
# change this if the models are located in a different place
# TODO: can we make it simpler?
- /mnt/datadrive/nemo-skills-test-data:/mnt/datadrive/nemo-skills-test-data
- /home/wedu:/home/wedu

env_vars:
- HF_HOME=/mnt/datadrive/nemo-skills-test-data/hf-cache