Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 1 addition & 43 deletions .github/actions/build-container/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,9 @@
name: Build container

inputs:
azure-client-id:
description: "Azure Client ID"
required: true
azure-tenant-id:
description: "Azure Tenant ID"
required: true
azure-subscription-id:
description: "Azure Subscription ID"
required: true
dockerfile-path:
description: "Path to dockerfile to build"
required: true
has-azure-credentials:
description: "Has Azure credentials"
required: false
default: "false"
PAT:
description: "GitHub Personal Access Token"
required: true
Expand All @@ -40,7 +27,7 @@ inputs:
type: string

env:
container-registry: nemoci.azurecr.io
container-registry: 766267172432.dkr.ecr.us-east-1.amazonaws.com

runs:
using: "composite"
Expand Down Expand Up @@ -68,35 +55,6 @@ runs:
with:
ref: ${{ steps.sha.outputs.main }}

- name: Setup python
uses: actions/setup-python@v6
with:
python-version: 3.12

- name: Install Azure CLI
shell: bash
run: |
echo "::group::Install Azure CLI"
# Create systemd override for proper dependencies
for i in 1 2 3; do
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash && break
echo "Attempt $i failed, retrying in 10s..."
sleep 10
done
echo "::endgroup::"

- name: Azure Login
uses: azure/login@v2
with:
client-id: ${{ inputs.azure-client-id }}
tenant-id: ${{ inputs.azure-tenant-id }}
subscription-id: ${{ inputs.azure-subscription-id }}

- name: Azure ACR Login
shell: bash
run: |
az acr login --name nemoci

- name: Install GH CLI
shell: bash
run: |
Expand Down
38 changes: 0 additions & 38 deletions .github/actions/test-template/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,6 @@ inputs:
description: "Run tests on CPU only"
required: false
default: "false"
azure-client-id:
description: "Azure Client ID"
required: true
azure-tenant-id:
description: "Azure Tenant ID"
required: true
azure-subscription-id:
description: "Azure Subscription ID"
required: true
has-azure-credentials:
description: "Has Azure credentials"
required: false
default: "false"
PAT:
description: "GitHub Personal Access Token"
required: true
Expand All @@ -63,31 +50,6 @@ inputs:
runs:
using: "composite"
steps:
- name: Install Azure CLI
if: ${{ inputs.has-azure-credentials == 'true' }}
shell: bash
run: |
echo "::group::Install Azure CLI"
for i in 1 2 3; do
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash && break
echo "Attempt $i failed, retrying in 10s..."
sleep 10
done
echo "::endgroup::"

- name: Azure Login
if: ${{ inputs.has-azure-credentials == 'true' }}
uses: azure/login@v2
with:
client-id: ${{ inputs.azure-client-id }}
tenant-id: ${{ inputs.azure-tenant-id }}
subscription-id: ${{ inputs.azure-subscription-id }}

- name: Azure ACR Login
shell: bash
run: |
az acr login --name nemoci

- name: Install uuidgen
shell: bash -x -e -u -o pipefail {0}
if: ${{ contains(inputs.runner, 'aws') }}
Expand Down
48 changes: 16 additions & 32 deletions .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ permissions:
pull-requests: read

env:
container-registry: nemoci.azurecr.io
container-registry: 766267172432.dkr.ecr.us-east-1.amazonaws.com

jobs:
pre-flight:
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.80.1
with:
default_runner_prefix: ${{ vars.DEFAULT_RUNNER_PREFIX }}
non_nvidia_runner_prefix: ${{ vars.NON_NVIDIA_RUNNER_PREFIX }}
default_runner_prefix: nemo-ci-aws-gpu-x2
non_nvidia_runner_prefix: nemo-ci-aws-gpu-x2-ephemeral
default_test_data_path: ${{ vars.DEFAULT_TEST_DATA_PATH }}
non_nvidia_test_data_path: ${{ vars.NON_NVIDIA_TEST_DATA_PATH }}
sso_users_filename: ${{ vars.SSO_USERS_FILENAME }}
Expand Down Expand Up @@ -200,19 +200,14 @@ jobs:
|| needs.pre-flight.outputs.force_run_all == 'true'
)
&& !cancelled()
runs-on: ${{ needs.pre-flight.outputs.runner_prefix }}-gpu-x2
environment: nemo-ci
runs-on: ${{ needs.pre-flight.outputs.runner_prefix }}
steps:
- name: Checkout
uses: actions/checkout@v6
- name: main
uses: ./.github/actions/build-container
with:
azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
dockerfile-path: "./docker/Dockerfile"
has-azure-credentials: "true"
PAT: ${{ secrets.PAT }}
repo-name: ${{ vars.CI_REPO_NAME || 'Automodel' }}

Expand All @@ -223,16 +218,15 @@ jobs:
matrix:
include:
- test-name: L0_Unit_Tests_CPU
runner: linux-amd64-cpu16
runner: ${{ needs.pre-flight.outputs.runner_prefix }}
cpu-only: true
- test-name: L0_Unit_Tests_GPU
runner: ${{ needs.pre-flight.outputs.runner_prefix }}-gpu-x2
runner: ${{ needs.pre-flight.outputs.runner_prefix }}
cpu-only: false
timeout: 30
needs: [pre-flight, cicd-wait-in-queue, cicd-container-build]
runs-on: ${{ matrix.runner }}
name: ${{ matrix.test-name }}
environment: nemo-ci
if: |
(
success()
Expand All @@ -249,10 +243,6 @@ jobs:
timeout: ${{ matrix.timeout || 10 }}
is-unit-test: "true"
cpu-only: ${{ matrix.cpu-only || false }}
has-azure-credentials: "true"
azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
PAT: ${{ secrets.PAT }}
test-data-path: ${{ needs.pre-flight.outputs.test_data_path }}
container-image: ${{ env.container-registry }}/${{ vars.CI_CONTAINER_NAME || 'automodel' }}:${{ github.sha }}
Expand All @@ -267,57 +257,56 @@ jobs:
- test-name: L2_Pretrain_and_KD
test-folder: llm_pretrain_and_kd
timeout: 20
runner: ${{ needs.pre-flight.outputs.runner_prefix }}-gpu-x2
runner: ${{ needs.pre-flight.outputs.runner_prefix }}
test-data-path: ${{ needs.pre-flight.outputs.test_data_path }}
- test-name: L2_HF_DCP
test-folder: hf_dcp
timeout: 40
runner: ${{ needs.pre-flight.outputs.runner_prefix }}-gpu-x2
runner: ${{ needs.pre-flight.outputs.runner_prefix }}
test-data-path: ${{ needs.pre-flight.outputs.test_data_path }}
- test-name: L2_HF_PEFT
test-folder: hf_peft
timeout: 30
runner: ${{ needs.pre-flight.outputs.runner_prefix }}-gpu-x2
runner: ${{ needs.pre-flight.outputs.runner_prefix }}
test-data-path: ${{ needs.pre-flight.outputs.test_data_path }}
- test-name: L2_HF_Transformer
test-folder: hf_transformer
timeout: 20
runner: ${{ needs.pre-flight.outputs.runner_prefix }}-gpu-x2
runner: ${{ needs.pre-flight.outputs.runner_prefix }}
test-data-path: ${{ needs.pre-flight.outputs.test_data_path }}
- test-name: L2_HF_Transformer_Finetune
test-folder: hf_transformer_finetune
timeout: 40
runner: ${{ needs.pre-flight.outputs.runner_prefix }}-gpu-x2
runner: ${{ needs.pre-flight.outputs.runner_prefix }}
test-data-path: ${{ needs.pre-flight.outputs.test_data_path }}
- test-name: L2_HF_Transformer_LLM
test-folder: hf_transformer_llm
timeout: 40
runner: ${{ needs.pre-flight.outputs.runner_prefix }}-gpu-x2
runner: ${{ needs.pre-flight.outputs.runner_prefix }}
test-data-path: ${{ needs.pre-flight.outputs.test_data_path }}
- test-name: L2_HF_Transformer_VLM
test-folder: hf_transformer_vlm
timeout: 60
runner: ${{ needs.pre-flight.outputs.runner_prefix }}-gpu-x2
runner: ${{ needs.pre-flight.outputs.runner_prefix }}
test-data-path: ${{ needs.pre-flight.outputs.test_data_path }}
- test-name: L2_Datasets
test-folder: data
timeout: 20
runner: ${{ needs.pre-flight.outputs.runner_prefix }}-gpu-x2
runner: ${{ needs.pre-flight.outputs.runner_prefix }}
test-data-path: ${{ needs.pre-flight.outputs.test_data_path }}
- test-name: L2_Context_Parallel
test-folder: context_parallel
timeout: 20
runner: ${{ needs.pre-flight.outputs.runner_prefix }}-gpu-x2
runner: ${{ needs.pre-flight.outputs.runner_prefix }}
test-data-path: ${{ needs.pre-flight.outputs.test_data_path }}
- test-name: L2_Retrieval
test-folder: retrieval
timeout: 20
runner: ${{ needs.pre-flight.outputs.runner_prefix }}-gpu-x2
runner: ${{ needs.pre-flight.outputs.runner_prefix }}
test-data-path: ${{ needs.pre-flight.outputs.test_data_path }}
needs: [pre-flight, cicd-unit-tests]
runs-on: ${{ matrix.runner }}
name: ${{ matrix.test-name }}
environment: nemo-ci
if: |
(
success()
Expand All @@ -334,10 +323,6 @@ jobs:
test-folder: ${{ matrix.test-folder }}
timeout: ${{ matrix.timeout }}
is_unit_test: "false"
has-azure-credentials: "true"
azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
azure-subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
PAT: ${{ secrets.PAT }}
test-data-path: ${{ matrix.test-data-path }}
container-image: ${{ env.container-registry }}/${{ vars.CI_CONTAINER_NAME || 'automodel' }}:${{ github.sha }}
Expand Down Expand Up @@ -393,7 +378,6 @@ jobs:
&& needs.pre-flight.outputs.is_ci_workload == 'false'
&& !cancelled()
&& vars.ENABLE_CODECOV == 'true'
environment: nemo-ci
steps:
- name: Generate fake coverage report
uses: actions/github-script@v8
Expand Down
57 changes: 2 additions & 55 deletions .github/workflows/config/.secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@
"filename": ".github/actions/build-container/action.yml",
"hashed_secret": "de4ecef3e7306203de51b2689836ae209612f4b9",
"is_verified": false,
"line_number": 156
"line_number": 121
}
],
".github/workflows/release-nightly-docs.yml": [
Expand All @@ -156,15 +156,6 @@
"line_number": 62
}
],
"examples/benchmark/configs/peft/nemotron_super_v3_lora.yaml": [
{
"type": "Base64 High Entropy String",
"filename": "examples/benchmark/configs/peft/nemotron_super_v3_lora.yaml",
"hashed_secret": "4702220fec9261c7d9b0c427aceeca118b66c702",
"is_verified": false,
"line_number": 63
}
],
"examples/llm_finetune/nemotron/nemotron_super_v3_hellaswag.yaml": [
{
"type": "Base64 High Entropy String",
Expand All @@ -183,50 +174,6 @@
"line_number": 42
}
],
"examples/vlm_finetune/nemotron/parse-ft-tutorial/parse_finetune_tutorial.ipynb": [
{
"type": "Hex High Entropy String",
"filename": "examples/vlm_finetune/nemotron/parse-ft-tutorial/parse_finetune_tutorial.ipynb",
"hashed_secret": "710c91edd7984b025f47830ec2ba0506894eed3f",
"is_verified": false,
"line_number": 64
},
{
"type": "Hex High Entropy String",
"filename": "examples/vlm_finetune/nemotron/parse-ft-tutorial/parse_finetune_tutorial.ipynb",
"hashed_secret": "e1f9a3f35c6bff77f847a39b4c4a5c9eb2892ecb",
"is_verified": false,
"line_number": 657
},
{
"type": "Hex High Entropy String",
"filename": "examples/vlm_finetune/nemotron/parse-ft-tutorial/parse_finetune_tutorial.ipynb",
"hashed_secret": "4f248bc40ab0f68e2dceb22e38c52828ee276179",
"is_verified": false,
"line_number": 698
},
{
"type": "Hex High Entropy String",
"filename": "examples/vlm_finetune/nemotron/parse-ft-tutorial/parse_finetune_tutorial.ipynb",
"hashed_secret": "d73541f2527eda15a3f3ab0018e4047afc81c7eb",
"is_verified": false,
"line_number": 752
},
{
"type": "Hex High Entropy String",
"filename": "examples/vlm_finetune/nemotron/parse-ft-tutorial/parse_finetune_tutorial.ipynb",
"hashed_secret": "336e39a34addaaa1dcefb999629f7106e531126e",
"is_verified": false,
"line_number": 771
},
{
"type": "Hex High Entropy String",
"filename": "examples/vlm_finetune/nemotron/parse-ft-tutorial/parse_finetune_tutorial.ipynb",
"hashed_secret": "b98ce2f8214f1c209f6371e4247784ef35088cfb",
"is_verified": false,
"line_number": 826
}
],
"tests/unit_tests/config/test_allowed_import_prefixes.py": [
{
"type": "Secret Keyword",
Expand Down Expand Up @@ -276,5 +223,5 @@
}
]
},
"generated_at": "2026-04-03T21:56:52Z"
"generated_at": "2026-04-18T10:18:13Z"
}
Loading