Skip to content

Commit 3bec9aa

Browse files
authored
cp: Changes of CICD workflow (NVIDIA#3603)
Signed-off-by: oliver könig <okoenig@nvidia.com>
1 parent 33f03d2 commit 3bec9aa

File tree

1 file changed

+69
-18
lines changed

1 file changed

+69
-18
lines changed

.github/workflows/cicd-main.yml

Lines changed: 69 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -18,16 +18,14 @@ on:
1818
- cron: 0 0 * * *
1919
push:
2020
branches:
21-
- dev
22-
- main
2321
- "pull-request/[0-9]+"
2422
- "deploy-release/*"
2523
merge_group:
2624
types: [checks_requested]
2725
workflow_dispatch:
2826

2927
concurrency:
30-
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }}
28+
group: ${{ github.workflow }}-${{ github.head_ref || github.ref || github.event.pull_request.number }}
3129
cancel-in-progress: true
3230

3331
permissions:
@@ -131,7 +129,7 @@ jobs:
131129
pre-flight:
132130
needs: [is-not-external-contributor]
133131
if: github.repository == 'NVIDIA/Megatron-LM'
134-
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.10
132+
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2
135133

136134
linting:
137135
runs-on: ubuntu-latest
@@ -179,10 +177,11 @@ jobs:
179177
cicd-wait-in-queue:
180178
runs-on: ubuntu-latest
181179
needs: [pre-flight, linting]
182-
environment: ${{ needs.pre-flight.outputs.is_merge_group == 'true' && 'merge-gate' || 'test' }}
180+
environment: "test"
183181
if: |
184182
!(needs.pre-flight.outputs.is_ci_workload == 'true'
185183
|| needs.pre-flight.outputs.is_deployment_workflow == 'true'
184+
|| needs.pre-flight.outputs.is_merge_group == 'true'
186185
|| needs.pre-flight.outputs.docs_only == 'true')
187186
steps:
188187
- name: Running CI tests
@@ -194,12 +193,15 @@ jobs:
194193
needs: [is-not-external-contributor, pre-flight, cicd-wait-in-queue]
195194
runs-on: ${{ needs.is-not-external-contributor.outputs.selected_runner }}
196195
if: |
197-
(
196+
needs.is-not-external-contributor.result != 'cancelled'
197+
&& needs.pre-flight.result != 'cancelled'
198+
&& needs.cicd-wait-in-queue.result != 'cancelled'
199+
&& (
198200
success()
199201
|| needs.pre-flight.outputs.is_ci_workload == 'true'
202+
|| needs.pre-flight.outputs.is_merge_group == 'true'
200203
|| needs.pre-flight.outputs.force_run_all == 'true'
201204
)
202-
&& needs.pre-flight.outputs.is_merge_group == 'false'
203205
&& !cancelled()
204206
steps:
205207
- name: Get PR info
@@ -212,9 +214,12 @@ jobs:
212214
id: sha
213215
env:
214216
IS_PR: ${{ startsWith(github.ref, 'refs/heads/pull-request/') }}
217+
IS_MERGE_GROUP: ${{ github.event_name == 'merge_group' }}
215218
run: |
216219
if [[ "$IS_PR" == "true" ]]; then
217220
SHA=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').merge_commit_sha }}
221+
elif [[ "$IS_MERGE_GROUP" == "true" ]]; then
222+
SHA=${{ github.event.merge_group.head_sha }}
218223
else
219224
SHA=${GITHUB_SHA}
220225
fi
@@ -332,12 +337,15 @@ jobs:
332337
- cicd-wait-in-queue
333338
- cicd-container-build
334339
if: |
335-
(
340+
needs.pre-flight.result != 'cancelled'
341+
&& needs.cicd-wait-in-queue.result != 'cancelled'
342+
&& needs.cicd-container-build.result != 'cancelled'
343+
&& (
336344
success()
337345
|| needs.pre-flight.outputs.is_ci_workload == 'true'
338346
|| needs.pre-flight.outputs.force_run_all == 'true'
347+
|| needs.pre-flight.outputs.is_merge_group == 'true'
339348
)
340-
&& needs.pre-flight.outputs.is_merge_group == 'false'
341349
&& !cancelled()
342350
steps:
343351
- name: Checkout
@@ -363,12 +371,17 @@ jobs:
363371
timeout-minutes: 60
364372
name: "${{ matrix.bucket }} - latest"
365373
if: |
366-
(
374+
needs.is-not-external-contributor.result != 'cancelled'
375+
&& needs.pre-flight.result != 'cancelled'
376+
&& needs.cicd-wait-in-queue.result != 'cancelled'
377+
&& needs.cicd-container-build.result != 'cancelled'
378+
&& needs.cicd-parse-unit-tests.result != 'cancelled'
379+
&& (
367380
success()
368381
|| needs.pre-flight.outputs.is_ci_workload == 'true'
369382
|| needs.pre-flight.outputs.force_run_all == 'true'
383+
|| needs.pre-flight.outputs.is_merge_group == 'true'
370384
)
371-
&& needs.pre-flight.outputs.is_merge_group == 'false'
372385
&& !cancelled()
373386
env:
374387
PIP_DISABLE_PIP_VERSION_CHECK: 1
@@ -390,14 +403,17 @@ jobs:
390403

391404
cicd-parse-integration-tests:
392405
runs-on: ubuntu-latest
393-
timeout-minutes: 60
394406
needs:
395407
- pre-flight
396408
- cicd-wait-in-queue
397409
- cicd-container-build
398410
- cicd-unit-tests-latest
399411
if: |
400-
(
412+
needs.pre-flight.result != 'cancelled'
413+
&& needs.cicd-wait-in-queue.result != 'cancelled'
414+
&& needs.cicd-container-build.result != 'cancelled'
415+
&& needs.cicd-unit-tests-latest.result != 'cancelled'
416+
&& (
401417
success()
402418
|| needs.pre-flight.outputs.is_ci_workload == 'true'
403419
|| needs.pre-flight.outputs.force_run_all == 'true'
@@ -439,7 +455,7 @@ jobs:
439455
id: main
440456
env:
441457
HAS_RUN_TESTS_LABEL: ${{ steps.has-run-tests-label.outputs.main }}
442-
HAS_RUN_FUNCTIONAL_TESTS_LABEL: ${{ steps.has-run-functional-tests-label.outputs.main }}
458+
HAS_RUN_FUNCTIONAL_TESTS_LABEL: ${{ steps.has-run-functional-tests-label.outputs.main == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' }}
443459
run: |
444460
export PYTHONPATH=$(pwd)
445461
@@ -480,6 +496,7 @@ jobs:
480496
echo "integration-tests=$(cat integration-tests.json)" | tee -a "$GITHUB_OUTPUT"
481497
482498
cicd-integration-tests-latest:
499+
timeout-minutes: 60
483500
strategy:
484501
fail-fast: false
485502
matrix:
@@ -497,12 +514,17 @@ jobs:
497514
PIP_NO_PYTHON_VERSION_WARNING: 1
498515
PIP_ROOT_USER_ACTION: ignore
499516
if: |
500-
(
517+
needs.is-not-external-contributor.result != 'cancelled'
518+
&& needs.pre-flight.result != 'cancelled'
519+
&& needs.cicd-wait-in-queue.result != 'cancelled'
520+
&& needs.cicd-parse-integration-tests.result != 'cancelled'
521+
&& needs.cicd-unit-tests-latest.result != 'cancelled'
522+
&& (
501523
success()
502524
|| needs.pre-flight.outputs.is_ci_workload == 'true'
503525
|| needs.pre-flight.outputs.force_run_all == 'true'
526+
|| needs.pre-flight.outputs.is_merge_group == 'true'
504527
)
505-
&& needs.pre-flight.outputs.is_merge_group == 'false'
506528
&& !cancelled()
507529
steps:
508530
- name: Checkout
@@ -518,6 +540,7 @@ jobs:
518540
PAT: ${{ secrets.PAT }}
519541
container-image: ${{ env.container-registry }}/megatron-lm:${{ github.sha }}
520542
is_ci_workload: ${{ needs.pre-flight.outputs.is_ci_workload }}
543+
is_merge_group: ${{ needs.pre-flight.outputs.is_merge_group }}
521544

522545
Nemo_CICD_Test:
523546
needs:
@@ -548,8 +571,8 @@ jobs:
548571
GITHUB_RUN_ID: ${{ github.run_id }}
549572
SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_ci_workload == 'true' }}
550573
run: |
551-
FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion == "failure")] | length') || echo 0
552-
SKIPPED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion == "skipped")] | length') || echo 0
574+
FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion == "failure" and .name != "merge-queue-notification" and .name != "cicd-mbridge-testing")] | length') || echo 0
575+
SKIPPED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion == "skipped" and .name != "merge-queue-notification" and .name != "cicd-mbridge-testing")] | length') || echo 0
553576
554577
if [ "${FAILED_JOBS:-0}" -eq 0 ] && ([ "${SKIPPED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]); then
555578
echo "✅ All previous jobs completed successfully"
@@ -641,6 +664,34 @@ jobs:
641664
.coverage
642665
include-hidden-files: true
643666

667+
merge-queue-notification:
668+
runs-on: ubuntu-latest
669+
if: github.event_name == 'merge_group'
670+
permissions:
671+
pull-requests: write
672+
steps:
673+
- name: Extract PR number from merge group
674+
id: get-pr-number
675+
run: |
676+
# Extract PR number from merge group head_ref (format: refs/heads/gh-readonly-queue/main/pr-<number>-<sha>)
677+
PR_NUMBER=$(echo "${{ github.event.merge_group.head_ref }}" | sed -n 's/.*\/pr-\([0-9]*\)-.*/\1/p')
678+
echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT
679+
680+
- name: Comment on PR with action run URL
681+
uses: actions/github-script@v7
682+
with:
683+
github-token: ${{ secrets.PAT }}
684+
script: |
685+
const prNumber = ${{ steps.get-pr-number.outputs.pr_number }};
686+
const runUrl = `https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}`;
687+
688+
await github.rest.issues.createComment({
689+
owner: context.repo.owner,
690+
repo: context.repo.repo,
691+
issue_number: prNumber,
692+
body: `🔄 Merge queue validation started!\n\nYou can track the progress here: ${runUrl}`
693+
});
694+
644695
cleanup-taint-node:
645696
runs-on: ${{ needs.is-not-external-contributor.outputs.selected_runner }}
646697
needs:

0 commit comments

Comments
 (0)