feat(docs): proposal for adding TTLSecondsAfterFinished and ActiveDeadlineSeconds fields to TrainJob CRD #2764
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GPU end-to-end test workflow.
# Builds a GPU-enabled cluster through the repository Makefile and then runs
# example notebooks against it via the `test-e2e-notebook` make target.
name: GPU E2E Test

on:
  pull_request:
    # `labeled` is included so that adding a label to a PR also triggers a run.
    types: [opened, reopened, synchronize, labeled]

# The job only needs read access to the repository and to pull requests.
permissions:
  contents: read
  pull-requests: read

jobs:
  gpu-e2e-test:
    name: GPU E2E Test
    # Runner is selected by label within the "GPUs" runner group.
    runs-on:
      labels: oracle-vm-gpu-a10-1
      group: GPUs

    env:
      GOPATH: ${{ github.workspace }}/go

    # Every `run` step executes from the checkout location inside GOPATH.
    defaults:
      run:
        working-directory: ${{ env.GOPATH }}/src/github.com/kubeflow/trainer

    strategy:
      # Do not cancel remaining matrix entries when one of them fails.
      fail-fast: false
      matrix:
        # Quoted so the version stays a string rather than a YAML number.
        kubernetes-version: ["1.33.1"]

    steps:
      - name: Check out code
        uses: actions/checkout@v6
        with:
          # Check out the pull request's head commit.
          ref: ${{ github.event.pull_request.head.sha }}
          path: ${{ env.GOPATH }}/src/github.com/kubeflow/trainer

      - name: Setup Go
        uses: actions/setup-go@v6
        with:
          # Go version is read from the checked-out go.mod.
          go-version-file: ${{ env.GOPATH }}/src/github.com/kubeflow/trainer/go.mod

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          # Quoted: an unquoted version is a YAML float, so e.g. 3.10 would
          # be read as 3.1.
          python-version: "3.11"

      - name: Install dependencies
        run: |
          pip install papermill==2.6.0 jupyter==1.1.1 ipykernel==6.29.5
          pip install git+https://github.com/kubeflow/sdk.git@main

      - name: Setup GPU cluster with nvkind
        run: |
          make test-e2e-setup-gpu-cluster K8S_VERSION=${{ matrix.kubernetes-version }}

      - name: Run e2e test on GPU cluster
        # Executed notebooks are written under artifacts/notebooks so the
        # upload step below can collect them.
        run: |
          mkdir -p artifacts/notebooks
          make test-e2e-notebook NOTEBOOK_INPUT=./examples/torchtune/qwen2_5/qwen2.5-1.5B-with-alpaca.ipynb NOTEBOOK_OUTPUT=./artifacts/notebooks/${{ matrix.kubernetes-version }}_qwen2_5_with_alpaca-trainjob-yaml.ipynb PAPERMILL_TIMEOUT=1800
          make test-e2e-notebook NOTEBOOK_INPUT=./examples/jax/image-classification/mnist.ipynb NOTEBOOK_OUTPUT=./artifacts/notebooks/${{ matrix.kubernetes-version }}_jax_mnist.ipynb PAPERMILL_PARAMS="-p num_cpu 8 -p num_gpu 1 -p num_nodes 1" PAPERMILL_TIMEOUT=1800

      - name: Upload Artifacts to GitHub
        # Upload even when an earlier step failed, so outputs remain available.
        if: always()
        uses: actions/upload-artifact@v6
        with:
          # Artifact is named after the Kubernetes version of the matrix entry.
          name: ${{ matrix.kubernetes-version }}
          path: ${{ env.GOPATH }}/src/github.com/kubeflow/trainer/artifacts/*
          retention-days: 1