fix: add wait loop for pending pods in get_job_logs #1171
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# E2E Test workflow.
#
# Triggered on pull requests. For every Kubernetes version in the matrix the
# job checks out this repository and kubeflow/trainer (into ./trainer), installs
# the SDK from source, sets up a test cluster through the Trainer Makefile, runs
# the example notebooks through `make test-e2e-notebook`, and uploads the
# executed notebooks as a build artifact.
name: E2E Test

on: pull_request

# One active run per workflow + ref: a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  e2e-test:
    name: E2E Test
    runs-on: oracle-vm-16cpu-64gb-x86-64
    strategy:
      # Keep the other matrix entries running when one of them fails.
      fail-fast: false
      matrix:
        kubernetes-version: ["1.32.3", "1.33.1", "1.34.0", "1.35.0"]
        trainer-ref: ["master"]
    steps:
      - name: Checkout Kubeflow SDK repository
        uses: actions/checkout@v6

      # The Trainer repo provides the Makefile targets and the example
      # notebooks used by the steps below.
      - name: Checkout Kubeflow Trainer repository
        uses: actions/checkout@v6
        with:
          repository: kubeflow/trainer
          ref: ${{ matrix.trainer-ref }}
          path: trainer

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          # Quoted so the version is always read as a string, never as a float.
          python-version: "3.11"

      - name: Install Python dependencies
        run: |
          echo "Installing Papermill and Jupyter"
          pip install papermill==2.6.0 jupyter==1.1.1 ipykernel==6.29.5
          echo "Installing Kubeflow SDK from source, with Docker support"
          pip install ".[docker]"
        working-directory: .  # Ensure pip runs from the SDK repo root

      - name: Setup cluster
        run: |
          cd ./trainer
          make test-e2e-setup-cluster \
            K8S_VERSION=${{ matrix.kubernetes-version }}
        working-directory: .  # Start at the SDK repo root; the script then enters ./trainer

      - name: Run e2e test for example Notebooks
        # Notebook inputs are relative to ./trainer; outputs go one level up
        # into the SDK workspace so the upload step can find them.
        # NOTE(review): PAPERMILL_TIMEOUT is presumably in seconds - confirm
        # against the Trainer Makefile.
        run: |
          mkdir -p artifacts/notebooks # Create the output directory
          cd ./trainer
          # Execute make commands, passing notebook paths and output locations
          make test-e2e-notebook \
            NOTEBOOK_INPUT=./examples/pytorch/image-classification/mnist.ipynb \
            NOTEBOOK_OUTPUT=../artifacts/notebooks/${{ matrix.kubernetes-version }}_mnist.ipynb \
            PAPERMILL_TIMEOUT=900
          make test-e2e-notebook \
            NOTEBOOK_INPUT=./examples/pytorch/question-answering/fine-tune-distilbert.ipynb \
            NOTEBOOK_OUTPUT=../artifacts/notebooks/${{ matrix.kubernetes-version }}_fine-tune-distilbert.ipynb \
            PAPERMILL_TIMEOUT=900
          make test-e2e-notebook \
            NOTEBOOK_INPUT=./examples/local/local-container-mnist.ipynb \
            NOTEBOOK_OUTPUT=../artifacts/notebooks/${{ matrix.kubernetes-version }}_local-container-mnist.ipynb \
            PAPERMILL_TIMEOUT=900
          make test-e2e-notebook \
            NOTEBOOK_INPUT=./examples/local/local-training-mnist.ipynb \
            NOTEBOOK_OUTPUT=../artifacts/notebooks/${{ matrix.kubernetes-version }}_local-training-mnist.ipynb \
            PAPERMILL_TIMEOUT=900
        working-directory: .  # Start at the SDK repo root; the script then enters ./trainer

      - name: Upload Artifacts to GitHub
        uses: actions/upload-artifact@v6
        if: always()  # Ensure artifacts are uploaded even if previous steps fail
        with:
          # The artifact name only includes the Kubernetes version; it is unique
          # per matrix entry as long as trainer-ref has a single value.
          name: ${{ matrix.kubernetes-version }}
          path: ./artifacts/notebooks/*  # Path relative to the workspace root
          retention-days: 1  # Keep the executed notebooks for one day only