Skip to content

Commit 36a9c01

Browse files
committed
single pod tekton
1 parent 01ae761 commit 36a9c01

5 files changed

Lines changed: 739 additions & 70 deletions

File tree

.github/workflows/e2e-tekton.yml

Lines changed: 89 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,9 @@ name: E2E Tests via Tekton on RHOAI
22

33
# =============================================================================
44
# This workflow triggers E2E tests using Tekton Pipelines on RHOAI.
5-
# Benefits over K8s Job approach:
6-
# - Step isolation (each notebook in separate Pod)
7-
# - Built-in retry per step
8-
# - Better resource management
9-
# - Tekton Dashboard visibility
5+
# Two modes available:
6+
# - Single Pod (default): Fast startup, simple debugging, all steps in one Pod
7+
# - Multi Pod: Step isolation, per-step retry, separate Pod per notebook
108
# =============================================================================
119

1210
on:
@@ -22,6 +20,14 @@ on:
2220
- minimal
2321
- standard
2422
- extended
23+
mode:
24+
description: 'Execution mode'
25+
required: true
26+
default: 'single-pod'
27+
type: choice
28+
options:
29+
- single-pod
30+
- multi-pod
2531
skip_steps:
2632
description: 'Steps to skip (comma-separated, e.g., "1,5,6")'
2733
required: false
@@ -33,13 +39,12 @@ on:
3339
default: 'main'
3440
type: string
3541

36-
# Weekly scheduled run
42+
# Weekly scheduled run (uses single-pod mode)
3743
schedule:
3844
- cron: '0 3 * * 0' # Sundays at 3 AM UTC
3945

4046
env:
4147
OPENSHIFT_NAMESPACE: e2e-tests
42-
PIPELINE_NAME: e2e-knowledge-tuning
4348

4449
jobs:
4550
# ==========================================================================
@@ -50,6 +55,7 @@ jobs:
5055
runs-on: ubuntu-latest
5156
outputs:
5257
resources_ready: ${{ steps.check.outputs.ready }}
58+
pipeline_name: ${{ steps.check.outputs.pipeline_name }}
5359

5460
steps:
5561
- name: Checkout code
@@ -77,14 +83,20 @@ jobs:
7783
run: |
7884
echo "📦 Applying Tekton resources..."
7985
80-
# Apply resources (PVCs, ServiceAccount)
81-
oc apply -f tests/e2e/knowledge_tuning/rhoai/tekton/resources.yaml
86+
MODE="${{ github.event.inputs.mode || 'single-pod' }}"
8287
83-
# Apply Task
84-
oc apply -f tests/e2e/knowledge_tuning/rhoai/tekton/task-notebook-runner.yaml
88+
# Apply common resources (PVCs, ServiceAccount)
89+
oc apply -f tests/e2e/knowledge_tuning/rhoai/tekton/resources.yaml
8590
86-
# Apply Pipeline
87-
oc apply -f tests/e2e/knowledge_tuning/rhoai/tekton/pipeline-e2e.yaml
91+
if [ "$MODE" == "single-pod" ]; then
92+
echo "📦 Applying single-pod Task and Pipeline..."
93+
oc apply -f tests/e2e/knowledge_tuning/rhoai/tekton/task-e2e-single-pod.yaml
94+
oc apply -f tests/e2e/knowledge_tuning/rhoai/tekton/pipeline-single-pod.yaml
95+
else
96+
echo "📦 Applying multi-pod Task and Pipeline..."
97+
oc apply -f tests/e2e/knowledge_tuning/rhoai/tekton/task-notebook-runner.yaml
98+
oc apply -f tests/e2e/knowledge_tuning/rhoai/tekton/pipeline-e2e.yaml
99+
fi
88100
89101
echo "✅ Tekton resources applied"
90102
@@ -93,17 +105,28 @@ jobs:
93105
run: |
94106
echo "🔍 Verifying Tekton resources..."
95107
108+
MODE="${{ github.event.inputs.mode || 'single-pod' }}"
109+
110+
if [ "$MODE" == "single-pod" ]; then
111+
PIPELINE_NAME="e2e-knowledge-tuning-single-pod"
112+
TASK_NAME="e2e-knowledge-tuning-single-pod"
113+
else
114+
PIPELINE_NAME="e2e-knowledge-tuning"
115+
TASK_NAME="notebook-runner"
116+
fi
117+
96118
# Check Pipeline exists
97-
oc get pipeline ${{ env.PIPELINE_NAME }} -n ${{ env.OPENSHIFT_NAMESPACE }}
119+
oc get pipeline $PIPELINE_NAME -n ${{ env.OPENSHIFT_NAMESPACE }}
98120
99121
# Check Task exists
100-
oc get task notebook-runner -n ${{ env.OPENSHIFT_NAMESPACE }}
122+
oc get task $TASK_NAME -n ${{ env.OPENSHIFT_NAMESPACE }}
101123
102124
# Check PVCs
103125
oc get pvc -n ${{ env.OPENSHIFT_NAMESPACE }}
104126
127+
echo "pipeline_name=$PIPELINE_NAME" >> $GITHUB_OUTPUT
105128
echo "ready=true" >> $GITHUB_OUTPUT
106-
echo "✅ All resources ready"
129+
echo "✅ All resources ready (mode: $MODE)"
107130
108131
# ==========================================================================
109132
# Job 2: Trigger Tekton Pipeline
@@ -174,10 +197,54 @@ jobs:
174197
id: trigger
175198
run: |
176199
PIPELINERUN_NAME="e2e-run-${{ github.run_id }}-${{ github.run_attempt }}"
200+
PIPELINE_NAME="${{ needs.setup-tekton.outputs.pipeline_name }}"
201+
MODE="${{ github.event.inputs.mode || 'single-pod' }}"
177202
178203
echo "🚀 Creating PipelineRun: $PIPELINERUN_NAME"
204+
echo " Pipeline: $PIPELINE_NAME"
205+
echo " Mode: $MODE"
179206
180-
cat <<EOF | oc apply -f -
207+
# Single-pod mode doesn't use workspaces (uses emptyDir internally)
208+
if [ "$MODE" == "single-pod" ]; then
209+
cat <<EOF | oc apply -f -
210+
apiVersion: tekton.dev/v1beta1
211+
kind: PipelineRun
212+
metadata:
213+
name: ${PIPELINERUN_NAME}
214+
namespace: ${{ env.OPENSHIFT_NAMESPACE }}
215+
labels:
216+
app: e2e-tests
217+
trigger: github-actions
218+
mode: single-pod
219+
github-run-id: "${{ github.run_id }}"
220+
spec:
221+
pipelineRef:
222+
name: ${PIPELINE_NAME}
223+
serviceAccountName: e2e-pipeline-sa
224+
params:
225+
- name: git-url
226+
value: "https://github.com/${{ github.repository }}.git"
227+
- name: git-revision
228+
value: "${{ steps.params.outputs.git_branch }}"
229+
- name: test-profile
230+
value: "${{ steps.params.outputs.profile }}"
231+
- name: student-model
232+
value: "${{ steps.params.outputs.model }}"
233+
- name: teacher-model
234+
value: "${{ steps.params.outputs.model }}"
235+
- name: skip-steps
236+
value: "${{ steps.params.outputs.skip_steps }}"
237+
podTemplate:
238+
tolerations:
239+
- key: "nvidia.com/gpu"
240+
operator: "Exists"
241+
effect: "NoSchedule"
242+
nodeSelector:
243+
nvidia.com/gpu.present: "true"
244+
EOF
245+
else
246+
# Multi-pod mode uses PVC workspaces
247+
cat <<EOF | oc apply -f -
181248
apiVersion: tekton.dev/v1beta1
182249
kind: PipelineRun
183250
metadata:
@@ -186,10 +253,11 @@ jobs:
186253
labels:
187254
app: e2e-tests
188255
trigger: github-actions
256+
mode: multi-pod
189257
github-run-id: "${{ github.run_id }}"
190258
spec:
191259
pipelineRef:
192-
name: ${{ env.PIPELINE_NAME }}
260+
name: ${PIPELINE_NAME}
193261
serviceAccountName: e2e-pipeline-sa
194262
params:
195263
- name: git-url
@@ -219,17 +287,20 @@ jobs:
219287
nodeSelector:
220288
nvidia.com/gpu.present: "true"
221289
EOF
290+
fi
222291
223292
echo "pipelinerun_name=$PIPELINERUN_NAME" >> $GITHUB_OUTPUT
224293
echo "✅ PipelineRun created"
225294
226295
- name: Generate summary
227296
run: |
297+
MODE="${{ github.event.inputs.mode || 'single-pod' }}"
228298
echo "## Tekton Pipeline Triggered" >> $GITHUB_STEP_SUMMARY
229299
echo "" >> $GITHUB_STEP_SUMMARY
230300
echo "| Parameter | Value |" >> $GITHUB_STEP_SUMMARY
231301
echo "|-----------|-------|" >> $GITHUB_STEP_SUMMARY
232302
echo "| PipelineRun | \`${{ steps.trigger.outputs.pipelinerun_name }}\` |" >> $GITHUB_STEP_SUMMARY
303+
echo "| Mode | $MODE |" >> $GITHUB_STEP_SUMMARY
233304
echo "| Profile | ${{ steps.params.outputs.profile }} |" >> $GITHUB_STEP_SUMMARY
234305
echo "| Model | ${{ steps.params.outputs.model }} |" >> $GITHUB_STEP_SUMMARY
235306
echo "| Branch | ${{ steps.params.outputs.git_branch }} |" >> $GITHUB_STEP_SUMMARY

tests/e2e/knowledge_tuning/rhoai/tekton/README.md

Lines changed: 72 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,61 @@
11
# Tekton-based E2E Testing for Knowledge-Tuning
22

3-
This directory contains Tekton Pipeline resources for running E2E tests on RHOAI with step isolation and built-in retry capabilities.
3+
This directory contains Tekton Pipeline resources for running E2E tests on RHOAI with Tekton Dashboard visibility and built-in retry capabilities.
44

5-
## Architecture
5+
## Two Execution Modes
6+
7+
### Single-Pod Mode (Default, Recommended)
8+
9+
All 6 notebooks run in a **single Pod** with step-by-step visibility:
610

711
```text
812
┌─────────────────────────────────────────────────────────────────────────────┐
9-
Tekton Pipeline
13+
Single Pod (Tekton Task)
1014
├─────────────────────────────────────────────────────────────────────────────┤
15+
│ ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ │
16+
│ │ Step 1 │─▶│ Step 2 │─▶│ Step 3 │─▶│ Step 4 │─▶│ Step 5 │─▶│ Step 6 │ │
17+
│ └────────┘ └────────┘ └────────┘ └────────┘ └────────┘ └────────┘ │
1118
│ │
12-
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
13-
│ │ Step 1 │ │ Step 2 │ │ Step 3 │ │ Step 4 │ │
14-
│ │ (Pod 1) │──▶│ (Pod 2) │──▶│ (Pod 3) │──▶│ (Pod 4) │──▶ │
15-
│ │ │ │ │ │ │ │ │ │
16-
│ │ Base Model │ │ Data │ │ Knowledge │ │ Knowledge │ │
17-
│ │ Evaluation │ │ Processing │ │ Generation │ │ Mixing │ │
18-
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
19-
│ │
20-
│ ┌─────────────┐ ┌─────────────┐ │
21-
│ │ Step 5 │ │ Step 6 │ Shared PVC for data/outputs │
22-
│ │ (Pod 5) │──▶│ (Pod 6) │ ━━━━━━━━━━━━━━━━━━━━━━━━━━━ │
23-
│ │ │ │ │ │
24-
│ │ Model │ │ Evaluation │ Each Pod has: │
25-
│ │ Training │ │ │ • GPU access │
26-
│ └─────────────┘ └─────────────┘ • Retry capability │
27-
│ • Isolated dependencies │
19+
│ ✅ Fast startup (single Pod) ✅ Tekton Dashboard visibility │
20+
│ ✅ Simple debugging (one log stream) ✅ Built-in retry at Task level │
21+
│ ✅ Shared GPU/filesystem ✅ No PVC required │
22+
└─────────────────────────────────────────────────────────────────────────────┘
23+
```
24+
25+
### Multi-Pod Mode (Optional)
26+
27+
Each notebook runs in a **separate Pod** for step isolation:
28+
29+
```text
30+
┌─────────────────────────────────────────────────────────────────────────────┐
31+
│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │
32+
│ │ Pod 1 │──▶│ Pod 2 │──▶│ Pod 3 │──▶│ Pod 4 │──▶│ Pod 5 │──▶ 6 │
33+
│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │
34+
│ ↓ ↓ ↓ ↓ ↓ │
35+
│ ═══════════════ Shared PVC for data/outputs ═══════════════════ │
2836
└─────────────────────────────────────────────────────────────────────────────┘
2937
```
3038

31-
## Benefits over K8s Job Approach
39+
## Mode Comparison
3240

33-
| Feature | K8s Job | Tekton Pipeline |
34-
|---------|---------|-----------------|
35-
| Step isolation | ❌ Single container | ✅ Separate Pods |
36-
| Retry per step | ❌ Manual | ✅ Built-in |
37-
| Resource cleanup | ❌ Manual | Automatic |
38-
| Progress visibility | ❌ Logs only | Tekton Dashboard |
39-
| Parallel steps | No | Supported |
40-
| Artifact passing | Manual | Workspaces |
41+
| Feature | Single-Pod | Multi-Pod |
42+
|---------|------------|-----------|
43+
| Startup time | ✅ Fast | ❌ Slower (Pod per step) |
44+
| Debugging | ✅ Simple | ❌ Multiple logs |
45+
| Step retry | ✅ Task level | Per-step |
46+
| Dashboard visibility | ✅ Yes | Yes |
47+
| Step isolation | Shared | Isolated |
48+
| PVC required | No | Yes |
4149

4250
## Files
4351

4452
| File | Description |
4553
|------|-------------|
4654
| `resources.yaml` | Namespace, PVCs, ServiceAccount |
47-
| `task-notebook-runner.yaml` | Reusable Task for running notebooks |
48-
| `pipeline-e2e.yaml` | Pipeline that chains all 6 steps |
55+
| `task-e2e-single-pod.yaml` | Single-pod Task (all steps in one Pod) |
56+
| `pipeline-single-pod.yaml` | Pipeline wrapper for single-pod mode |
57+
| `task-notebook-runner.yaml` | Multi-pod Task (one notebook per call) |
58+
| `pipeline-e2e.yaml` | Multi-pod Pipeline (6 TaskRuns) |
4959

5060
## Prerequisites
5161

@@ -60,19 +70,37 @@ This directory contains Tekton Pipeline resources for running E2E tests on RHOAI
6070
### Option 1: Via GitHub Actions
6171

6272
```bash
63-
# Trigger via GitHub CLI
73+
# Single-pod mode (default, recommended)
6474
gh workflow run e2e-tekton.yml \
6575
-f profile=minimal \
66-
-f git_branch=main
76+
-f mode=single-pod
77+
78+
# Multi-pod mode (step isolation)
79+
gh workflow run e2e-tekton.yml \
80+
-f profile=minimal \
81+
-f mode=multi-pod
82+
```
83+
84+
### Option 2: Using trigger script
85+
86+
```bash
87+
# Single-pod mode (default)
88+
./trigger-pipeline.sh --profile minimal
89+
90+
# Multi-pod mode
91+
./trigger-pipeline.sh --profile minimal --mode multi-pod
92+
93+
# With options
94+
./trigger-pipeline.sh --profile standard --branch feature-branch --skip "1,6"
6795
```
6896

69-
### Option 2: Manual Setup on RHOAI
97+
### Option 3: Manual Setup on RHOAI
7098

7199
```bash
72-
# 1. Apply resources
100+
# 1. Apply resources (single-pod mode)
73101
oc apply -f resources.yaml
74-
oc apply -f task-notebook-runner.yaml
75-
oc apply -f pipeline-e2e.yaml
102+
oc apply -f task-e2e-single-pod.yaml
103+
oc apply -f pipeline-single-pod.yaml
76104

77105
# 2. Start a PipelineRun
78106
cat <<EOF | oc apply -f -
@@ -83,18 +111,18 @@ metadata:
83111
namespace: e2e-tests
84112
spec:
85113
pipelineRef:
86-
name: e2e-knowledge-tuning
114+
name: e2e-knowledge-tuning-single-pod
87115
serviceAccountName: e2e-pipeline-sa
88116
params:
89117
- name: test-profile
90118
value: "minimal"
91-
workspaces:
92-
- name: shared-data
93-
persistentVolumeClaim:
94-
claimName: e2e-shared-data
95-
- name: output-notebooks
96-
persistentVolumeClaim:
97-
claimName: e2e-output-notebooks
119+
podTemplate:
120+
tolerations:
121+
- key: "nvidia.com/gpu"
122+
operator: "Exists"
123+
effect: "NoSchedule"
124+
nodeSelector:
125+
nvidia.com/gpu.present: "true"
98126
EOF
99127

100128
# 3. Watch progress

0 commit comments

Comments
 (0)