Skip to content

Commit ac1e9dc

Browse files
committed
Merge remote-tracking branch 'kubeflow/main' into merge-kubeflow-to-odh
2 parents bcad816 + 79ed60a commit ac1e9dc

4 files changed

Lines changed: 130 additions & 125 deletions

File tree

.github/scripts/ci_checks/ci_checks.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,22 @@ def get_check_runs(self, repo: str, head_sha: str) -> dict:
4242
return json.loads(result.stdout)
4343

4444
def get_own_check_run_id(self, repo: str, head_sha: str, check_name: str) -> int:
45-
"""Return the ID of the check run matching *check_name*."""
45+
"""Return the ID of the check run matching *check_name*.
46+
47+
Prefers an in-progress instance over completed ones to avoid
48+
misidentifying a stale run when multiple runs share the same name.
49+
Falls back to the first completed run if none is in-progress.
50+
"""
4651
data = self.get_check_runs(repo, head_sha)
52+
fallback: int | None = None
4753
for cr in data.get("check_runs", []):
4854
if cr["name"] == check_name:
49-
return cr["id"]
55+
if cr["status"] == "in_progress":
56+
return cr["id"]
57+
if fallback is None:
58+
fallback = cr["id"]
59+
if fallback is not None:
60+
return fallback
5061
raise ChecksError(f"Check run '{check_name}' not found")
5162

5263

.github/scripts/ci_checks/tests/test_ci_checks.py

Lines changed: 84 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,17 @@ def get_check_runs(self, repo: str, head_sha: str) -> dict:
7272
return response
7373

7474
def get_own_check_run_id(self, repo: str, head_sha: str, check_name: str) -> int:
75-
"""Return a fixed check run ID by scanning check_runs_responses."""
75+
"""Return a fixed check run ID, preferring the in-progress instance."""
7676
if self._check_runs_responses:
77+
fallback: int | None = None
7778
for cr in self._check_runs_responses[0].get("check_runs", []):
7879
if cr["name"] == check_name:
79-
return cr["id"]
80+
if cr["status"] == "in_progress":
81+
return cr["id"]
82+
if fallback is None:
83+
fallback = cr["id"]
84+
if fallback is not None:
85+
return fallback
8086
raise ChecksError(f"Check run '{check_name}' not found")
8187

8288

@@ -389,6 +395,69 @@ def test_empty_check_runs_retries_then_fails(self):
389395
wait_for_checks(gh, "owner/repo", "abc123", check_run_id=999, delay=0, retries=2, interval=5)
390396
assert gh.get_check_runs.call_count == 2
391397

398+
def test_stale_completed_run_with_same_name_excluded_by_ignore(self):
399+
"""A stale completed check_ci_status run is excluded via ignore_checks."""
400+
gh = MagicMock(spec=GhClient)
401+
gh.get_check_runs.return_value = json.loads(
402+
_api_response(
403+
_make_check_run(800, "check_ci_status", "completed", "failure"),
404+
_make_check_run(999, "check_ci_status", "in_progress"),
405+
_make_check_run(100, "lint", "completed", "success"),
406+
)
407+
)
408+
wait_for_checks(
409+
gh,
410+
"owner/repo",
411+
"abc123",
412+
check_run_id=999,
413+
delay=0,
414+
retries=1,
415+
interval=0,
416+
ignore_checks=frozenset({"check_ci_status"}),
417+
)
418+
419+
def test_stale_failed_run_causes_false_failure_without_ignore(self):
420+
"""Without ignore_checks, a stale failed run with the same name causes failure."""
421+
gh = MagicMock(spec=GhClient)
422+
gh.get_check_runs.return_value = json.loads(
423+
_api_response(
424+
_make_check_run(800, "check_ci_status", "completed", "failure"),
425+
_make_check_run(999, "check_ci_status", "in_progress"),
426+
_make_check_run(100, "lint", "completed", "success"),
427+
)
428+
)
429+
with pytest.raises(ChecksError, match="check_ci_status"):
430+
wait_for_checks(
431+
gh,
432+
"owner/repo",
433+
"abc123",
434+
check_run_id=999,
435+
delay=0,
436+
retries=1,
437+
interval=0,
438+
)
439+
440+
def test_concurrent_in_progress_run_excluded_by_ignore(self):
441+
"""Two concurrent in-progress runs with same name don't deadlock when using ignore_checks."""
442+
gh = MagicMock(spec=GhClient)
443+
gh.get_check_runs.return_value = json.loads(
444+
_api_response(
445+
_make_check_run(998, "check_ci_status", "in_progress"),
446+
_make_check_run(999, "check_ci_status", "in_progress"),
447+
_make_check_run(100, "lint", "completed", "success"),
448+
)
449+
)
450+
wait_for_checks(
451+
gh,
452+
"owner/repo",
453+
"abc123",
454+
check_run_id=999,
455+
delay=0,
456+
retries=1,
457+
interval=0,
458+
ignore_checks=frozenset({"check_ci_status"}),
459+
)
460+
392461
def test_many_check_runs_all_evaluated(self):
393462
"""All check runs are evaluated, not just the first page worth."""
394463
gh = MagicMock(spec=GhClient)
@@ -446,14 +515,25 @@ def test_get_own_check_run_id_finds_matching_check(self, mock_run):
446515
assert client.get_own_check_run_id("owner/repo", "abc123", "check_ci_status") == 200
447516

448517
@patch("ci_checks.ci_checks.subprocess.run")
449-
def test_get_own_check_run_id_returns_first_match(self, mock_run):
450-
"""get_own_check_run_id returns the first matching ID when duplicates exist."""
518+
def test_get_own_check_run_id_prefers_in_progress(self, mock_run):
519+
"""get_own_check_run_id prefers the in-progress run over a completed one."""
451520
response = _api_response(
452521
_make_check_run(200, "check_ci_status", "completed", "success"),
453522
_make_check_run(300, "check_ci_status", "in_progress"),
454523
)
455524
mock_run.return_value = subprocess.CompletedProcess(args=[], returncode=0, stdout=response)
456525
client = GhClient()
526+
assert client.get_own_check_run_id("owner/repo", "abc123", "check_ci_status") == 300
527+
528+
@patch("ci_checks.ci_checks.subprocess.run")
529+
def test_get_own_check_run_id_falls_back_to_completed(self, mock_run):
530+
"""get_own_check_run_id falls back to completed run when none is in-progress."""
531+
response = _api_response(
532+
_make_check_run(200, "check_ci_status", "completed", "success"),
533+
_make_check_run(300, "check_ci_status", "completed", "success"),
534+
)
535+
mock_run.return_value = subprocess.CompletedProcess(args=[], returncode=0, stdout=response)
536+
client = GhClient()
457537
assert client.get_own_check_run_id("owner/repo", "abc123", "check_ci_status") == 200
458538

459539
@patch("ci_checks.ci_checks.subprocess.run")

.github/workflows/ci-checks.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,13 @@ on:
66
pull_request_target:
77
types: [opened, synchronize, reopened, labeled]
88

9+
concurrency:
10+
group: ${{ github.workflow }}-${{ github.event.number }}-${{ github.event.label.name || 'ci' }}
11+
cancel-in-progress: true
12+
913
jobs:
1014
check_ci_status:
15+
if: github.event.action != 'labeled' || github.event.label.name == 'ok-to-test'
1116
runs-on: ubuntu-24.04
1217
timeout-minutes: 10
1318
permissions:
@@ -39,7 +44,7 @@ jobs:
3944
--author-login "$AUTHOR_LOGIN" \
4045
--head-sha "$HEAD_SHA" \
4146
--check-name "check_ci_status" \
42-
--ignore-checks "Agent" \
47+
--ignore-checks "Agent,check_ci_status" \
4348
--delay 5 \
4449
--retries 10 \
4550
--polling-interval 5 \

AGENTS.md

Lines changed: 27 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@ See also:
99

1010
## Sources of truth (keep this doc aligned)
1111

12-
If this guide conflicts with repository enforcement or process docs, treat these as sources of truth:
13-
14-
This guide is expected to stay current; when repository enforcement, CI, or contribution process changes (or when a
15-
difference is noted), update `AGENTS.md` alongside the change.
12+
If this guide conflicts with repository enforcement or process docs, treat these as sources of truth.
13+
When repository enforcement, CI, or contribution process changes, update `AGENTS.md` alongside the change.
1614

1715
- [`CONTRIBUTING.md`](docs/CONTRIBUTING.md) (required files, workflow, required metadata fields)
1816
- [`GOVERNANCE.md`](docs/GOVERNANCE.md) (roles, ownership, lifecycle)
@@ -47,30 +45,6 @@ Agents typically interact with this repository in three modes. Use the mode to d
4745

4846
Goal: add or update an asset under `components/` or `pipelines/` that is reusable and passes repo validations.
4947

50-
### Before you generate code
51-
52-
#### Reuse-first: search for existing components/pipelines
53-
54-
Before adding anything new:
55-
56-
- Search under `components/<category>/` and `pipelines/<category>/` for similar functionality.
57-
- Prefer extending or composing existing assets instead of duplicating.
58-
59-
Good places to look:
60-
61-
- `components/` and `pipelines/` category directories for similar patterns and reusable building blocks (example:
62-
`components/data_processing/yoda_data_processor`)
63-
- `scripts/generate_skeleton/` (canonical templates)
64-
- `scripts/generate_readme/` (README generation expectations)
65-
66-
#### Establish the target location and naming
67-
68-
- Components live under `components/<category>/<component_name>/`.
69-
- Components can optionally use subcategories: `components/<category>/<subcategory>/<component_name>/`.
70-
- Pipelines live under `pipelines/<category>/<pipeline_name>/`.
71-
- Pipelines can optionally use subcategories: `pipelines/<category>/<subcategory>/<pipeline_name>/`.
72-
- Use `snake_case` directory names (per `CONTRIBUTING.md`).
73-
7448
### Required files
7549

7650
When the agent changes or adds a component/pipeline directory, follow
@@ -88,57 +62,18 @@ Process (expected for agents):
8862
- Open a submission issue using `.github/ISSUE_TEMPLATE/component_submission.md`.
8963
- Get Pipelines Working Group approval in that issue (link it from the PR).
9064
- Open a PR with the implementation.
91-
- Follow the repo’s OWNERS-based review flow described in `CONTRIBUTING.md` (`/lgtm` + `/approve`).
92-
93-
### Example prompts (Mode 1)
94-
95-
#### Add a new component (reuse-first, compliant)
96-
97-
Use this prompt pattern:
98-
99-
"Search `components/` for similar functionality and reuse if possible. If a new component is needed, create it under
100-
`components/<category>/<name>/` using `make component CATEGORY=<cat> NAME=<name> [NO_TESTS=true]`, then implement
101-
`component.py` following repository lint rules (including import guard). Create `metadata.yaml` that conforms to
102-
the metadata schema defined in [`CONTRIBUTING.md`](docs/CONTRIBUTING.md#metadatayaml-schema) (required field order, fresh `lastVerified`). Generate/validate
103-
`README.md` using `make readme TYPE=component CATEGORY=<cat> NAME=<name>`. Add unit tests using `.python_func()` and a
104-
LocalRunner test using `setup_and_teardown_subprocess_runner` (you can generate tests via
105-
`make tests TYPE=component CATEGORY=<cat> NAME=<name>`). Reference an existing component like
106-
`components/data_processing/yoda_data_processor/` for patterns."
107-
108-
#### Add a component in a subcategory
109-
110-
Use this prompt pattern when creating related components that should share ownership or utilities:
111-
112-
"Create a component in a subcategory using `make component CATEGORY=<cat> SUBCATEGORY=<sub> NAME=<name>`. This
113-
automatically creates the subcategory structure with OWNERS and README.md if it doesn't exist. For shared utilities,
114-
add `CREATE_SHARED=true` to create a `shared/` package. Update the subcategory OWNERS and README.md with appropriate
115-
maintainers and documentation. Follow the same component implementation patterns as above."
116-
117-
#### Add a new pipeline (reuse-first, compliant)
118-
119-
Use this prompt pattern:
65+
- Follow the repo's OWNERS-based review flow described in `CONTRIBUTING.md` (`/lgtm` + `/approve`).
12066

121-
"Search `pipelines/` for similar functionality and reuse if possible. If a new pipeline is needed, create it under
122-
`pipelines/<category>/<name>/` using `make pipeline CATEGORY=<cat> NAME=<name> [NO_TESTS=true]`, then implement
123-
`pipeline.py` following repository lint rules (including import guard). Create `metadata.yaml` that conforms to the
124-
metadata schema defined in [`CONTRIBUTING.md`](docs/CONTRIBUTING.md#metadatayaml-schema) (required field order, fresh
125-
`lastVerified`). Generate/validate `README.md` using `make readme TYPE=pipeline CATEGORY=<cat> NAME=<name>`. Add tests
126-
(you can generate tests via `make tests TYPE=pipeline CATEGORY=<cat> NAME=<name>`)."
67+
### Common tasks
12768

128-
#### Add a pipeline in a subcategory
129-
130-
Use this prompt pattern when creating related pipelines that should share ownership or utilities:
131-
132-
"Create a pipeline in a subcategory using `make pipeline CATEGORY=<cat> SUBCATEGORY=<sub> NAME=<name>`. This
133-
automatically creates the subcategory structure with OWNERS and README.md if it doesn't exist. For shared utilities,
134-
add `CREATE_SHARED=true` to create a `shared/` package. Update the subcategory OWNERS and README.md with appropriate
135-
maintainers and documentation. Follow the same pipeline implementation patterns as above."
136-
137-
#### Update an existing component safely
138-
139-
"Find the existing component directory. Make the minimal change needed. Update docstrings and regenerate the README
140-
if the interface changed (`make readme TYPE=component CATEGORY=<cat> NAME=<name>`). Update `metadata.yaml` only if
141-
needed and keep `lastVerified` fresh. Add/adjust unit tests and LocalRunner tests. Ensure import guard compliance."
69+
| Task | Command | Reference pattern |
70+
|---|---|---|
71+
| New component | `make component CATEGORY=<cat> NAME=<name>` | `components/data_processing/yoda_data_processor/` |
72+
| New pipeline | `make pipeline CATEGORY=<cat> NAME=<name>` | `pipelines/training/sft/` |
73+
| Subcategory asset | Add `SUBCATEGORY=<sub>`; add `CREATE_SHARED=true` for shared utils | Subcategory OWNERS/README auto-created |
74+
| Generate tests | `make tests TYPE=component\|pipeline CATEGORY=<cat> NAME=<name>` | Unit tests + LocalRunner tests |
75+
| Generate README | `make readme TYPE=component\|pipeline CATEGORY=<cat> NAME=<name>` | Keeps README in sync |
76+
| Update existing | Minimal change, regenerate README if interface changed, keep `lastVerified` fresh | Same component dir |
14277

14378
## Mode 2: End user building pipelines from these components
14479

@@ -171,52 +106,26 @@ Follow [`CONTRIBUTING.md`](docs/CONTRIBUTING.md#dependency-management-uvlock) fo
171106

172107
### Python lint and formatting
173108

174-
Python lint/format is enforced by CI on pull requests and runs against **changed files**:
175-
176-
- Workflow: [`.github/workflows/python-lint.yml`](.github/workflows/python-lint.yml)
177-
178-
This uses Ruff formatting and linting (see `pyproject.toml` for configuration).
179-
180-
### Markdown lint
181-
182-
Markdown is linted in CI on pull requests and runs against **changed files**:
183-
184-
- Workflow: [`.github/workflows/markdown-lint.yml`](.github/workflows/markdown-lint.yml)
185-
- Config: [`.markdownlint.json`](.markdownlint.json)
186-
187-
### YAML lint
188-
189-
YAML is linted in CI on pull requests and runs against **changed files**:
190-
191-
- Workflow: [`.github/workflows/yaml-lint.yml`](.github/workflows/yaml-lint.yml)
192-
- Config: [`.yamllint.yml`](.yamllint.yml)
193-
194-
### Import guard (components/pipelines)
195-
196-
Follow [`CONTRIBUTING.md` (Testing and Quality)](docs/CONTRIBUTING.md#testing-and-quality).
197-
Allowlisted exceptions are defined in
198-
[`.github/scripts/check_imports/import_exceptions.yaml`](.github/scripts/check_imports/import_exceptions.yaml).
199-
200-
### Metadata schema validation
201-
202-
Follow the canonical schema requirements in
203-
[`CONTRIBUTING.md` (metadata.yaml schema)](docs/CONTRIBUTING.md#metadatayaml-schema).
204-
205-
CI workflow (reference): [`.github/workflows/validate-metadata-schema.yml`](.github/workflows/validate-metadata-schema.yml).
206-
207-
### Base image validation
109+
Enforced by CI ([`.github/workflows/python-lint.yml`](.github/workflows/python-lint.yml)) using Ruff (see `pyproject.toml`).
208110

209-
Follow the canonical policy in
210-
[`scripts/validate_base_images/README.md`](scripts/validate_base_images/README.md).
111+
Single-file commands:
211112

212-
CI workflow (reference): [`.github/workflows/base-image-check.yml`](.github/workflows/base-image-check.yml).
113+
- `uv run ruff check <file>` — lint one file
114+
- `uv run ruff check --fix <file>` — lint + auto-fix one file
115+
- `uv run ruff format <file>` — format one file
213116

214-
### README generation and sync
117+
Whole-repo: `make lint` (check) or `make format` (fix).
215118

216-
Follow the canonical generator behavior in
217-
[`scripts/generate_readme/README.md`](scripts/generate_readme/README.md) and keep READMEs in sync.
119+
### Other validations
218120

219-
CI workflow (reference): [`.github/workflows/readme-check.yml`](.github/workflows/readme-check.yml).
121+
| Validation | Config | CI workflow |
122+
|---|---|---|
123+
| Markdown lint | [`.markdownlint.json`](.markdownlint.json) | [`markdown-lint.yml`](.github/workflows/markdown-lint.yml) |
124+
| YAML lint | [`.yamllint.yml`](.yamllint.yml) | [`yaml-lint.yml`](.github/workflows/yaml-lint.yml) |
125+
| Import guard | [`import_exceptions.yaml`](.github/scripts/check_imports/import_exceptions.yaml); see [`CONTRIBUTING.md`](docs/CONTRIBUTING.md#testing-and-quality) | [`ci-checks.yml`](.github/workflows/ci-checks.yml) |
126+
| Metadata schema | [`CONTRIBUTING.md` (schema)](docs/CONTRIBUTING.md#metadatayaml-schema); keep required field order and fresh `lastVerified` | [`validate-metadata-schema.yml`](.github/workflows/validate-metadata-schema.yml) |
127+
| Base images | [`validate_base_images/README.md`](scripts/validate_base_images/README.md) | [`base-image-check.yml`](.github/workflows/base-image-check.yml) |
128+
| README sync | [`generate_readme/README.md`](scripts/generate_readme/README.md) | [`readme-check.yml`](.github/workflows/readme-check.yml) |
220129

221130
### Tests
222131

0 commit comments

Comments
 (0)