Skip to content

Commit 703ab46

Browse files
authored
fix: better checking of affected tasks for e2e (#785)
find-affected-tasks for e2e tests was just looking for strings in the task yamls. This causes issues - an example is that updating the tekton.py helper would trigger all tests because the string tekton is in the apiVersion string in the task yamls. This fixes that to only check certain parts of the yaml and have smarter checking when it does. Assisted-By: Cursor Signed-off-by: Johnny Bieren <jbieren@redhat.com>
1 parent a32bd18 commit 703ab46

4 files changed

Lines changed: 204 additions & 11 deletions

File tree

integration-tests/lib/find_catalog_suite_from_utils_diff.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66
1. Build search tokens from changed paths using the utils ``Dockerfile``
77
(``COPY`` into ``/home``, ``PATH``) via :mod:`find_search_tokens_from_dockerfile`
88
(the process cwd must be the utils repo root so ``./Dockerfile`` exists).
9-
2. Search catalog ``tasks/**/*.yaml`` for those search tokens (skips
10-
``tasks/**/tests/`` fixture YAML).
9+
2. Search catalog Task step ``command`` / ``args`` / ``script`` fields for those
10+
tokens (skips ``tasks/**/tests/`` fixture YAML and Task metadata such as
11+
``apiVersion: tekton.dev``).
1112
3. Source catalog's ``find_release_pipelines_from_pr.sh`` and run
1213
``_catalog_stdin_task_paths_to_testcase_tokens`` (same mapping as catalog PR
1314
tooling).
@@ -42,6 +43,8 @@
4243
import sys
4344
from pathlib import Path
4445

46+
import yaml
47+
4548
import find_search_tokens_from_dockerfile as fts
4649
import helper_task_import_graph as htig
4750

@@ -153,6 +156,48 @@ def _changed_paths_trigger_global_catalog_run(changed: list[str]) -> bool:
153156
return False
154157

155158

159+
def _step_invocation_chunks(step: object) -> list[str]:
160+
"""Collect command/args/script strings from one Tekton Task step dict."""
161+
if not isinstance(step, dict):
162+
return []
163+
chunks: list[str] = []
164+
for key in ("command", "script"):
165+
val = step.get(key)
166+
if isinstance(val, str) and val:
167+
chunks.append(val)
168+
elif isinstance(val, list):
169+
chunks.extend(str(item) for item in val if item is not None)
170+
args = step.get("args")
171+
if isinstance(args, list):
172+
chunks.extend(str(item) for item in args if item is not None)
173+
elif isinstance(args, str) and args:
174+
chunks.append(args)
175+
return chunks
176+
177+
178+
def _extract_task_step_invocation_text(task_yaml_text: str) -> str:
179+
"""Return concatenated step command/args/script text from a Task YAML document.
180+
181+
Ignores ``apiVersion``, annotations, and other metadata so tokens like ``tekton``
182+
do not match every Task file.
183+
"""
184+
try:
185+
doc = yaml.safe_load(task_yaml_text)
186+
except yaml.YAMLError:
187+
return ""
188+
if not isinstance(doc, dict):
189+
return ""
190+
spec = doc.get("spec")
191+
if not isinstance(spec, dict):
192+
return ""
193+
chunks: list[str] = []
194+
steps = spec.get("steps")
195+
if isinstance(steps, list):
196+
for step in steps:
197+
chunks.extend(_step_invocation_chunks(step))
198+
return "\n".join(chunks)
199+
200+
156201
def _is_under_task_tests_dir(path: Path, tasks_root: Path) -> bool:
157202
"""Return whether ``path`` is under a ``tasks/.../tests/`` directory.
158203
@@ -167,11 +212,12 @@ def _is_under_task_tests_dir(path: Path, tasks_root: Path) -> bool:
167212

168213

169214
def _find_tasks_referencing_search_tokens(catalog: Path, search_tokens: set[str]) -> set[str]:
170-
"""Find Task YAML files whose content mentions any search token.
215+
"""Find Task YAML files whose step invocations mention any search token.
171216
172217
Search tokens are in-container paths (e.g. ``/home/pyxis/foo.py``) or command
173218
tokens (e.g. ``create_container_image``) from
174-
:mod:`find_search_tokens_from_dockerfile`. We walk ``catalog/tasks``, skip
219+
:mod:`find_search_tokens_from_dockerfile`. Only ``spec.steps`` ``command``,
220+
``args``, and ``script`` fields are searched. We walk ``catalog/tasks``, skip
175221
``tasks/**/tests/**``, require ``kind: Task``, and return paths relative to
176222
``catalog`` for any substring match.
177223
"""
@@ -189,8 +235,11 @@ def _find_tasks_referencing_search_tokens(catalog: Path, search_tokens: set[str]
189235
# Ignore non-Task files (e.g. Pipeline snippets) that might live under tasks/.
190236
if not re.search(r"kind:\s*Task\b", text):
191237
continue
238+
invocation_text = _extract_task_step_invocation_text(text)
239+
if not invocation_text:
240+
continue
192241
for token in search_tokens:
193-
if token in text:
242+
if token in invocation_text:
194243
found.add(task_yaml.relative_to(catalog).as_posix())
195244
break
196245
return found

integration-tests/lib/find_search_tokens_from_dockerfile.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
r"^COPY\s+(?!--from=)(\S+)\s+(/home/\S+)\s*(?:#.*)?$",
1919
re.IGNORECASE,
2020
)
21+
# ``ENV PYTHONPATH`` also contains the substring PATH; match only executable PATH.
22+
_ENV_PATH = re.compile(r"^ENV\s+PATH\s*=", re.IGNORECASE)
2123

2224

2325
@dataclass(frozen=True)
@@ -50,13 +52,11 @@ def parse_dockerfile_home_layout(dockerfile_text: str) -> UtilsImageHomeLayout:
5052
continue
5153
segment_to_home[src] = dest.rstrip("/")
5254

53-
# Second pass: PATH adds e.g. /home/pyxis for basename search tokens.
55+
# Second pass: ``ENV PATH=`` adds e.g. /home/pyxis for basename search tokens.
5456
path_dirs: set[str] = set()
5557
for line in dockerfile_text.splitlines():
5658
raw = line.split("#", 1)[0].strip()
57-
if not raw.upper().startswith("ENV "):
58-
continue
59-
if "PATH" not in raw:
59+
if not _ENV_PATH.match(raw):
6060
continue
6161
for hm in re.finditer(r"/home/[a-zA-Z0-9_.-]+", raw):
6262
path_dirs.add(hm.group(0))

integration-tests/lib/tests/test_find_catalog_suite_from_utils_diff.py

Lines changed: 127 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,13 +167,135 @@ def test_find_tasks_referencing_search_tokens_skips_tests_and_non_task(
167167
fixture.parent.mkdir(parents=True, exist_ok=True)
168168
not_task.parent.mkdir(parents=True, exist_ok=True)
169169
path = "/home/scripts/utils/foo.sh"
170-
good.write_text(f"kind: Task\nscript: {path}\n", encoding="utf-8")
170+
good.write_text(
171+
"kind: Task\nspec:\n steps:\n - name: run\n script: |\n "
172+
+ path
173+
+ "\n",
174+
encoding="utf-8",
175+
)
171176
fixture.write_text(f"kind: Task\n{path}\n", encoding="utf-8")
172177
not_task.write_text(f"kind: Pipeline\n{path}\n", encoding="utf-8")
173178
found = fc._find_tasks_referencing_search_tokens(tmp_path, {path})
174179
assert found == {"tasks/managed/t/task.yaml"}
175180

176181

182+
def test_find_tasks_referencing_search_tokens_ignores_tekton_in_metadata(
183+
tmp_path: Path,
184+
) -> None:
185+
"""Do not match ``tekton`` from ``apiVersion`` when step scripts do not use it."""
186+
tasks = tmp_path / "tasks"
187+
with_cmd = tasks / "internal" / "embargo" / "task.yaml"
188+
other = tasks / "managed" / "other" / "task.yaml"
189+
with_cmd.parent.mkdir(parents=True, exist_ok=True)
190+
other.parent.mkdir(parents=True, exist_ok=True)
191+
script_path = "/home/scripts/python/tasks/internal/check_embargoed_cves.py"
192+
with_cmd.write_text(
193+
f"""\
194+
apiVersion: tekton.dev/v1
195+
kind: Task
196+
metadata:
197+
name: check-embargoed-cves
198+
spec:
199+
steps:
200+
- name: run
201+
command: ["{script_path}"]
202+
""",
203+
encoding="utf-8",
204+
)
205+
other.write_text(
206+
"""\
207+
apiVersion: tekton.dev/v1
208+
kind: Task
209+
metadata:
210+
name: other
211+
spec:
212+
steps:
213+
- name: noop
214+
script: |
215+
echo hello
216+
""",
217+
encoding="utf-8",
218+
)
219+
found = fc._find_tasks_referencing_search_tokens(tmp_path, {"tekton"})
220+
assert found == set()
221+
found_path = fc._find_tasks_referencing_search_tokens(tmp_path, {script_path})
222+
assert found_path == {"tasks/internal/embargo/task.yaml"}
223+
224+
225+
def test_extract_task_step_invocation_text_collects_command_args_script() -> None:
226+
"""Gather only step invocation fields from parsed Task YAML."""
227+
text = """\
228+
kind: Task
229+
spec:
230+
steps:
231+
- name: a
232+
command: ["/home/pyxis/foo.py"]
233+
args: ["--verbose"]
234+
- name: b
235+
script: |
236+
/home/utils/bar.sh
237+
"""
238+
blob = fc._extract_task_step_invocation_text(text)
239+
assert "/home/pyxis/foo.py" in blob
240+
assert "--verbose" in blob
241+
assert "/home/utils/bar.sh" in blob
242+
243+
244+
def test_step_invocation_chunks_non_dict_step_returns_empty() -> None:
245+
"""Non-dict step entries (malformed YAML) yield no invocation chunks."""
246+
assert fc._step_invocation_chunks("not-a-step") == []
247+
assert fc._step_invocation_chunks(None) == []
248+
249+
250+
def test_step_invocation_chunks_string_args() -> None:
251+
"""``args`` may be a single string instead of a list."""
252+
chunks = fc._step_invocation_chunks(
253+
{"command": ["/home/pyxis/run.py"], "args": "--dry-run"}
254+
)
255+
assert chunks == ["/home/pyxis/run.py", "--dry-run"]
256+
257+
258+
def test_extract_task_step_invocation_text_invalid_yaml_returns_empty() -> None:
259+
"""Invalid YAML returns empty invocation text."""
260+
assert fc._extract_task_step_invocation_text("kind: Task\nsteps: [\n") == ""
261+
262+
263+
def test_extract_task_step_invocation_text_non_dict_document_returns_empty() -> None:
264+
"""YAML documents that are not mappings return empty invocation text."""
265+
assert fc._extract_task_step_invocation_text("- item\n") == ""
266+
267+
268+
def test_extract_task_step_invocation_text_non_dict_spec_returns_empty() -> None:
269+
"""A Task with a non-mapping ``spec`` returns empty invocation text."""
270+
text = "kind: Task\nspec: not-a-mapping\n"
271+
assert fc._extract_task_step_invocation_text(text) == ""
272+
273+
274+
def test_find_tasks_referencing_search_tokens_skips_empty_invocation_text(
275+
tmp_path: Path,
276+
) -> None:
277+
"""Tasks with no step command/args/script are skipped even if metadata matches."""
278+
tasks = tmp_path / "tasks"
279+
empty_steps = tasks / "managed" / "empty" / "task.yaml"
280+
empty_steps.parent.mkdir(parents=True, exist_ok=True)
281+
script = "/home/scripts/python/tasks/internal/check_embargoed_cves.py"
282+
empty_steps.write_text(
283+
f"""\
284+
apiVersion: tekton.dev/v1
285+
kind: Task
286+
metadata:
287+
name: empty
288+
annotations:
289+
description: {script}
290+
spec:
291+
steps: []
292+
""",
293+
encoding="utf-8",
294+
)
295+
found = fc._find_tasks_referencing_search_tokens(tmp_path, {script})
296+
assert found == set()
297+
298+
177299
def test_find_tasks_referencing_search_tokens_returns_empty_without_tasks_root(
178300
tmp_path: Path,
179301
) -> None:
@@ -186,7 +308,10 @@ def test_find_tasks_referencing_search_tokens_skips_unreadable_yaml(tmp_path: Pa
186308
"""Skip a task YAML file when read_text raises OSError."""
187309
task_yaml = tmp_path / "tasks" / "managed" / "t" / "task.yaml"
188310
task_yaml.parent.mkdir(parents=True, exist_ok=True)
189-
task_yaml.write_text("kind: Task\nscript: /home/scripts/x.sh\n", encoding="utf-8")
311+
task_yaml.write_text(
312+
"kind: Task\nspec:\n steps:\n - name: run\n script: /home/scripts/x.sh\n",
313+
encoding="utf-8",
314+
)
190315
with patch.object(Path, "read_text", side_effect=OSError("boom")):
191316
found = fc._find_tasks_referencing_search_tokens(tmp_path, {"/home/scripts/x.sh"})
192317
assert found == set()

integration-tests/lib/tests/test_find_search_tokens_from_dockerfile.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,25 @@ def test_parse_dockerfile_home_layout_maps_copy_and_path() -> None:
2323
assert "/home/pyxis" in layout.path_home_dirs
2424

2525

26+
def test_parse_dockerfile_ignores_pythonpath_env() -> None:
27+
"""``ENV PYTHONPATH`` must not be treated as executable ``PATH``."""
28+
text = """
29+
COPY scripts /home/scripts
30+
ENV PATH="$PATH:/home/pyxis"
31+
ENV PYTHONPATH="/home:/home/scripts/python/helpers:/home/scripts/python/tasks/internal"
32+
"""
33+
layout = fts.parse_dockerfile_home_layout(text)
34+
assert "/home/pyxis" in layout.path_home_dirs
35+
assert "/home/scripts" not in layout.path_home_dirs
36+
37+
38+
def test_search_tokens_scripts_py_no_stem_when_scripts_not_on_path() -> None:
39+
"""``scripts/**/*.py`` only get a full path token when ``/home/scripts`` is not on PATH."""
40+
layout = fts.parse_dockerfile_home_layout(MINIMAL_UTILS_DOCKERFILE)
41+
tokens = fts.search_tokens_for_repo_path("scripts/python/helpers/tekton.py", layout)
42+
assert tokens == frozenset({"/home/scripts/python/helpers/tekton.py"})
43+
44+
2645
def test_parse_skips_copy_from_stage() -> None:
2746
"""``COPY --from=`` lines do not define repo layout."""
2847
text = """

0 commit comments

Comments
 (0)