Skip to content

Commit 1915616

Browse files
committed
update piplock-renewal to skip the PR if the changes are only timestamps
1 parent 5145ce4 commit 1915616

2 files changed

Lines changed: 161 additions & 19 deletions

File tree

.github/workflows/piplock-renewal.yaml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,13 +95,56 @@ jobs:
9595
./uv run manifests/tools/generate_kustomization.py
9696
9797
- name: Refresh ImageStream notebook dependency annotations from pylocks
98+
env:
99+
# Authenticates the GitHub API calls that expand abbreviated SHAs to full OIDs before ``git fetch``; avoids anonymous rate limits.
100+
GITHUB_TOKEN: ${{ github.token }}
98101
run: |
99102
./uv run python manifests/tools/update_imagestream_annotations_from_pylock.py --variant odh
100103
./uv run python manifests/tools/update_imagestream_annotations_from_pylock.py --variant rhoai
101104
102105
- name: Validate manifests (make test)
103106
run: make test
104107

108+
# ``uv pip compile`` rewrites the header ``--exclude-newer=…`` timestamp every run.
# Per changed file, ignore hunks where all edits match ephemeral patterns (git 2.36+
# ``diff -I``). Restore only those files so mixed renewals keep substantive updates.
# When nothing substantive remains, skip opening a PR.
- name: Discard ephemeral-only lockfile renewal diff
  shell: bash
  run: |
    set -euo pipefail
    if git diff --quiet HEAD && [ -z "$(git ls-files --others --exclude-standard)" ]; then
      echo "No changes vs HEAD."
      exit 0
    fi

    # NUL-delimited names (``-z``): without it git C-quotes paths containing spaces-in-quotes,
    # non-ASCII bytes, etc. (``core.quotePath``), and the quoted form would fail the ``-e``
    # test below and be misreported as deleted instead of being inspected.
    while IFS= read -r -d '' f; do
      [ -z "$f" ] && continue
      if [ ! -e "$f" ]; then
        echo "Keeping (deleted vs HEAD): $f"
        continue
      fi
      if ! git cat-file -e "HEAD:$f" 2>/dev/null; then
        echo "Keeping (not in HEAD): $f"
        continue
      fi
      # ``--quiet`` exits 0 when, after ignoring the ephemeral patterns, nothing differs.
      if git diff HEAD \
        -I '^#.*--exclude-newer' \
        -I '^created-at[[:space:]]*=' \
        --quiet -- "$f"; then
        git restore --source=HEAD --staged --worktree -- "$f"
        echo "Restored (metadata-only): $f"
      else
        echo "Keeping (substantive): $f"
      fi
    done < <(git diff --name-only -z HEAD)

    if git diff --quiet HEAD && [ -z "$(git ls-files --others --exclude-standard)" ]; then
      echo "Only ephemeral lockfile metadata changed; working tree matches HEAD."
      exit 0
    fi
    echo "Substantive changes present; proceeding to PR creation."
147+
105148
- name: Create Pull Request
106149
env:
107150
GH_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}

manifests/tools/update_imagestream_annotations_from_pylock.py

Lines changed: 118 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,16 @@
77
``pylock.toml`` pins. If no file exists on disk, the SHA from ``commit-latest.env`` is used with
88
``git show`` (fetching from the canonical repo if needed): ``https://github.com/opendatahub-io/notebooks.git``
99
(``--variant odh``) or ``https://github.com/red-hat-data-services/notebooks.git`` (``--variant rhoai``).
10-
- Older tags (e.g. ``-2025-2``): SHA from ``commit.env`` (``<base>-commit-2025-2``).
10+
- Older tags (e.g. ``-2025-2``): SHA from ``commit.env``. Those commits may exist only on the canonical
11+
upstream (especially RHDS for ``--variant rhoai``); the script runs ``git fetch`` before ``git show``
12+
when the object is not already in the clone (fork CI often lacks RHDS-only SHAs until fetched).
1113
1214
Those SHAs match ``manifests/tools/generate_kustomization.py`` / ConfigMap keys.
1315
16+
Abbreviated SHAs (from ``commit*.env``) are expanded to full 40-character OIDs via the GitHub REST API
17+
when the canonical remote is ``github.com`` — ``git fetch … <short>`` treats short hex as a ref name and
18+
fails with ``couldn't find remote ref``. Set ``GITHUB_TOKEN`` / ``GH_TOKEN`` for API auth (rate limits).
19+
1420
Dependency *names* and ordering are taken from the existing manifest; versions are updated from
1521
the resolved lockfile at each ref (same translation rules as ``tests/test_main.py``). Older commits may only have ``requirements.txt`` or flavor files such as ``requirements.cpu.txt``
1622
instead of ``pylock.toml`` / ``uv.lock.d/pylock.*.toml``; those are parsed as pinned PEP 508 requirements.
@@ -31,10 +37,14 @@
3137
import dataclasses
3238
import json
3339
import logging
40+
import os
3441
import re
42+
import shlex
3543
import subprocess
3644
import sys
3745
import tomllib
46+
import urllib.error
47+
import urllib.request
3848
from pathlib import Path
3949
from typing import Any
4050

@@ -84,7 +94,7 @@
8494
}
8595
)
8696

87-
# Canonical Git URLs for ``git fetch`` when the ``-n`` tag commit is not already in the local object DB.
97+
# Canonical Git URLs for ``git fetch`` when a pinned commit is not already in the local object DB.
8898
_CANONICAL_REPO_URL: dict[str, str] = {
8999
"odh": "https://github.com/opendatahub-io/notebooks.git",
90100
"rhoai": "https://github.com/red-hat-data-services/notebooks.git",
@@ -94,6 +104,64 @@
94104
# Repo env files use short SHAs (7+ chars); full 40-char hashes are also accepted.
95105
_GIT_HEX_OBJECT_ID = re.compile(r"^[0-9a-f]{7,40}$")
96106

107+
_GITHUB_REPO_RE = re.compile(
108+
r"github\.com[:/](?P<owner>[^/]+)/(?P<repo>[^/?.#]+)(?:\.git)?(?:/|$)",
109+
re.IGNORECASE,
110+
)
111+
112+
113+
def _parse_github_owner_repo(git_url: str) -> tuple[str, str] | None:
114+
m = _GITHUB_REPO_RE.search(git_url)
115+
if not m:
116+
return None
117+
return m.group("owner"), m.group("repo")
118+
119+
120+
def _resolve_github_commit_full_sha(git_url: str, rev: str) -> str | None:
    """Resolve abbreviated git SHA to full 40-char lowercase via GitHub REST API.

    ``git fetch https://…/repo.git <short>`` treats ``<short>`` as a *ref name*, which yields
    ``couldn't find remote ref``. Passing the full OID fixes fetch against github.com.

    Uses ``GITHUB_TOKEN`` / ``GH_TOKEN`` when set (CI rate limits and private forks).

    Args:
        git_url: Remote URL; only github.com URLs can be resolved through the API.
        rev: Abbreviated or full hexadecimal commit id (surrounding whitespace is ignored).

    Returns:
        The 40-character lowercase commit id, or ``None`` when ``rev`` is not a hex object id,
        the URL is not a github.com URL, or the lookup fails for any reason (this is a
        best-effort helper and never raises for network or payload problems).
    """
    import http.client  # local import: only needed for the exception type handled below

    rev_clean = rev.strip().lower()
    if not _GIT_HEX_OBJECT_ID.fullmatch(rev_clean):
        return None
    if len(rev_clean) == 40:
        # Already a full OID; no API round-trip needed.
        return rev_clean
    parsed = _parse_github_owner_repo(git_url)
    if parsed is None:
        return None
    owner, repo = parsed
    api = f"https://api.github.com/repos/{owner}/{repo}/commits/{rev_clean}"

    # First non-blank token wins, so a whitespace-only GITHUB_TOKEN does not mask GH_TOKEN
    # or produce an empty ``Bearer`` header.
    token = ""
    for env_name in ("GITHUB_TOKEN", "GH_TOKEN"):
        token = (os.environ.get(env_name) or "").strip()
        if token:
            break

    headers = {
        "Accept": "application/vnd.github+json",
        "User-Agent": "opendatahub-notebooks-manifest-tools",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    if token:
        headers["Authorization"] = f"Bearer {token}"
    req = urllib.request.Request(api, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=120) as resp:
            body = json.loads(resp.read().decode())
    except (OSError, http.client.HTTPException, ValueError):
        # OSError covers HTTPError/URLError/timeouts/connection resets;
        # ValueError covers JSONDecodeError and UnicodeDecodeError.
        return None

    if not isinstance(body, dict):
        return None
    sha = body.get("sha")
    if not isinstance(sha, str):
        return None
    sha = sha.lower()
    # Guard against the API resolving the string as something other than this commit prefix.
    if len(sha) == 40 and sha.startswith(rev_clean):
        return sha
    return None
158+
159+
160+
def _rev_for_git_fetch(git_url: str, rev: str) -> str:
    """Pick the revision argument to hand to ``git fetch <url> <rev>``.

    Prefers the full object id resolved for ``rev``; when no full id can be resolved,
    falls back to ``rev`` with surrounding whitespace removed.
    """
    full_oid = _resolve_github_commit_full_sha(git_url, rev)
    if full_oid is None:
        return rev.strip()
    return full_oid
164+
97165

98166
def _is_under_jupyter_rocm_tree(directory: Path) -> bool:
99167
parts = directory.parts
@@ -345,22 +413,34 @@ def _git_commit_exists(rev: str) -> bool:
345413
return p.returncode == 0
346414

347415

348-
def _git_fetch_commit_from(url: str, rev: str) -> bool:
349-
p = subprocess.run(
350-
["git", "-C", str(ROOT), "fetch", "--quiet", "--no-tags", url, rev],
351-
capture_output=True,
352-
text=True,
353-
check=False,
354-
)
355-
return p.returncode == 0
356-
357-
358-
def _ensure_n_tag_commit_from_canonical_upstream(variant: str, rev: str) -> bool:
359-
"""Ensure ``rev`` is available for ``git show`` using the ODH or RHDS canonical ``.git`` URL for ``-n`` tags."""
416+
def _git_fetch_commit_from(url: str, rev: str) -> tuple[bool, str]:
    """Run ``git fetch`` for ``rev`` against ``url`` inside the repository at ``ROOT``.

    Returns:
        ``(True, "")`` on success; otherwise ``(False, detail)`` where ``detail`` is the
        first non-empty of stderr, stdout, or the exit code, followed by the shell-quoted
        command line. A fetch exceeding 120 seconds is reported as a timeout.
    """
    argv = [
        "git",
        "-C",
        str(ROOT),
        "fetch",
        "--quiet",
        "--no-tags",
        url,
        _rev_for_git_fetch(url, rev),
    ]
    rendered = " ".join(map(shlex.quote, argv))
    try:
        proc = subprocess.run(argv, capture_output=True, text=True, check=False, timeout=120)
    except subprocess.TimeoutExpired:
        return False, f"timed out after 120s\n (command: {rendered})"
    if proc.returncode == 0:
        return True, ""
    # Prefer stderr, then stdout, then fall back to the bare exit status.
    detail = (
        (proc.stderr or "").strip()
        or (proc.stdout or "").strip()
        or f"exit code {proc.returncode}"
    )
    return False, f"{detail}\n (command: {rendered})"
433+
434+
435+
def _ensure_commit_from_canonical_upstream(variant: str, rev: str) -> tuple[bool, str | None]:
    """Make ``rev`` usable by ``git show``, fetching it from the variant's canonical repo if absent.

    Returns:
        ``(True, None)`` when the commit is already present or the fetch succeeded;
        ``(False, detail)`` with the fetch failure text otherwise.
    """
    # Look the URL up first so an unknown variant fails before any git call is made.
    url = _CANONICAL_REPO_URL[variant]
    if _git_commit_exists(rev):
        return True, None
    fetched, failure = _git_fetch_commit_from(url, rev)
    return (True, None) if fetched else (False, failure)
364444

365445

366446
def _git_show_text(rev: str, rel_path: str) -> str | None:
@@ -622,6 +702,8 @@ def run_variant(variant: str, dry_run: bool) -> int:
622702
if idx >= len(tags):
623703
break
624704
sha = _sha_for_tag(base_key, suffix, latest, released)
705+
sha_desc = sha if sha else "<missing>"
706+
print(f"check {path.name} tag {idx}: {base_key}{suffix} (commit={sha_desc})", file=sys.stderr)
625707
nb_dir = resolve_notebook_directory(candidates, base_key)
626708
if nb_dir is None:
627709
want = notebook_dirname_from_base_key(base_key)
@@ -639,19 +721,36 @@ def run_variant(variant: str, dry_run: bool) -> int:
639721
shown = _worktree_read_first_existing(rel_paths)
640722
else:
641723
shown = None
724+
if shown is not None:
725+
rel_used, _text = shown
726+
print(
727+
f"ok {path.name} tag {idx}: using worktree lockfile {rel_used}",
728+
file=sys.stderr,
729+
)
642730

643731
if shown is None:
644732
if not sha:
645733
print(f"skip {path.name} tag {idx}: no SHA for {base_key}{suffix}", file=sys.stderr)
646734
continue
647-
if suffix == "-n" and not _ensure_n_tag_commit_from_canonical_upstream(variant, sha):
735+
# Released tags (e.g. ``-2025-2``) use ``commit.env`` SHAs that may live only on RHDS/ODH;
736+
# a fresh checkout (e.g. GitHub Actions) does not contain them until we fetch.
737+
ok_upstream, fetch_err = _ensure_commit_from_canonical_upstream(variant, sha)
738+
if not ok_upstream:
739+
url = _CANONICAL_REPO_URL[variant]
648740
print(
649-
f"skip {path.name} tag {idx}: could not resolve commit {sha} via "
650-
f"{_CANONICAL_REPO_URL[variant]}",
741+
f"skip {path.name} tag {idx}: git fetch failed for commit {sha} from {url}\n"
742+
f" {fetch_err}",
651743
file=sys.stderr,
652744
)
653745
continue
746+
print(f"ok {path.name} tag {idx}: commit {sha} is available locally", file=sys.stderr)
654747
shown = _git_show_first_existing(sha, rel_paths)
748+
if shown is not None:
749+
rel_used, _text = shown
750+
print(
751+
f"ok {path.name} tag {idx}: got lockfile {rel_used} from {sha}",
752+
file=sys.stderr,
753+
)
655754
if shown is None:
656755
print(
657756
f"skip {path.name} tag {idx}: no lockfile (tried worktree/git {'; '.join(rel_paths)})",

0 commit comments

Comments
 (0)