Skip to content

Commit d93077f

Browse files
committed
feat(RELEASE-2509): add python scripts for publish-to-nrrc task
add python scripts for publish-to-nrrc task Signed-off-by: Elena German <elgerman@redhat.com> Assisted-by: Claude
1 parent a0c68a7 commit d93077f

10 files changed

Lines changed: 1212 additions & 1 deletion

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ dependencies = [
2525
"diffused-lib==0.3.0",
2626
"confluent-kafka",
2727
"python-gitlab>=4.0",
28+
"python-dotenv>=1.0.0",
2829
]
2930

3031
[tool.black]

scripts/python/helpers/file.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
import gzip
88
import io
99
import os
10+
import re
11+
import subprocess
1012
import tempfile
13+
from collections.abc import Callable, Sequence
1114
from pathlib import Path
1215
from typing import Any
1316

@@ -23,6 +26,7 @@ def load_json_dict(path: Path) -> dict[str, Any]:
2326

2427

2528
_GZIP_READ_CHUNK_SIZE = 64 * 1024
29+
_ARCHIVE_TYPE = re.compile(r"(gzip compressed data|POSIX tar archive)")
2630

2731

2832
def sha256(path: Path) -> str:
@@ -57,6 +61,33 @@ def path_from_env_variable(
5761
return default if isinstance(default, Path) else Path(default)
5862

5963

64+
def resolve_path_under_base(base: Path, relative: str | Path) -> Path:
65+
"""Resolve *relative* under *base* and ensure the result stays inside *base*.
66+
67+
Rejects absolute paths and ``..`` traversal after resolution. Typical use:
68+
Tekton passes a path relative to a data directory (e.g. charon env/config files).
69+
"""
70+
text = str(relative).strip()
71+
if not text:
72+
raise ValueError(f"path must be relative to {base}: {relative!r}")
73+
rel = Path(text)
74+
if rel.is_absolute():
75+
raise ValueError(f"path must be relative to {base}: {relative!r}")
76+
root = base.resolve()
77+
candidate = (root / rel).resolve()
78+
if not candidate.is_relative_to(root):
79+
raise ValueError(f"path must stay under {base}: {relative!r}")
80+
return candidate
81+
82+
83+
# Fallback NRRC staging directory used when ``WORK_DIR`` is not provided.
NRRC_WORK_DIR_DEFAULT = Path("/var/workdir/nrrc")


def nrrc_work_dir() -> Path:
    """Return the NRRC staging directory.

    The location is looked up through ``path_from_env_variable`` under the
    ``WORK_DIR`` name, with ``NRRC_WORK_DIR_DEFAULT`` as the fallback.
    """
    staging_dir = path_from_env_variable("WORK_DIR", NRRC_WORK_DIR_DEFAULT)
    return staging_dir
89+
90+
6091
def make_tempfile_path(
6192
prefix: str,
6293
data: bytes | None = None,
@@ -97,3 +128,13 @@ def decompress_gzip_bounded(data: bytes, *, max_bytes: int) -> bytes:
97128
msg = f"decompressed data exceeds {max_bytes} bytes (possible gzip bomb)"
98129
raise ValueError(msg)
99130
return bytes(output)
131+
132+
133+
def is_gzip_or_tar_archive(
    path: Path,
    *,
    file_cmd: Callable[[Sequence[str | Path]], subprocess.CompletedProcess[str]],
) -> bool:
    """Tell whether ``file -b`` describes *path* as gzip or POSIX tar content.

    Args:
        path: File to inspect.
        file_cmd: Callable that runs the given argument vector and returns a
            completed process whose ``stdout`` holds the command output.

    Returns:
        True when the output matches the module's archive-type pattern.
    """
    description = file_cmd(["file", "-b", str(path)]).stdout
    return bool(_ARCHIVE_TYPE.search(description))

scripts/python/helpers/test_file.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
import json
66
import gzip
7+
import subprocess
8+
from collections.abc import Sequence
79
from pathlib import Path
810

911
import file
@@ -63,6 +65,43 @@ def test_load_json_dict_rejects_non_object(tmp_path: Path) -> None:
6365
file.load_json_dict(path)
6466

6567

68+
def test_resolve_path_under_base_relative_file(tmp_path: Path) -> None:
    """Resolving a plain relative path yields the matching location inside *base*."""
    expected = tmp_path / "uid" / "charon.env"
    expected.parent.mkdir(parents=True)

    resolved = file.resolve_path_under_base(tmp_path, "uid/charon.env")

    assert resolved == expected.resolve()
73+
74+
75+
def test_resolve_path_under_base_rejects_absolute(tmp_path: Path) -> None:
    """An absolute input raises ValueError, whether or not the target exists."""
    absolute_input = "/etc/passwd"
    with pytest.raises(ValueError, match="must be relative"):
        file.resolve_path_under_base(tmp_path, absolute_input)
79+
80+
81+
def test_resolve_path_under_base_rejects_traversal(tmp_path: Path) -> None:
    """A ``..`` segment that leaves *base* raises ValueError."""
    escaping_input = "../outside"
    with pytest.raises(ValueError, match="must stay under"):
        file.resolve_path_under_base(tmp_path, escaping_input)
85+
86+
87+
def test_resolve_path_under_base_rejects_blank(tmp_path: Path) -> None:
    """A whitespace-only relative path raises ValueError."""
    blank_input = " "
    with pytest.raises(ValueError, match="must be relative"):
        file.resolve_path_under_base(tmp_path, blank_input)
91+
92+
93+
def test_nrrc_work_dir_default(monkeypatch: "pytest.MonkeyPatch") -> None:
    """Without ``WORK_DIR`` set, the NRRC work directory is ``/var/workdir/nrrc``."""
    # Drop any WORK_DIR inherited from the surrounding environment (for example
    # a CI job or task that exports it); otherwise this test would exercise the
    # override instead of the default and fail spuriously.
    monkeypatch.delenv("WORK_DIR", raising=False)
    assert file.nrrc_work_dir() == Path("/var/workdir/nrrc")
96+
97+
98+
def test_nrrc_work_dir_from_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Setting ``WORK_DIR`` replaces the default NRRC staging directory."""
    override = tmp_path / "staging"
    monkeypatch.setenv("WORK_DIR", str(override))

    resolved = file.nrrc_work_dir()

    assert resolved == override
103+
104+
66105
def test_make_tempfile_path_empty_file() -> None:
67106
"""A `None` payload leaves the created file with zero length."""
68107
p = file.make_tempfile_path("t-", None)
@@ -94,3 +133,35 @@ def test_decompress_gzip_bounded_rejects_oversized_output() -> None:
94133
compressed = gzip.compress(raw)
95134
with pytest.raises(ValueError, match="gzip bomb"):
96135
file.decompress_gzip_bounded(compressed, max_bytes=1000)
136+
137+
138+
def test_is_gzip_or_tar_archive_posix_tar() -> None:
    """``file -b`` output naming a POSIX tar archive is accepted."""

    def fake_file_cmd(
        cmd: Sequence[str | Path],
    ) -> subprocess.CompletedProcess[str]:
        # Stand-in for the real command runner: always reports a tar archive.
        argv = [str(part) for part in cmd]
        return subprocess.CompletedProcess(argv, 0, stdout="POSIX tar archive\n", stderr="")

    recognized = file.is_gzip_or_tar_archive(Path("/tmp/archive.tar"), file_cmd=fake_file_cmd)
    assert recognized
152+
153+
154+
def test_is_gzip_or_tar_archive_rejects_other_types() -> None:
    """``file -b`` output that names neither gzip nor tar is rejected."""

    def fake_file_cmd(
        cmd: Sequence[str | Path],
    ) -> subprocess.CompletedProcess[str]:
        # Stand-in for the real command runner: always reports plain text.
        argv = [str(part) for part in cmd]
        return subprocess.CompletedProcess(argv, 0, stdout="ASCII text\n", stderr="")

    recognized = file.is_gzip_or_tar_archive(Path("/tmp/readme.txt"), file_cmd=fake_file_cmd)
    assert not recognized
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""Parse charon parameter files used by NRRC/MRRC publish tasks."""
2+
3+
from __future__ import annotations
4+
5+
import re
6+
import shutil
7+
from pathlib import Path
8+
9+
from dotenv import dotenv_values
10+
11+
_REGISTRY_SPLIT = re.compile(r"%")
12+
13+
14+
def load_charon_env(path: Path) -> dict[str, str]:
    """Read charon parameters from the dotenv file at *path*.

    Args:
        path: Location of a dotenv file made of ``KEY=value`` lines.

    Returns:
        A mapping of every key whose parsed value is not ``None``.

    Raises:
        FileNotFoundError: If *path* is not an existing regular file.
    """
    if not path.is_file():
        raise FileNotFoundError(f"charon env file not found: {path}")

    parsed = dotenv_values(path, encoding="utf-8")
    loaded: dict[str, str] = {}
    for name, raw in parsed.items():
        # Entries the parser reports with a None value are left out.
        if raw is not None:
            loaded[name] = raw
    return loaded
20+
21+
22+
def split_oci_registries(value: str) -> list[str]:
    """Split ``CHARON_OCI_REGISTRY`` on ``%`` into non-empty registry references.

    Args:
        value: Raw setting, with registry references separated by ``%``.

    Returns:
        The references in their original order, each stripped of surrounding
        whitespace; empty and whitespace-only segments are dropped.
    """
    # A literal one-character separator needs no regex; strip each part once.
    stripped = (part.strip() for part in value.split("%"))
    return [part for part in stripped if part]
25+
26+
27+
def short_sha256_prefix(registry: str) -> str:
    """Return the first six characters of the digest in *registry*.

    Args:
        registry: OCI reference of the form ``<repository>@sha256:<digest>``.

    Returns:
        Up to six leading characters of the digest that follows the first
        ``@sha256:`` marker.

    Raises:
        ValueError: If the marker is missing, or if the digest prefix is empty
            or contains anything other than hexadecimal digits.
    """
    marker = "@sha256:"
    _, found, digest = registry.partition(marker)
    if not found:
        raise ValueError(f"registry reference missing @sha256: digest: {registry!r}")

    prefix = digest[:6]
    # The prefix may end up as a directory or file-name component, so reject an
    # empty value and characters such as "/" or "." that could redirect a path.
    if not prefix or any(ch not in "0123456789abcdefABCDEF" for ch in prefix):
        raise ValueError(f"registry reference has an invalid sha256 digest: {registry!r}")
    return prefix
33+
34+
35+
def source_repo(registry: str) -> str:
    """Return the part of an OCI reference that precedes ``@sha256:``.

    When the marker is absent the whole *registry* string is returned.
    """
    repository, _, _ = registry.partition("@sha256:")
    return repository
38+
39+
40+
def require_env_keys(env: dict[str, str], *keys: str) -> None:
    """Check that every name in *keys* is present in *env*.

    Raises:
        ValueError: Naming the first key, in argument order, that is absent.
    """
    for name in keys:
        if name in env:
            continue
        raise ValueError(f"missing required charon env variable: {name}")
45+
46+
47+
def require_oci_registries(env: dict[str, str]) -> list[str]:
    """Return the registry references listed in ``CHARON_OCI_REGISTRY``.

    Args:
        env: Parsed charon environment.

    Returns:
        The non-empty references produced by ``split_oci_registries``.

    Raises:
        ValueError: If the key is absent, or if it yields no references.
    """
    try:
        raw = env["CHARON_OCI_REGISTRY"]
    except KeyError as missing:
        raise ValueError("CHARON_OCI_REGISTRY is required in charon env file") from missing

    entries = split_oci_registries(raw)
    if entries:
        return entries
    raise ValueError("CHARON_OCI_REGISTRY must list at least one registry reference")
57+
58+
59+
def charon_config_path(*, home: Path | None = None) -> Path:
60+
"""Return the default charon configuration file path under *home* or ``Path.home()``."""
61+
root = home if home is not None else Path.home()
62+
return root / ".charon" / "charon.yaml"
63+
64+
65+
def install_charon_config(config_source: Path, *, home: Path | None = None) -> Path:
    """Copy *config_source* to the charon configuration path and return that path.

    Args:
        config_source: Existing charon configuration file to install.
        home: Optional home directory forwarded to ``charon_config_path``.

    Returns:
        The destination path the file was copied to.

    Raises:
        FileNotFoundError: If *config_source* is not an existing regular file.
    """
    if not config_source.is_file():
        raise FileNotFoundError(f"charon config file not found: {config_source}")

    target = charon_config_path(home=home)
    # Create the destination's parent directory on first use.
    target.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(config_source, target)
    return target
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#!/usr/bin/env python3
2+
"""Download npm archives from OCI registries for publish-to-nrrc.
3+
4+
Tekton injects ``DATA_DIR``, ``CHARON_PARAM_FILE_PATH``, and optionally
5+
``WORK_DIR`` (default ``/var/workdir/nrrc``; catalog sets ``/workdir/nrrc``) via env.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
import subprocess
11+
from collections.abc import Callable
12+
from pathlib import Path
13+
14+
import charon_env
15+
import file
16+
import subprocess_cmd
17+
import tekton
18+
from logger import logger
19+
20+
21+
def _pull_oci_archive(
    registry: str,
    dest: Path,
    run_cmd: Callable[..., subprocess.CompletedProcess[str]],
) -> None:
    """Pull *registry* into *dest* with ``oras`` using a temporary auth file."""
    auth_file = file.make_tempfile_path("oci-auth-", None)
    try:
        auth = run_cmd(
            ["select-oci-auth", charon_env.source_repo(registry)],
            check=True,
        ).stdout
        auth_file.write_text(auth, encoding="utf-8")
        run_cmd(
            [
                "oras",
                "pull",
                "--registry-config",
                str(auth_file),
                registry,
                "-o",
                str(dest),
            ],
            check=True,
        )
    finally:
        # Remove the auth file whether or not the commands succeeded.
        auth_file.unlink(missing_ok=True)


def _collect_archives(
    source_dir: Path,
    shared_repo: Path,
    short_hash: str,
    run_cmd: Callable[..., subprocess.CompletedProcess[str]],
) -> None:
    """Move gzip/tar files found under *source_dir* into *shared_repo*."""
    # Snapshot and sort the tree before moving anything: files are renamed out
    # of source_dir below, and a fixed order makes it deterministic which file
    # wins when two share the same name.
    for found in sorted(source_dir.rglob("*")):
        if not found.is_file():
            continue
        if not file.is_gzip_or_tar_archive(found, file_cmd=run_cmd):
            continue
        move_to = shared_repo / f"{short_hash}_{found.name}"
        if move_to.exists():
            logger.warning("%s already exists, skipped", move_to)
            continue
        found.rename(move_to)


def prepare_repo(
    *,
    charon_param_file: Path,
    work_dir: Path,
    run_cmd: Callable[..., subprocess.CompletedProcess[str]] = subprocess_cmd.run_cmd,
) -> None:
    """Download OCI archives and collect gzip/tar files under ``shared/``.

    Args:
        charon_param_file: Charon env file that lists the registries in
            ``CHARON_OCI_REGISTRY``.
        work_dir: Staging directory. Each registry is pulled into a
            sub-directory named after its short digest prefix, and recognized
            archives are moved into ``work_dir / "shared"``.
        run_cmd: Command runner; it is also passed as ``file_cmd`` when
            checking archive types.

    Raises:
        FileNotFoundError: If *charon_param_file* does not exist.
        ValueError: If the registries are missing or a reference has no
            ``@sha256:`` digest.
    """
    env = charon_env.load_charon_env(charon_param_file)
    registries = charon_env.require_oci_registries(env)

    shared_repo = work_dir / "shared"
    shared_repo.mkdir(parents=True, exist_ok=True)

    for registry in registries:
        logger.info("Downloading the npm archive from %s", registry)
        short_hash = charon_env.short_sha256_prefix(registry)
        subdir = work_dir / short_hash
        subdir.mkdir(parents=True, exist_ok=True)

        _pull_oci_archive(registry, subdir, run_cmd)
        _collect_archives(subdir, shared_repo, short_hash, run_cmd)
74+
75+
76+
def main() -> int:
    """Prepare npm archives for upload using settings read from the environment.

    ``DATA_DIR`` and ``CHARON_PARAM_FILE_PATH`` are required; the parameter
    file path is resolved beneath the data directory before use.

    Returns:
        ``0`` once the archives have been prepared.
    """
    data_root = Path(tekton.require_env("DATA_DIR"))
    staging_dir = file.nrrc_work_dir()
    param_relative = tekton.require_env("CHARON_PARAM_FILE_PATH")
    param_file = file.resolve_path_under_base(data_root, param_relative)

    prepare_repo(charon_param_file=param_file, work_dir=staging_dir)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

0 commit comments

Comments
 (0)