Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@ Release metadata is stored in versioned ndjson files:
- `v1/` - The version of the schema
- `<project>.ndjson` - The release metadata for a given project

Each line in the NDJSON files represents one release. `date` should be the
GitHub release publish time in canonical UTC RFC3339 form, e.g.:

```json
{
"version": "0.8.3",
"date": "2025-07-29T16:45:46Z",
"artifacts": [
{
"platform": "aarch64-apple-darwin",
Expand All @@ -29,8 +30,9 @@ Each line in the NDJSON files represents one release, e.g.:

## Adding versions

Use `insert-versions.py` to add versions. It reads NDJSON in the above format from stdin and merges
them into the target file, deduplicating by version string, normalizing timestamps, and keeping the
file sorted newest-first.

```bash
echo '{"version":"1.0.0","date":"...","artifacts":[...]}' | uv run scripts/insert-versions.py --name uv
Expand Down
58 changes: 48 additions & 10 deletions scripts/backfill-versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
import re
import sys
import time
from datetime import datetime, timedelta, timezone
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, NotRequired, TypedDict
from typing import Any, TypedDict

import httpx

Expand Down Expand Up @@ -88,13 +88,40 @@ def extract_platform_from_filename(filename: str, project_name: str) -> str | No


def parse_github_datetime(value: str) -> datetime | None:
"""Parse GitHub ISO timestamps."""
"""Parse an ISO 8601 timestamp and normalize it to UTC."""
if not value:
return None
try:
return datetime.fromisoformat(value.replace("Z", "+00:00"))
parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
except ValueError:
return None
if parsed.tzinfo is None:
return None
return parsed.astimezone(timezone.utc)


def format_timestamp(value: datetime) -> str:
    """Render *value* as a canonical UTC RFC3339 string (trailing "Z")."""
    utc_value = value.astimezone(timezone.utc)
    # isoformat() on an aware UTC datetime always ends in "+00:00".
    return utc_value.isoformat().replace("+00:00", "Z")


def normalize_timestamp(value: str) -> str | None:
    """Normalize a timestamp string to canonical UTC RFC3339 form.

    Returns None when the input cannot be parsed as an aware timestamp.
    """
    parsed = parse_github_datetime(value)
    return None if parsed is None else format_timestamp(parsed)


def sort_versions_desc(versions: list[Version]) -> None:
    """Sort versions newest-first in place, using parsed timestamps."""
    # Entries whose date fails to parse fall back to the oldest possible
    # aware datetime, so they sink to the end of the newest-first order.
    oldest = datetime.min.replace(tzinfo=timezone.utc)

    def by_date(version: Version) -> datetime:
        return parse_github_datetime(version["date"]) or oldest

    versions.sort(key=by_date, reverse=True)


def parse_sha256sums(text: str) -> dict[str, str]:
Expand Down Expand Up @@ -279,6 +306,10 @@ def process_pbs_release(
release: dict[str, Any], published_at: str, client: httpx.Client
) -> list[Version]:
"""Process python-build-standalone releases into our version format."""
normalized_published_at = normalize_timestamp(published_at)
if normalized_published_at is None:
return []

assets = release.get("assets", [])
if not assets:
return []
Expand Down Expand Up @@ -327,7 +358,7 @@ def process_pbs_release(
versions.append(
{
"version": version,
"date": published_at,
"date": normalized_published_at,
"artifacts": artifacts,
}
)
Expand Down Expand Up @@ -358,11 +389,18 @@ def process_release(
return []

published_datetime = parse_github_datetime(published_at)
if cutoff and published_datetime and published_datetime < cutoff:
if published_datetime is None:
return []

if cutoff and published_datetime < cutoff:
return []

normalized_published_at = normalize_timestamp(published_at)
if normalized_published_at is None:
return []

if project_name == "python-build-standalone":
return process_pbs_release(release, published_at, client)
return process_pbs_release(release, normalized_published_at, client)

# Fetch all checksums for this release
checksums = fetch_release_checksums(release, client)
Expand Down Expand Up @@ -411,7 +449,7 @@ def process_release(
return [
{
"version": tag_name,
"date": published_at,
"date": normalized_published_at,
"artifacts": artifacts,
}
]
Expand Down Expand Up @@ -497,7 +535,7 @@ def main() -> None:
new_version_ids = {v["version"] for v in new_versions}
merged = [v for v in existing if v["version"] not in new_version_ids]
merged.extend(new_versions)
merged.sort(key=lambda v: v["date"], reverse=True)
sort_versions_desc(merged)

versions = merged
print(f"Merged into {len(versions)} total versions", file=sys.stderr)
Expand All @@ -524,7 +562,7 @@ def main() -> None:
versions.append(version)

# Sort by date (newest first)
versions.sort(key=lambda v: v["date"], reverse=True)
sort_versions_desc(versions)

print(f"Processed {len(versions)} valid versions", file=sys.stderr)

Expand Down
54 changes: 52 additions & 2 deletions scripts/convert-cargo-dist-plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,17 @@
"""Convert cargo-dist plan JSON to a version NDJSON line.

Reads `cargo dist plan --output-format=json` from stdin and outputs
a single NDJSON line to stdout.
one NDJSON line to stdout.

The output `date` is normalized to UTC RFC3339 and comes from the
GitHub release's `published_at` timestamp.

Usage:
cargo dist plan --output-format=json | convert-cargo-dist-plan.py
"""

import json
import os
import re
import sys
import time
Expand All @@ -34,6 +38,33 @@ def get_archive_format(filename: str) -> str:
return "unknown"


def build_github_headers() -> dict[str, str]:
    """Build GitHub API headers, using GITHUB_TOKEN when available."""
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        return {
            "Accept": "application/vnd.github.v3+json",
            "Authorization": f"Bearer {token}",
        }
    return {"Accept": "application/vnd.github.v3+json"}


def parse_timestamp(value: str) -> datetime:
    """Parse an ISO 8601 timestamp and normalize it to UTC.

    Raises ValueError for unparseable or naive (offset-less) input.
    """
    # Map the RFC3339 "Z" suffix to an explicit offset for fromisoformat.
    parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    if parsed.tzinfo is not None:
        return parsed.astimezone(timezone.utc)
    raise ValueError(f"timestamp must include a timezone offset: {value!r}")


def format_timestamp(value: datetime) -> str:
    """Format a datetime in canonical UTC RFC3339 form ("Z" suffix)."""
    as_utc = value.astimezone(timezone.utc)
    rendered = as_utc.isoformat()
    return rendered.replace("+00:00", "Z")


def normalize_timestamp(value: str) -> str:
    """Normalize a timestamp string to canonical UTC RFC3339 form."""
    parsed = parse_timestamp(value)
    return format_timestamp(parsed)


def fetch_sha256(client: httpx.Client, url: str) -> str | None:
"""Fetch SHA256 checksum from a .sha256 URL."""
for attempt in range(1, 4):
Expand All @@ -52,6 +83,24 @@ def fetch_sha256(client: httpx.Client, url: str) -> str | None:
return None


def fetch_release_published_at(
    client: httpx.Client, org: str, repo: str, tag: str
) -> str:
    """Fetch and normalize the GitHub release published_at timestamp.

    Raises for HTTP errors and when the release payload lacks a usable
    published_at string.
    """
    url = f"https://api.github.com/repos/{org}/{repo}/releases/tags/{tag}"
    response = client.get(url, headers=build_github_headers())
    response.raise_for_status()

    published_at = response.json().get("published_at")
    if isinstance(published_at, str) and published_at:
        return normalize_timestamp(published_at)

    raise ValueError(
        f"GitHub release {org}/{repo}@{tag} did not include a published_at timestamp"
    )


def extract_github_info(manifest: dict[str, Any]) -> tuple[str, str, str]:
"""Extract GitHub org, repo, and app name from manifest.
Expand Down Expand Up @@ -84,6 +133,7 @@ def extract_version_info(
"""Extract version information from cargo-dist manifest."""
version = manifest["announcement_tag"]
github_org, github_repo, app_name = extract_github_info(manifest)
published_at = fetch_release_published_at(client, github_org, github_repo, version)
artifacts_data = []

for release in manifest.get("releases", []):
Expand Down Expand Up @@ -130,7 +180,7 @@ def extract_version_info(

return {
"version": version,
"date": datetime.now(timezone.utc).isoformat(),
"date": published_at,
"artifacts": artifacts_data,
}

Expand Down
75 changes: 68 additions & 7 deletions scripts/insert-versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
# ///
"""Insert version objects into an NDJSON versions file.

Reads NDJSON from stdin (one version object per line) and prepends them
to the target file, deduplicating by version string.
Reads NDJSON from stdin (one version object per line), merges it into the target
file, deduplicates by version string, normalizes timestamps, and keeps versions
sorted newest-first.

Usage:
echo '{"version":"1.0.0","date":"...","artifacts":[...]}' | insert-versions.py --name uv
Expand All @@ -15,13 +16,49 @@
import argparse
import json
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

REQUIRED_ARTIFACT_KEYS = {"platform", "variant", "url", "archive_format", "sha256"}
VALID_ARCHIVE_FORMATS = {"tar.gz", "tar.zst", "zip"}


def validate_version(entry: dict) -> list[str]:
def parse_timestamp(value: str) -> datetime:
    """Parse an ISO 8601 timestamp and normalize it to UTC.

    Raises ValueError for unparseable or naive (offset-less) input.
    """
    # fromisoformat needs a numeric offset, so rewrite a trailing "Z".
    iso_value = value.replace("Z", "+00:00")
    parsed = datetime.fromisoformat(iso_value)
    if parsed.tzinfo is None:
        raise ValueError(f"timestamp must include a timezone offset: {value!r}")
    return parsed.astimezone(timezone.utc)


def format_timestamp(value: datetime) -> str:
    """Format a datetime in canonical UTC RFC3339 form."""
    utc_text = value.astimezone(timezone.utc).isoformat()
    # An aware UTC isoformat always carries the "+00:00" suffix.
    return utc_text.replace("+00:00", "Z")


def normalize_timestamp(value: str) -> str:
    """Normalize a timestamp string to canonical UTC RFC3339 form."""
    parsed = parse_timestamp(value)
    return format_timestamp(parsed)


def normalize_versions_in_place(versions: list[dict[str, Any]]) -> None:
    """Rewrite each version's date field in canonical UTC RFC3339 form.

    Raises ValueError when an entry has no usable date string.
    """
    for entry in versions:
        date_value = entry.get("date")
        if isinstance(date_value, str) and date_value:
            entry["date"] = normalize_timestamp(date_value)
        else:
            raise ValueError(
                f"version {entry.get('version', '<unknown>')!r} is missing a valid date"
            )


def sort_versions_desc(versions: list[dict[str, Any]]) -> None:
    """Sort versions in place so the newest release comes first."""

    def by_date(entry: dict[str, Any]) -> datetime:
        return parse_timestamp(entry["date"])

    versions.sort(key=by_date, reverse=True)


def validate_version(entry: dict[str, Any]) -> list[str]:
"""Validate a version entry against the expected schema.

Returns a list of error messages (empty if valid).
Expand All @@ -31,8 +68,14 @@ def validate_version(entry: dict) -> list[str]:
if not isinstance(entry.get("version"), str) or not entry["version"]:
errors.append("missing or empty 'version'")

if not isinstance(entry.get("date"), str) or not entry["date"]:
raw_date = entry.get("date")
if not isinstance(raw_date, str) or not raw_date:
errors.append("missing or empty 'date'")
else:
try:
normalize_timestamp(raw_date)
except ValueError as e:
errors.append(f"invalid 'date': {e}")

artifacts = entry.get("artifacts")
if not isinstance(artifacts, list) or not artifacts:
Expand Down Expand Up @@ -102,6 +145,8 @@ def main() -> None:
print("No versions provided on stdin", file=sys.stderr)
sys.exit(1)

normalize_versions_in_place(new_versions)

# Sort artifacts within each version by (platform, variant)
for version in new_versions:
version["artifacts"].sort(key=lambda a: (a["platform"], a["variant"]))
Expand All @@ -128,22 +173,38 @@ def main() -> None:
except json.JSONDecodeError:
continue

try:
normalize_versions_in_place(existing)
except ValueError as e:
print(
f"Error normalizing existing versions in {versions_path}: {e}",
file=sys.stderr,
)
sys.exit(1)

# Deduplicate: remove existing entries that match incoming version strings
incoming_version_strings = {v["version"] for v in new_versions}
existing = [v for v in existing if v["version"] not in incoming_version_strings]

# Prepend new versions
# Merge and sort newest-first
versions = new_versions + existing
sort_versions_desc(versions)

# Write compact NDJSON
with open(versions_path, "w") as f:
for version in versions:
f.write(json.dumps(version, separators=(",", ":")) + "\n")

if len(new_versions) == 1:
print(f"Inserted version {new_versions[0]['version']} into {versions_path}", file=sys.stderr)
print(
f"Inserted version {new_versions[0]['version']} into {versions_path}",
file=sys.stderr,
)
else:
print(f"Inserted {len(new_versions)} versions into {versions_path}", file=sys.stderr)
print(
f"Inserted {len(new_versions)} versions into {versions_path}",
file=sys.stderr,
)


if __name__ == "__main__":
Expand Down
60 changes: 30 additions & 30 deletions v1/python-build-standalone.ndjson

Large diffs are not rendered by default.

Loading