-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfetch-ghcr-stats.py
More file actions
executable file
·106 lines (78 loc) · 3.23 KB
/
fetch-ghcr-stats.py
File metadata and controls
executable file
·106 lines (78 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python3
"""
Fetch GHCR (GitHub Container Registry) download counts.
GitHub doesn't expose container download counts via API, so this script
attempts to scrape them from the packages UI page.
"""
import re
import sys
import json
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError
# Matches a container-package link and, lazily across lines, the first
# "<number> downloads" / "<number> total downloads" text that follows it.
# Group 1 is the text after "packages/container/" up to the closing quote of
# the href; group 2 is the count, possibly with thousands separators.
_PACKAGE_DOWNLOADS_RE = re.compile(
    r'packages/container/([^"]+)"[^>]*>.*?(\d[\d,]*)\s*(?:total\s+)?downloads?',
    re.DOTALL | re.IGNORECASE,
)


def _parse_download_counts(html: str) -> dict[str, int]:
    """Extract ``{package name: download count}`` pairs from packages-page HTML."""
    # NOTE(review): the lazy ``.*?`` pairs each link with the next count found
    # anywhere after it, so a package rendered without a count would pick up
    # the following package's number -- confirm against the live page markup.
    # When a name matches more than once, the last match wins.
    return {
        match.group(1): int(match.group(2).replace(',', ''))
        for match in _PACKAGE_DOWNLOADS_RE.finditer(html)
    }


def fetch_ghcr_stats(org: str, repo: str, *, timeout: float = 10) -> dict[str, int]:
    """
    Fetch GHCR download counts by scraping the packages page.

    This is a best-effort helper: it never raises. Any failure is reported
    on stderr and an empty dictionary is returned instead.

    Args:
        org: GitHub organization name
        repo: Repository name
        timeout: Seconds to wait for the HTTP request (keyword-only)

    Returns:
        Dictionary mapping package names to download counts; empty when the
        page could not be fetched or contained no recognizable counts
    """
    url = f"https://github.com/orgs/{org}/packages?repo_name={repo}"

    # Create request with headers to mimic a browser
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
    }

    try:
        request = Request(url, headers=headers)
        with urlopen(request, timeout=timeout) as response:
            # Tolerate stray invalid bytes: one bad byte should not discard
            # an otherwise parseable page.
            html = response.read().decode('utf-8', errors='replace')
        # Parsing stays inside the guard so the never-raises contract holds.
        return _parse_download_counts(html)
    except URLError as e:
        # HTTPError is a subclass of URLError, so this covers both.
        print(f"Error fetching data: {e}", file=sys.stderr)
        return {}
    except Exception as e:
        # Deliberate catch-all: callers treat an empty dict as "failed".
        print(f"Unexpected error: {e}", file=sys.stderr)
        return {}
def format_number(num: int) -> str:
    """Render ``num`` with commas as thousands separators, e.g. ``1,234,567``."""
    return format(num, ",")
def main():
    """Scrape download counts for the hard-coded repository and report them.

    Writes the counts as JSON to stdout and a human-readable table to
    stderr. Exits with status 1 when no counts were obtained.
    """
    org, repo = "homeassistant-ai", "ha-mcp"

    def report(text):
        # Diagnostics and the table go to stderr so stdout stays pure JSON.
        print(text, file=sys.stderr)

    report(f"Fetching GHCR stats for {org}/{repo}...")
    packages = fetch_ghcr_stats(org, repo)

    if not packages:
        for line in (
            "Failed to fetch GHCR stats",
            "Please update manually from:",
            f"https://github.com/orgs/{org}/packages?repo_name={repo}",
        ):
            report(line)
        sys.exit(1)

    # Machine-readable output, meant for parsing by a shell script.
    print(json.dumps(packages, indent=2))

    # Human-readable table, one row per package in name order.
    rule = "-" * 50
    report("\nGHCR Download Counts:")
    report(rule)
    for name in sorted(packages):
        report(f" {name:30s} {format_number(packages[name]):>15s}")
    report(rule)
    report(f" {'TOTAL':30s} {format_number(sum(packages.values())):>15s}")
# Script entry point: run main() only when executed directly, not on import.
if __name__ == "__main__":
    main()