|
15 | 15 | from pathlib import Path |
16 | 16 | from typing import TYPE_CHECKING, NotRequired, TypedDict, cast |
17 | 17 |
|
18 | | -from github import Github |
| 18 | +from github import Github, GithubException, UnknownObjectException |
19 | 19 | from yaml import safe_dump, safe_load |
20 | 20 |
|
21 | 21 | from ._logging import log, setup_logging |
@@ -57,17 +57,80 @@ def merge_repos(known: Iterable[Repo], new: Iterable[str]) -> list[Repo]: |
57 | 57 | return repos |
58 | 58 |
|
59 | 59 |
|
def filter_repos(repos: list[Repo], github_token: str | None) -> list[Repo]:
    """Filter repositories based on their GitHub status.

    Removes archived repositories, non-existent repositories, and repositories
    that no longer have .cruft.json in their root. Updates URLs for moved
    repositories; if the new URL is already listed, the stale entry is dropped
    instead so that no duplicates are produced.

    A repository is only removed as "non-existent" when GitHub answers with a
    404 for it. If looking it up fails with any other GitHub API error (e.g. a
    rate limit or a server error), the entry is kept unchanged, so a transient
    failure cannot silently shrink the registry.

    Parameters
    ----------
    repos
        List of repositories to filter. Entries of moved repositories have
        their ``"url"`` updated in place.
    github_token
        GitHub API token for authentication

    Returns
    -------
    Filtered list of repositories

    Raises
    ------
    AssertionError
        If a repository URL does not start with ``https://github.com/``.
    """
    github_url_prefix = "https://github.com/"
    g = Github(github_token)
    filtered_repos: list[Repo] = []
    # Every URL that is (or will be) present in the registry; used to detect
    # that a moved repo's new location is already listed.
    known_urls = {repo["url"] for repo in repos}

    for repo in repos:
        url = repo["url"]
        if not url.startswith(github_url_prefix):
            msg = f"Not a GitHub repository URL: {url}"
            raise AssertionError(msg)
        # Strip only the leading prefix, leaving the "owner/name" part.
        repo_name = url.removeprefix(github_url_prefix)
        log.info(f"Checking repo {repo_name}")

        try:
            gh_repo = g.get_repo(repo_name)
        except UnknownObjectException as e:
            # 404: the repo is gone or not visible with this token.
            log.info(f"Removing non-existent or inaccessible repo: {repo_name} ({e})")
            continue
        except GithubException as e:
            # Any other API failure says nothing about the repo itself, so the
            # entry is kept as-is rather than being dropped from the registry.
            log.warning(f"Could not check repo, keeping it unchanged: {repo_name} ({e})")
            filtered_repos.append(repo)
            continue

        # Check if repo is archived
        if gh_repo.archived:
            log.info(f"Removing archived repo: {repo_name}")
            continue

        # Check if repo has been moved/renamed
        new_url = gh_repo.html_url
        if new_url != url:
            log.info(f"Repo moved: {url} -> {new_url}")
            if new_url in known_urls:
                # duplicate already exists
                continue
            repo["url"] = new_url
            # Remember the new location so that another stale URL pointing to
            # the same repo is recognised as a duplicate.
            known_urls.add(new_url)

        # Check if .cruft.json exists in root
        try:
            gh_repo.get_contents(".cruft.json")
        except UnknownObjectException:
            log.info(f"Removing repo without .cruft.json: {repo_name}")
            continue

        filtered_repos.append(repo)

    return filtered_repos
| 119 | + |
| 120 | + |
def main(args: Sequence[str] | None = None) -> None:
    """Update the template repository registry file given on the command line.

    Parameters
    ----------
    args
        Command line arguments; ``sys.argv[1:]`` is used when ``None``.
        Exactly one argument, the path of the YAML registry file, is expected;
        otherwise the process exits with a usage message.
    """
    setup_logging()
    argv = sys.argv[1:] if args is None else args
    if len(argv) != 1:
        sys.exit("Usage: register-template-repos template-repos.yml")
    path = Path(argv[0])
    github_token = os.environ["GITHUB_TOKEN"]

    # Combine the repos already in the file with the ones found by the search,
    # then drop the entries that `filter_repos` rejects.
    known = parse_repos(path)
    found = search_repos(github_token)
    repos = filter_repos(merge_repos(known, found), github_token)

    # The file is only rewritten when at least one repo is left.
    if repos:
        by_url = sorted(repos, key=lambda entry: entry["url"])
        with path.open("w") as f:
            safe_dump(by_url, f, sort_keys=False)
|
0 commit comments