From 5122ffed143693975c065379fd0431fb99f2bda3 Mon Sep 17 00:00:00 2001 From: gibsondan Date: Fri, 22 May 2026 15:07:25 -0500 Subject: [PATCH] remove PyGithub from PEX, use requests for GitHub API calls The PEX only used PyGithub for two small scripts (fetch_github_avatar.py and create_or_update_comment.py). Rewriting them against requests (which is already a transitive dep in the PEX) drops PyGithub and gives us automatic retry on transient 5xx/429 via urllib3 Retry. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/release.py | 2 +- src/Dockerfile | 1 + src/create_or_update_comment.py | 82 ++++++++++++++++++++++----------- src/fetch_github_avatar.py | 22 ++++++--- src/github_session.py | 33 +++++++++++++ 5 files changed, 105 insertions(+), 35 deletions(-) create mode 100644 src/github_session.py diff --git a/scripts/release.py b/scripts/release.py index 394c1d64..141883c0 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -143,7 +143,7 @@ def build_dagster_cloud_pex( dagster_dg_core_pkg, dagster_pipes_pkg, dagster_shared_pkg, - "PyGithub", + "requests", "pex>=2.1.132,<3", "pip", f"-o={output_name}", diff --git a/src/Dockerfile b/src/Dockerfile index ff0aec09..65a49b72 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -33,6 +33,7 @@ COPY src/expand_env_vars.py /expand_env_vars.py COPY src/create_or_update_comment.py /create_or_update_comment.py COPY src/expand_json_env.py /expand_json_env.py COPY src/fetch_github_avatar.py /fetch_github_avatar.py +COPY src/github_session.py /github_session.py COPY src/parse_workspace.py parse_workspace.py diff --git a/src/create_or_update_comment.py b/src/create_or_update_comment.py index 8bdf0f87..5ed3266e 100644 --- a/src/create_or_update_comment.py +++ b/src/create_or_update_comment.py @@ -1,12 +1,15 @@ import datetime -import github -from github import Github import os +import sys + +from github_session import GITHUB_API, github_session """ Creates or updates a build status comment on a Pull Request, for branch deployments. """ +COMMENTS_PAGE_SIZE = 100 + SUCCESS_IMAGE_URL = ( "https://raw.githubusercontent.com/dagster-io/dagster-cloud-action/main/assets/success.png" ) @@ -17,9 +20,9 @@ "https://raw.githubusercontent.com/dagster-io/dagster-cloud-action/main/assets/failed.png" ) + def main(): - # Fetch various pieces of info from the environment - g = Github(os.getenv("GITHUB_TOKEN")) + token = os.getenv("GITHUB_TOKEN") pr_id = int(os.getenv("INPUT_PR")) repo_id = os.getenv("GITHUB_REPOSITORY") action = os.getenv("INPUT_ACTION") @@ -30,23 +33,11 @@ def main(): location_name = os.getenv("INPUT_LOCATION_NAME") - repo = g.get_repo(repo_id) - pr = repo.get_pull(pr_id) - - comments = pr.get_issue_comments() - comment_to_update = None - - # Check if a comment exists on the PR from the github actions user - # which is specific to this location name - # otherwise we create a new comment - for comment in comments: - if ( - comment.user.login == "github-actions[bot]" - and "Dagster Cloud" in comment.body - and f"`{location_name}`" in comment.body - ): - comment_to_update = comment - break + # PATCH on a specific comment is effectively idempotent here (same body, + # same id), so opt it in to retries. POST is left out to avoid duplicate + # comment creation if GitHub processes the request but the response is lost. + session = github_session(token, retry_methods=("GET", "PATCH")) + comment_to_update_id = _find_existing_comment(session, repo_id, pr_id, location_name) deployment_url = f"{org_url}/{deployment_name}/home" @@ -64,19 +55,56 @@ def main(): time_str = datetime.datetime.now(datetime.timezone.utc).strftime("%b %d, %Y at %I:%M %p (%Z)") - message = f""" + body = f""" Your pull request is automatically being deployed to Dagster Cloud. | Location | Status | Link | Updated | -| ----------------- | --------------- | ------- | --------------- | +| ----------------- | --------------- | ------- | --------------- | | `{location_name}` | {status_image} | {message} | {time_str} | """ - if comment_to_update: - comment_to_update.edit(message) + if comment_to_update_id is not None: + resp = session.patch( + f"{GITHUB_API}/repos/{repo_id}/issues/comments/{comment_to_update_id}", + json={"body": body}, + timeout=60, + ) else: - pr.create_issue_comment(message) + resp = session.post( + f"{GITHUB_API}/repos/{repo_id}/issues/{pr_id}/comments", + json={"body": body}, + timeout=60, + ) + resp.raise_for_status() + + +def _find_existing_comment(session, repo_id, pr_id, location_name): + # Check if a comment exists on the PR from the github actions user + # which is specific to this location name + page = 1 + while True: + resp = session.get( + f"{GITHUB_API}/repos/{repo_id}/issues/{pr_id}/comments", + params={"per_page": COMMENTS_PAGE_SIZE, "page": page}, + timeout=60, + ) + resp.raise_for_status() + comments = resp.json() + if not comments: + return None + for comment in comments: + user_login = (comment.get("user") or {}).get("login") + comment_body = comment.get("body") or "" + if ( + user_login == "github-actions[bot]" + and "Dagster Cloud" in comment_body + and f"`{location_name}`" in comment_body + ): + return comment["id"] + if len(comments) < COMMENTS_PAGE_SIZE: + return None + page += 1 if __name__ == "__main__": - main() + sys.exit(main()) diff --git a/src/fetch_github_avatar.py b/src/fetch_github_avatar.py index b81fc64c..c49333bd 100644 --- a/src/fetch_github_avatar.py +++ b/src/fetch_github_avatar.py @@ -1,5 +1,9 @@ -from github import Github import os +import sys + +import requests + +from github_session import GITHUB_API, github_session """ Fetches a user's avatar from the Github API based on email or username @@ -7,16 +11,20 @@ def main(): - # Fetch various pieces of info from the environment - g = Github(os.getenv("GITHUB_TOKEN")) - + token = os.getenv("GITHUB_TOKEN") repo_id = os.getenv("GITHUB_REPOSITORY") commit_sha = os.getenv("GITHUB_SHA") - repo = g.get_repo(repo_id) - commit = repo.get_commit(commit_sha) + session = github_session(token) + try: + resp = session.get(f"{GITHUB_API}/repos/{repo_id}/commits/{commit_sha}", timeout=60) + resp.raise_for_status() + except requests.RequestException as err: + print(f"Failed to fetch commit {commit_sha}: {err}", file=sys.stderr) + sys.exit(1) - print(commit.author.avatar_url) + author = resp.json().get("author") or {} + print(author.get("avatar_url", "")) if __name__ == "__main__": diff --git a/src/github_session.py b/src/github_session.py new file mode 100644 index 00000000..bc194728 --- /dev/null +++ b/src/github_session.py @@ -0,0 +1,33 @@ +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +GITHUB_API = "https://api.github.com" + + +def github_session(token, retry_methods=("GET",)): + """A requests.Session preconfigured for the GitHub REST API. + + Sets the recommended Accept / API-version / User-Agent headers and a + bearer Authorization header when a token is provided, and mounts an + HTTPAdapter that retries transient 429/5xx responses for the given + HTTP methods (idempotent methods only by default). + """ + retry = Retry( + total=5, + backoff_factor=1, + status_forcelist=(429, 500, 502, 503, 504), + allowed_methods=frozenset(retry_methods), + ) + session = requests.Session() + session.mount("https://", HTTPAdapter(max_retries=retry)) + session.headers.update( + { + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + "User-Agent": "dagster-cloud-action", + } + ) + if token: + session.headers["Authorization"] = f"Bearer {token}" + return session