Skip to content

Commit 3c57080

Browse files
authored
Refactor GitHub repos list to use async requests (#4383)
Reduce the time taken to fetch multiple pages by performing the requests asynchronously, using up to `ASYNC_CONCURRENCY` concurrent requests. Closes DIAGNijmegen/rse-grand-challenge-admin#664.
1 parent f6fd06b commit 3c57080

1 file changed

Lines changed: 78 additions & 24 deletions

File tree

app/grandchallenge/github/views.py

Lines changed: 78 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,12 @@
1+
import asyncio
12
import hashlib
23
import hmac
34
import json
45
from secrets import compare_digest
56

7+
import httpx
68
import requests
9+
from asgiref.sync import async_to_sync
710
from dal_select2.views import Select2ListView
811
from django.conf import settings
912
from django.contrib.auth.decorators import login_required
@@ -17,6 +20,7 @@
1720
from guardian.mixins import LoginRequiredMixin
1821
from requests import HTTPError
1922

23+
from grandchallenge.components.backends.base import ASYNC_CONCURRENCY
2024
from grandchallenge.github.exceptions import GitHubBadRefreshTokenException
2125
from grandchallenge.github.models import GitHubUserToken, GitHubWebhookMessage
2226
from grandchallenge.github.utils import (
@@ -116,20 +120,18 @@ def github_app_install_url(self):
116120
return f"{settings.GITHUB_APP_INSTALL_URL}?state={self.github_state}"
117121

118122
@cached_property
119-
def github_request_kwargs(self):
123+
def github_request_headers(self):
120124
return {
121-
"headers": {
122-
"Accept": "application/vnd.github+json",
123-
"Authorization": f"token {self.github_user_token.access_token}",
124-
},
125-
"timeout": 5,
125+
"Accept": "application/vnd.github+json",
126+
"Authorization": f"token {self.github_user_token.access_token}",
126127
}
127128

128129
@cached_property
129130
def installations(self):
130-
response = requests.get(
131+
response = httpx.get(
131132
"https://api.github.com/user/installations",
132-
**self.github_request_kwargs,
133+
headers=self.github_request_headers,
134+
timeout=10,
133135
)
134136
response.raise_for_status()
135137
return response.json()["installations"]
@@ -165,7 +167,22 @@ class RepositoriesList(
165167
):
166168
raise_exception = True
167169

168-
def get_repos(self, *, installation_id):
170+
async def _get_page_github_repos(
171+
self, *, installation_id, page, per_page, semaphore, httpx_client
172+
):
173+
async with semaphore:
174+
response = await httpx_client.get(
175+
f"https://api.github.com/user/installations/{installation_id}/repositories",
176+
params={"per_page": per_page, "page": page},
177+
headers=self.github_request_headers,
178+
)
179+
response.raise_for_status()
180+
181+
return response.json()
182+
183+
async def _get_installation_github_repos(
184+
self, *, installation_id, semaphore, httpx_client
185+
):
169186
"""
170187
Get the repositories for this user's installation
171188
@@ -174,30 +191,67 @@ def get_repos(self, *, installation_id):
174191
"""
175192
per_page = 100
176193

177-
def get_page(*, page):
178-
return requests.get(
179-
f"https://api.github.com/user/installations/{installation_id}/repositories",
180-
params={"per_page": per_page, "page": page},
181-
**self.github_request_kwargs,
182-
).json()
194+
response = await self._get_page_github_repos(
195+
installation_id=installation_id,
196+
page=1,
197+
per_page=per_page,
198+
semaphore=semaphore,
199+
httpx_client=httpx_client,
200+
)
183201

184-
response = get_page(page=1)
185202
repos = [repo["full_name"] for repo in response["repositories"]]
186203

187204
remaining_pages = (response["total_count"] - 1) // per_page
188205

189-
for ii in range(remaining_pages):
190-
repos += [
191-
repo["full_name"]
192-
for repo in get_page(page=ii + 2)["repositories"]
193-
]
206+
tasks = []
207+
208+
async with asyncio.TaskGroup() as task_group:
209+
for ii in range(remaining_pages):
210+
page = ii + 2
211+
tasks.append(
212+
task_group.create_task(
213+
self._get_page_github_repos(
214+
installation_id=installation_id,
215+
page=page,
216+
per_page=per_page,
217+
semaphore=semaphore,
218+
httpx_client=httpx_client,
219+
)
220+
)
221+
)
222+
223+
for task in tasks:
224+
response = task.result()
225+
repos += [repo["full_name"] for repo in response["repositories"]]
194226

195227
return repos
196228

197-
def get_list(self):
229+
@async_to_sync
230+
async def _get_all_github_repos(self):
231+
semaphore = asyncio.Semaphore(ASYNC_CONCURRENCY)
232+
timeout = httpx.Timeout(10)
233+
234+
tasks = []
235+
236+
async with httpx.AsyncClient(timeout=timeout) as httpx_client:
237+
async with asyncio.TaskGroup() as task_group:
238+
for installation in self.installations:
239+
tasks.append(
240+
task_group.create_task(
241+
self._get_installation_github_repos(
242+
installation_id=installation["id"],
243+
semaphore=semaphore,
244+
httpx_client=httpx_client,
245+
)
246+
)
247+
)
248+
198249
repos = []
199250

200-
for installation in self.installations:
201-
repos += self.get_repos(installation_id=installation["id"])
251+
for task in tasks:
252+
repos += task.result()
202253

203254
return repos
255+
256+
def get_list(self):
257+
return self._get_all_github_repos()

0 commit comments

Comments
 (0)