Skip to content

Commit ce745c6

Browse files
committed
Cap GitHub API pagination for releases and owner repos
fetch_releases and load_owner_repos_names previously used the global auto_paginate client, which fetches every page into memory. For repositories with thousands of releases (each carrying a markdown body and asset metadata) or organizations with thousands of repositories, this can consume significant memory. Switch to manual pagination with a default cap of 10 pages (up to 1,000 items at 100 items per page).
1 parent 3554789 commit ce745c6

File tree

2 files changed

+175
-4
lines changed

2 files changed

+175
-4
lines changed

app/models/hosts/github.rb

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,21 @@ def download_releases(repository)
216216
nil
217217
end
218218

219-
def fetch_releases(repository)
220-
api_client.releases(repository.full_name).map do |release|
219+
def fetch_releases(repository, max_pages: 10)
220+
client = api_client(nil, auto_paginate: false)
221+
releases = []
222+
response = client.releases(repository.full_name, per_page: 100)
223+
releases.concat(response)
224+
225+
last_resp = client.last_response
226+
pages_fetched = 1
227+
while pages_fetched < max_pages && last_resp.rels[:next]
228+
last_resp = last_resp.rels[:next].get
229+
releases.concat(last_resp.data)
230+
pages_fetched += 1
231+
end
232+
233+
releases.map do |release|
221234
{
222235
uuid: release.id,
223236
tag_name: release.tag_name,
@@ -237,8 +250,21 @@ def fetch_releases(repository)
237250
[]
238251
end
239252

240-
def load_owner_repos_names(owner)
241-
api_client.repos(owner.login, type: "all").map { |repo| repo[:full_name] }
253+
# Returns the full names ("owner/repo") of the repositories listed for
# +owner+, following pagination links by hand instead of relying on the
# auto-paginating client.
#
# owner     - object responding to +login+.
# max_pages - maximum number of API pages to request (100 repos per page).
#             Values below 1 perform no request and yield an empty list.
#
# Returns an Array of full-name strings. Returns [] when one of
# IGNORABLE_EXCEPTIONS or Octokit::NotFound is raised at any point,
# including while following a later page.
def load_owner_repos_names(owner, max_pages: 10)
  # Honour the cap strictly: a non-positive cap means "fetch nothing".
  return [] if max_pages < 1

  client = api_client(nil, auto_paginate: false)
  first_page = client.repos(owner.login, type: "all", per_page: 100)
  names = first_page.map { |repo| repo[:full_name] }

  # The first page counts against the cap, so at most max_pages - 1
  # follow-up requests are made.
  last_resp = client.last_response
  (max_pages - 1).times do
    next_rel = last_resp.rels[:next]
    break unless next_rel

    last_resp = next_rel.get
    names.concat(last_resp.data.map { |repo| repo[:full_name] })
  end

  names
rescue *IGNORABLE_EXCEPTIONS, Octokit::NotFound
  []
end

test/models/hosts/github_test.rb

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
require "test_helper"
2+
3+
# Tests for the manually paginated GitHub host calls (+fetch_releases+ and
# +load_owner_repos_names+). The API client returned by +api_client+ is
# replaced with Mocha mocks in every test.
class Hosts::GithubTest < ActiveSupport::TestCase
  setup do
    @host = create(:github_host)
    @github = Hosts::Github.new(@host)
    @repository = create(:repository, host: @host, full_name: 'testuser/testrepo', owner: 'testuser')
  end

  context 'fetch_releases' do
    should 'fetch releases with manual pagination' do
      release = OpenStruct.new(
        id: 1,
        tag_name: 'v1.0.0',
        target_commitish: 'main',
        name: 'Release 1.0.0',
        body: 'First release',
        draft: false,
        prerelease: false,
        created_at: 1.day.ago,
        published_at: 1.day.ago,
        author: OpenStruct.new(login: 'testuser'),
        assets: []
      )

      # No :next relation, so only the first page is read.
      last_response = mock('last_response')
      last_response.stubs(:rels).returns({})

      client = mock('client')
      client.expects(:releases).with('testuser/testrepo', per_page: 100).returns([release])
      client.stubs(:last_response).returns(last_response)

      @github.stubs(:api_client).with(nil, auto_paginate: false).returns(client)

      result = @github.fetch_releases(@repository)

      assert_equal 1, result.length
      assert_equal 1, result.first[:uuid]
      assert_equal 'v1.0.0', result.first[:tag_name]
    end

    should 'stop after max_pages' do
      release1 = OpenStruct.new(
        id: 1, tag_name: 'v1.0', target_commitish: 'main', name: 'r', body: 'b',
        draft: false, prerelease: false, created_at: Time.now, published_at: Time.now,
        author: OpenStruct.new(login: 'u'), assets: []
      )
      release2 = OpenStruct.new(
        id: 2, tag_name: 'v2.0', target_commitish: 'main', name: 'r2', body: 'b2',
        draft: false, prerelease: false, created_at: Time.now, published_at: Time.now,
        author: OpenStruct.new(login: 'u'), assets: []
      )

      # Page 2 still advertises a further page. Following it would exceed
      # the cap, so the test fails if the cap is ignored.
      page3_rel = mock('page3_rel')
      page3_rel.expects(:get).never

      page2_response = mock('page2_response')
      page2_response.stubs(:data).returns([release2])
      page2_response.stubs(:rels).returns({ next: page3_rel })

      next_rel = mock('next_rel')
      next_rel.stubs(:get).returns(page2_response)

      first_last_response = mock('first_last_response')
      first_last_response.stubs(:rels).returns({ next: next_rel })

      client = mock('client')
      client.expects(:releases).with('testuser/testrepo', per_page: 100).returns([release1])
      client.stubs(:last_response).returns(first_last_response)

      @github.stubs(:api_client).with(nil, auto_paginate: false).returns(client)

      result = @github.fetch_releases(@repository, max_pages: 2)

      assert_equal 2, result.length
      assert_equal 'v1.0', result.first[:tag_name]
      assert_equal 'v2.0', result.last[:tag_name]
    end

    should 'return empty array on error' do
      client = mock('client')
      client.expects(:releases).raises(Octokit::NotFound)
      @github.stubs(:api_client).with(nil, auto_paginate: false).returns(client)

      result = @github.fetch_releases(@repository)

      assert_equal [], result
    end
  end

  context 'load_owner_repos_names' do
    setup do
      @owner = OpenStruct.new(login: 'testuser')
    end

    should 'fetch repo names with manual pagination' do
      repo = { full_name: 'testuser/repo1' }

      # No :next relation, so only the first page is read.
      last_response = mock('last_response')
      last_response.stubs(:rels).returns({})

      client = mock('client')
      client.expects(:repos).with('testuser', type: 'all', per_page: 100).returns([repo])
      client.stubs(:last_response).returns(last_response)

      @github.stubs(:api_client).with(nil, auto_paginate: false).returns(client)

      result = @github.load_owner_repos_names(@owner)

      assert_equal ['testuser/repo1'], result
    end

    should 'stop after max_pages' do
      repo1 = { full_name: 'testuser/repo1' }
      repo2 = { full_name: 'testuser/repo2' }

      # Page 2 still advertises a further page. Following it would exceed
      # the cap, so the test fails if the cap is ignored.
      page3_rel = mock('page3_rel')
      page3_rel.expects(:get).never

      page2_response = mock('page2_response')
      page2_response.stubs(:data).returns([repo2])
      page2_response.stubs(:rels).returns({ next: page3_rel })

      next_rel = mock('next_rel')
      next_rel.stubs(:get).returns(page2_response)

      first_last_response = mock('first_last_response')
      first_last_response.stubs(:rels).returns({ next: next_rel })

      client = mock('client')
      client.expects(:repos).with('testuser', type: 'all', per_page: 100).returns([repo1])
      client.stubs(:last_response).returns(first_last_response)

      @github.stubs(:api_client).with(nil, auto_paginate: false).returns(client)

      result = @github.load_owner_repos_names(@owner, max_pages: 2)

      assert_equal ['testuser/repo1', 'testuser/repo2'], result
    end

    should 'return empty array on error' do
      client = mock('client')
      client.expects(:repos).raises(Octokit::NotFound)
      @github.stubs(:api_client).with(nil, auto_paginate: false).returns(client)

      result = @github.load_owner_repos_names(@owner)

      assert_equal [], result
    end
  end
end

0 commit comments

Comments
 (0)