Skip to content

Commit 4362854

Browse files
Merge pull request #100 from deariary/fix/paginate-commits-and-jsdom
feat: paginate commit fetching and remove per-repo/total caps
2 parents 74f07fb + 0354553 commit 4362854

14 files changed

Lines changed: 417 additions & 208 deletions

src/cli/commands/fetch.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import { fetchEvents, dedupeEvents } from "../../collector/fetch-events.js";
1010
import { fetchContributions } from "../../collector/fetch-contributions.js";
1111
import { fetchPRsByRefs, type PRRef } from "../../collector/fetch-repo-prs.js";
1212
import { fetchCommitMessages } from "../../collector/fetch-commits.js";
13+
import { fetchReleases } from "../../collector/fetch-releases.js";
1314
import { aggregateRepositories } from "../../collector/aggregate.js";
1415
import { getWeekId, getCurrentWeekId } from "../../deployer/week.js";
1516
import type { GitHubEvent } from "../../types.js";
@@ -238,6 +239,11 @@ const runWeeklyFetch = async (options: BaseOptions): Promise<void> => {
238239
const totalMsgs = commitMessages.reduce((sum, r) => sum + r.messages.length, 0);
239240
console.log(`Collected ${totalMsgs} commit messages from ${commitMessages.length} repositories.`);
240241

242+
// Fetch releases per repository
243+
console.log(`Fetching releases for ${repoNames.length} repositories...`);
244+
const releases = await fetchReleases(options.token, repoNames, plan.range);
245+
console.log(`Collected ${releases.length} releases.`);
246+
241247
const githubData = {
242248
username: contributions.username,
243249
avatarUrl: contributions.avatarUrl,
@@ -257,8 +263,9 @@ const runWeeklyFetch = async (options: BaseOptions): Promise<void> => {
257263
repositories,
258264
pullRequests,
259265
issues: [],
260-
events,
266+
events: events.filter((e) => e.payload.kind === "review"),
261267
commitMessages,
268+
releases,
262269
externalContributions: [],
263270
};
264271
const dataPath = join(plan.reportDir, "github-data.yaml");

src/cli/commands/generate.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ pullRequests: []
178178
issues: []
179179
events: []
180180
commitMessages: []
181+
releases: []
181182
externalContributions: []
182183
`;
183184

src/cli/commands/render.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ pullRequests: []
7373
issues: []
7474
events: []
7575
commitMessages: []
76+
releases: []
7677
externalContributions: []
7778
`;
7879

src/collector/fetch-commits.test.ts

Lines changed: 33 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,27 @@ const makeRawCommit = (message: string) => ({
1212
commit: { message, author: { date: "2026-04-01T12:00:00Z" } },
1313
});
1414

15+
// Helper to create a Response with a Link header for pagination.
// Always answers HTTP 200 with `commits` serialized as the JSON body.
// When `nextUrl` is provided, a `link` header advertising it as rel="next"
// is attached; otherwise no `link` header is set (i.e. this is the last page).
const pagedResponse = (commits: unknown[], nextUrl?: string) => {
  const headers: Record<string, string> = {};
  if (nextUrl) headers["link"] = `<${nextUrl}>; rel="next"`;
  return new Response(JSON.stringify(commits), { status: 200, headers });
};
21+
1522
describe("fetchCommitMessages", () => {
1623
beforeEach(() => {
1724
vi.restoreAllMocks();
1825
});
1926

2027
it("fetches commit messages for multiple repos", async () => {
2128
vi.spyOn(globalThis, "fetch")
22-
.mockResolvedValueOnce(
23-
new Response(JSON.stringify([
24-
makeRawCommit("feat: add login"),
25-
makeRawCommit("fix: typo in header"),
26-
]), { status: 200 }),
27-
)
28-
.mockResolvedValueOnce(
29-
new Response(JSON.stringify([
30-
makeRawCommit("chore: update deps"),
31-
]), { status: 200 }),
32-
);
29+
.mockResolvedValueOnce(pagedResponse([
30+
makeRawCommit("feat: add login"),
31+
makeRawCommit("fix: typo in header"),
32+
]))
33+
.mockResolvedValueOnce(pagedResponse([
34+
makeRawCommit("chore: update deps"),
35+
]));
3336

3437
const result = await fetchCommitMessages("token", "user", ["org/repo-a", "org/repo-b"], range);
3538

@@ -38,11 +41,24 @@ describe("fetchCommitMessages", () => {
3841
expect(result[1]).toEqual({ repo: "org/repo-b", messages: ["chore: update deps"] });
3942
});
4043

44+
// Two mocked pages: the first carries a rel="next" Link header, the second
// does not. The collector must follow the link and concatenate both pages
// in order (100 + 50 = 150 messages).
it("paginates through multiple pages", async () => {
  const page1 = Array.from({ length: 100 }, (_, i) => makeRawCommit(`page1-${i}`));
  const page2 = Array.from({ length: 50 }, (_, i) => makeRawCommit(`page2-${i}`));

  vi.spyOn(globalThis, "fetch")
    .mockResolvedValueOnce(pagedResponse(page1, "https://api.github.com/repos/org/repo/commits?page=2"))
    .mockResolvedValueOnce(pagedResponse(page2));

  const result = await fetchCommitMessages("token", "user", ["org/repo"], range);

  expect(result[0].messages).toHaveLength(150);
  // Ordering check: page 1 entries come first, page 2 entries start at index 100.
  expect(result[0].messages[0]).toBe("page1-0");
  expect(result[0].messages[100]).toBe("page2-0");
});
58+
4159
it("extracts only the first line of multi-line commit messages", async () => {
4260
vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
43-
new Response(JSON.stringify([
44-
makeRawCommit("feat: new feature\n\nLong description here\nMore details"),
45-
]), { status: 200 }),
61+
pagedResponse([makeRawCommit("feat: new feature\n\nLong description here\nMore details")]),
4662
);
4763

4864
const result = await fetchCommitMessages("token", "user", ["org/repo"], range);
@@ -53,7 +69,7 @@ describe("fetchCommitMessages", () => {
5369
it("truncates long commit messages to 200 characters", async () => {
5470
const longMessage = "a".repeat(300);
5571
vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
56-
new Response(JSON.stringify([makeRawCommit(longMessage)]), { status: 200 }),
72+
pagedResponse([makeRawCommit(longMessage)]),
5773
);
5874

5975
const result = await fetchCommitMessages("token", "user", ["org/repo"], range);
@@ -63,9 +79,7 @@ describe("fetchCommitMessages", () => {
6379
});
6480

6581
it("skips repos with no commits", async () => {
66-
vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
67-
new Response(JSON.stringify([]), { status: 200 }),
68-
);
82+
vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(pagedResponse([]));
6983

7084
const result = await fetchCommitMessages("token", "user", ["org/empty"], range);
7185

@@ -92,32 +106,6 @@ describe("fetchCommitMessages", () => {
92106
expect(result).toHaveLength(0);
93107
});
94108

95-
it("respects per_page limit from API (10 per repo)", async () => {
96-
// API honors per_page=10, so even if repo has more commits, only 10 are returned
97-
const commits = Array.from({ length: 10 }, (_, i) => makeRawCommit(`commit ${i}`));
98-
vi.spyOn(globalThis, "fetch").mockResolvedValueOnce(
99-
new Response(JSON.stringify(commits), { status: 200 }),
100-
);
101-
102-
const result = await fetchCommitMessages("token", "user", ["org/busy"], range);
103-
104-
expect(result[0].messages.length).toBe(10);
105-
});
106-
107-
it("caps total messages at 50 across all repos", async () => {
108-
// 6 repos each returning 10 commits = 60 total, should be capped at 50
109-
const commits = Array.from({ length: 10 }, (_, i) => makeRawCommit(`msg ${i}`));
110-
vi.spyOn(globalThis, "fetch").mockImplementation(
111-
() => Promise.resolve(new Response(JSON.stringify(commits), { status: 200 })),
112-
);
113-
114-
const repos = Array.from({ length: 6 }, (_, i) => `org/repo-${i}`);
115-
const result = await fetchCommitMessages("token", "user", repos, range);
116-
117-
const totalMsgs = result.reduce((sum, r) => sum + r.messages.length, 0);
118-
expect(totalMsgs).toBeLessThanOrEqual(50);
119-
});
120-
121109
it("returns empty array for empty repos list", async () => {
122110
const result = await fetchCommitMessages("token", "user", [], range);
123111

@@ -129,9 +117,7 @@ describe("fetchCommitMessages", () => {
129117
.mockResolvedValueOnce(
130118
new Response("", { status: 429, headers: { "retry-after": "0" } }),
131119
)
132-
.mockResolvedValueOnce(
133-
new Response(JSON.stringify([makeRawCommit("after retry")]), { status: 200 }),
134-
);
120+
.mockResolvedValueOnce(pagedResponse([makeRawCommit("after retry")]));
135121

136122
const result = await fetchCommitMessages("token", "user", ["org/repo"], range);
137123

src/collector/fetch-commits.ts

Lines changed: 44 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ type RawCommit = {
1616
};
1717
};
1818

19-
const MAX_MESSAGES_PER_REPO = 10;
20-
const MAX_TOTAL_MESSAGES = 50;
19+
const PER_PAGE = 100;
2120
const MAX_MESSAGE_LENGTH = 200;
2221
const MAX_RETRIES = 3;
2322
const DEFAULT_RETRY_DELAY_MS = 5_000;
@@ -40,6 +39,14 @@ const parseRetryDelay = (response: Response): number => {
4039
return DEFAULT_RETRY_DELAY_MS;
4140
};
4241

42+
// Parse Link header to find next page URL.
// Returns the URL inside the `<...>` that is immediately followed by
// `; rel="next"`, or null when the response has no `link` header or the
// header contains no rel="next" entry.
const parseNextUrl = (response: Response): string | null => {
  const link = response.headers.get("link");
  if (!link) return null;
  const match = link.match(/<([^>]+)>;\s*rel="next"/);
  return match?.[1] ?? null;
};
49+
4350
// Extract the first line of a commit message and truncate to MAX_MESSAGE_LENGTH
4451
const firstLine = (message: string): string => {
4552
const subject = message.split("\n")[0]?.trim() ?? message.trim();
@@ -48,50 +55,58 @@ const firstLine = (message: string): string => {
4855
: subject;
4956
};
5057

51-
const fetchRepoCommits = async (
58+
// Fetch one page of commits from `url`.
// Resolves to the parsed commits plus the next-page URL read from the
// response's Link header, or to null when the page could not be retrieved.
// Only HTTP 429 is retried (up to MAX_RETRIES times, waiting for the delay
// computed by parseRetryDelay); every other non-OK status gives up at once.
const fetchPage = async (
  token: string,
  url: string,
): Promise<{ commits: RawCommit[]; nextUrl: string | null } | null> => {
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
    const response = await fetch(url, { headers: GITHUB_HEADERS(token) });

    if (response.ok) {
      const commits = (await response.json()) as RawCommit[];
      return { commits, nextUrl: parseNextUrl(response) };
    }

    if (response.status === 409) return null; // Empty repository
    // Permission denied or repo not found (private, deleted, etc.)
    if (response.status === 403 || response.status === 404) return null;

    if (response.status === 429 && attempt < MAX_RETRIES) {
      const delay = parseRetryDelay(response);
      console.warn(` 429, retrying in ${Math.round(delay / 1000)}s (attempt ${attempt + 1}/${MAX_RETRIES})`);
      await sleep(delay);
      continue;
    }

    // Any other status, or a 429 on the final attempt, is reported and dropped.
    console.warn(` Failed to fetch commits: ${response.status} ${response.statusText}`);
    return null;
  }

  return null;
};
86+
87+
// Collect the commit subjects authored by `author` in `repo` within `range`.
// Requests PER_PAGE commits per call and keeps following the next-page URL
// reported by fetchPage until there is none. If a page fails (fetchPage
// returns null) the loop stops and whatever was gathered so far is returned.
// Each commit message is reduced with firstLine before being stored.
const fetchRepoCommits = async (
  token: string,
  repo: string,
  author: string,
  range: DateRange,
): Promise<string[]> => {
  const params = new URLSearchParams({
    author,
    since: range.from.toISOString(),
    until: range.to.toISOString(),
    per_page: String(PER_PAGE),
  });
  let url: string | null = `https://api.github.com/repos/${repo}/commits?${params}`;
  const messages: string[] = [];

  while (url) {
    const result = await fetchPage(token, url);
    if (!result) break;
    messages.push(...result.commits.map((c) => firstLine(c.commit.message)));
    url = result.nextUrl;
  }

  return messages;
};
96111

97112
const CONCURRENCY = 5;
@@ -129,16 +144,5 @@ export const fetchCommitMessages = async (
129144
}
130145
});
131146

132-
// Cap total messages after all fetches complete
133-
const capped: RepoCommits[] = [];
134-
let total = 0;
135-
for (const entry of results) {
136-
if (total >= MAX_TOTAL_MESSAGES) break;
137-
const remaining = MAX_TOTAL_MESSAGES - total;
138-
const trimmed = entry.messages.slice(0, remaining);
139-
capped.push({ repo: entry.repo, messages: trimmed });
140-
total += trimmed.length;
141-
}
142-
143-
return capped;
147+
return results;
144148
};

0 commit comments

Comments
 (0)