Skip to content

Commit 04d4f00

Browse files
committed
use gh-code proxy, fail open locally
1 parent 94dc3d1 commit 04d4f00

File tree

1 file changed

+118
-101
lines changed

1 file changed

+118
-101
lines changed

scripts/fetch-skills.ts

Lines changed: 118 additions & 101 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,11 @@
55
* 1. Reads configuration from skills.config.json
66
* 2. Validates the repo is in the cloudflare/* org
77
* 3. Checks cache to avoid unnecessary fetches
8-
* 4. Fetches all skills and their reference files from GitHub
9-
* 5. Generates index.json with skill metadata
10-
* 6. Falls back to stale cache on fetch errors
8+
* 4. Uses Git Trees API to list all files (1 API call)
9+
* 5. Fetches file contents via gh-code proxy (no rate limits)
10+
* 6. Generates index.json with skill metadata
11+
* 7. Falls back to stale cache on fetch errors
12+
* 8. Fails open locally (skips skills if fetch fails)
1113
*/
1214

1315
import { mkdir, readFile, writeFile, rm, rename } from "fs/promises";
@@ -40,15 +42,25 @@ interface IndexJson {
4042
skills: SkillMetadata[];
4143
}
4244

43-
interface GitHubContent {
44-
name: string;
45+
interface GitCommit {
46+
sha: string;
47+
}
48+
49+
interface GitTreeItem {
4550
path: string;
46-
type: "file" | "dir";
47-
download_url: string | null;
51+
type: "blob" | "tree";
52+
sha: string;
53+
}
54+
55+
interface GitTreeResponse {
56+
sha: string;
57+
tree: GitTreeItem[];
58+
truncated: boolean;
4859
}
4960

5061
const CACHE_DIR = ".tmp";
5162
const CACHE_FILE = "skills-cache.json";
63+
const GH_CODE_PROXY = "https://gh-code.developers.cloudflare.com";
5264

5365
function log(message: string): void {
5466
console.log(`[fetch-skills] ${message}`);
@@ -62,6 +74,10 @@ function error(message: string): void {
6274
console.error(`[fetch-skills] ERROR: ${message}`);
6375
}
6476

77+
function isCI(): boolean {
78+
return process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true";
79+
}
80+
6581
async function loadConfig(): Promise<SkillsConfig> {
6682
const configPath = join(process.cwd(), "skills.config.json");
6783
const content = await readFile(configPath, "utf-8");
@@ -146,27 +162,17 @@ async function outputExists(config: SkillsConfig): Promise<boolean> {
146162
return existsSync(indexPath);
147163
}
148164

149-
function getGitHubHeaders(): Record<string, string> {
150-
const headers: Record<string, string> = {
151-
Accept: "application/vnd.github.v3+json",
152-
"User-Agent": "cloudflare-docs-skills-fetcher",
153-
};
154-
// Use GITHUB_TOKEN if available (5000 req/hour vs 60 unauthenticated)
155-
if (process.env.GITHUB_TOKEN) {
156-
headers.Authorization = `Bearer ${process.env.GITHUB_TOKEN}`;
157-
}
158-
return headers;
159-
}
160-
161165
async function fetchGitHubJson<T>(url: string): Promise<T> {
162-
const response = await fetch(url, { headers: getGitHubHeaders() });
166+
const response = await fetch(url, {
167+
headers: {
168+
Accept: "application/vnd.github.v3+json",
169+
"User-Agent": "cloudflare-docs-skills-fetcher",
170+
},
171+
});
163172

164173
if (!response.ok) {
165174
if (response.status === 403 || response.status === 429) {
166-
const authHint = process.env.GITHUB_TOKEN
167-
? ""
168-
: " Set GITHUB_TOKEN for higher rate limits.";
169-
throw new Error(`GitHub API rate limit exceeded.${authHint}`);
175+
throw new Error("GitHub API rate limit exceeded.");
170176
}
171177
throw new Error(
172178
`GitHub API error: ${response.status} ${response.statusText}`,
@@ -176,26 +182,12 @@ async function fetchGitHubJson<T>(url: string): Promise<T> {
176182
return response.json();
177183
}
178184

179-
function isValidDownloadUrl(url: string): boolean {
180-
try {
181-
const parsed = new URL(url);
182-
return (
183-
parsed.protocol === "https:" &&
184-
parsed.host === "raw.githubusercontent.com"
185-
);
186-
} catch {
187-
return false;
188-
}
189-
}
190-
191-
async function fetchFileContent(url: string): Promise<string> {
192-
// Validate URL points to GitHub raw content (defense-in-depth)
193-
if (!isValidDownloadUrl(url)) {
194-
throw new Error(
195-
`Invalid download URL (must be raw.githubusercontent.com): ${url}`,
196-
);
197-
}
198-
185+
async function fetchFileContent(
186+
repo: string,
187+
commit: string,
188+
path: string,
189+
): Promise<string> {
190+
const url = `${GH_CODE_PROXY}/${repo}/${commit}/${path}`;
199191
const response = await fetch(url, {
200192
headers: {
201193
"User-Agent": "cloudflare-docs-skills-fetcher",
@@ -204,7 +196,7 @@ async function fetchFileContent(url: string): Promise<string> {
204196

205197
if (!response.ok) {
206198
throw new Error(
207-
`Failed to fetch file: ${response.status} ${response.statusText}`,
199+
`Failed to fetch ${path}: ${response.status} ${response.statusText}`,
208200
);
209201
}
210202

@@ -233,76 +225,78 @@ function parseFrontmatter(content: string): {
233225
};
234226
}
235227

236-
async function fetchDirectoryRecursive(
237-
config: SkillsConfig,
238-
path: string,
239-
): Promise<Array<{ path: string; download_url: string }>> {
240-
const encodedPath = path.split("/").map(encodeURIComponent).join("/");
241-
const url = `https://api.github.com/repos/${config.skills_repo}/contents/${encodedPath}?ref=${encodeURIComponent(config.branch)}`;
242-
const contents = await fetchGitHubJson<GitHubContent[]>(url);
243-
const files: Array<{ path: string; download_url: string }> = [];
244-
245-
for (const item of contents) {
246-
if (item.type === "file" && item.download_url) {
247-
files.push({ path: item.path, download_url: item.download_url });
248-
} else if (item.type === "dir") {
249-
const subFiles = await fetchDirectoryRecursive(config, item.path);
250-
files.push(...subFiles);
251-
}
228+
async function getCommitSha(repo: string, branch: string): Promise<string> {
229+
const url = `https://api.github.com/repos/${repo}/commits/${encodeURIComponent(branch)}`;
230+
const commit = await fetchGitHubJson<GitCommit>(url);
231+
return commit.sha;
232+
}
233+
234+
async function getFileTree(repo: string, sha: string): Promise<GitTreeItem[]> {
235+
const url = `https://api.github.com/repos/${repo}/git/trees/${sha}?recursive=1`;
236+
const tree = await fetchGitHubJson<GitTreeResponse>(url);
237+
238+
if (tree.truncated) {
239+
warn("Git tree was truncated - some files may be missing");
252240
}
253241

254-
return files;
242+
return tree.tree;
255243
}
256244

257245
async function fetchSkill(
258246
config: SkillsConfig,
259247
skillName: string,
248+
skillFiles: string[],
249+
commit: string,
250+
outputDir: string,
260251
): Promise<SkillMetadata | null> {
261-
const skillPath = `${config.skills_path}/${skillName}`;
262252
log(`Fetching skill: ${skillName}`);
253+
const skillPrefix = `${config.skills_path}/${skillName}/`;
263254

264255
try {
265-
// Fetch all files in the skill directory
266-
const files = await fetchDirectoryRecursive(config, skillPath);
267-
268-
// Find and parse SKILL.md for metadata
269-
const skillMdFile = files.find((f) => f.path.endsWith("SKILL.md"));
270-
if (!skillMdFile) {
256+
// Find SKILL.md
257+
const skillMdPath = skillFiles.find((f) => f.endsWith("SKILL.md"));
258+
if (!skillMdPath) {
271259
warn(`No SKILL.md found for ${skillName}, skipping`);
272260
return null;
273261
}
274262

275-
const skillMdContent = await fetchFileContent(skillMdFile.download_url);
263+
// Fetch and parse SKILL.md for metadata
264+
const skillMdContent = await fetchFileContent(
265+
config.skills_repo,
266+
commit,
267+
skillMdPath,
268+
);
276269
const { name, description } = parseFrontmatter(skillMdContent);
277270

278271
// Resolve the output directory for path traversal checks
279-
const resolvedOutputDir = resolve(config.output_dir);
272+
const resolvedOutputDir = resolve(outputDir);
280273

281274
// Write all files to output directory
282275
const relativeFiles: string[] = [];
283-
for (const file of files) {
284-
const relativePath = file.path.replace(
285-
`${config.skills_path}/${skillName}/`,
286-
"",
287-
);
276+
for (const filePath of skillFiles) {
277+
const relativePath = filePath.replace(skillPrefix, "");
288278

289-
const outputPath = join(config.output_dir, skillName, relativePath);
279+
const outputPath = join(outputDir, skillName, relativePath);
290280
const resolvedOutputPath = resolve(outputPath);
291281

292-
// SECURITY: Validate output path is within output directory (prevent path traversal)
282+
// SECURITY: Validate output path is within output directory
293283
if (!resolvedOutputPath.startsWith(resolvedOutputDir + "/")) {
294-
warn(`Skipping file with path traversal attempt: ${file.path}`);
284+
warn(`Skipping file with path traversal attempt: ${filePath}`);
295285
continue;
296286
}
297287

298288
relativeFiles.push(relativePath);
299289
await mkdir(dirname(outputPath), { recursive: true });
300290

301291
try {
302-
const content = await fetchFileContent(file.download_url);
292+
const content = await fetchFileContent(
293+
config.skills_repo,
294+
commit,
295+
filePath,
296+
);
303297
await writeFile(outputPath, content);
304298
} catch (err) {
305-
warn(`Failed to fetch ${file.path}: ${err}`);
299+
warn(`Failed to fetch ${filePath}: ${err}`);
306300
}
307301
}
308302

@@ -318,31 +312,49 @@ async function fetchSkill(
318312
}
319313

320314
async function fetchAllSkills(config: SkillsConfig): Promise<IndexJson> {
321-
const encodedPath = config.skills_path
322-
.split("/")
323-
.map(encodeURIComponent)
324-
.join("/");
325-
const url = `https://api.github.com/repos/${config.skills_repo}/contents/${encodedPath}?ref=${encodeURIComponent(config.branch)}`;
326-
log(`Fetching skill list from ${config.skills_repo}`);
327-
328-
const contents = await fetchGitHubJson<GitHubContent[]>(url);
329-
const skillDirs = contents.filter((item) => item.type === "dir" && item.name);
315+
log(`Fetching skills from ${config.skills_repo}`);
316+
317+
// Step 1: Get commit SHA for branch (1 API call)
318+
log(`Resolving ${config.branch} to commit SHA...`);
319+
const commit = await getCommitSha(config.skills_repo, config.branch);
320+
log(`Resolved to ${commit.slice(0, 7)}`);
321+
322+
// Step 2: Get full file tree (1 API call)
323+
log("Fetching file tree...");
324+
const tree = await getFileTree(config.skills_repo, commit);
325+
326+
// Step 3: Filter to skills path and group by skill
327+
const skillsPrefix = `${config.skills_path}/`;
328+
const skillFiles = tree
329+
.filter(
330+
(item) => item.type === "blob" && item.path.startsWith(skillsPrefix),
331+
)
332+
.map((item) => item.path);
333+
334+
// Group files by skill name
335+
const skillGroups = new Map<string, string[]>();
336+
for (const filePath of skillFiles) {
337+
const relativePath = filePath.slice(skillsPrefix.length);
338+
const skillName = relativePath.split("/")[0];
339+
if (!skillGroups.has(skillName)) {
340+
skillGroups.set(skillName, []);
341+
}
342+
skillGroups.get(skillName)!.push(filePath);
343+
}
330344

331-
log(`Found ${skillDirs.length} skills to fetch`);
345+
log(`Found ${skillGroups.size} skills to fetch`);
332346

333-
// Write to temp directory first, then swap on success (atomic update)
347+
// Step 4: Create temp directory
334348
const tempDir = `${config.output_dir}.tmp`;
335349
if (existsSync(tempDir)) {
336350
await rm(tempDir, { recursive: true });
337351
}
338352
await mkdir(tempDir, { recursive: true });
339353

340-
// Temporarily override output_dir for fetchSkill calls
341-
const tempConfig = { ...config, output_dir: tempDir };
342-
354+
// Step 5: Fetch each skill (file contents via proxy - no rate limit)
343355
const skills: SkillMetadata[] = [];
344-
for (const dir of skillDirs) {
345-
const skill = await fetchSkill(tempConfig, dir.name);
356+
for (const [skillName, files] of skillGroups) {
357+
const skill = await fetchSkill(config, skillName, files, commit, tempDir);
346358
if (skill) {
347359
skills.push(skill);
348360
}
@@ -361,11 +373,9 @@ async function writeIndex(outputDir: string, index: IndexJson): Promise<void> {
361373
}
362374

363375
async function atomicSwap(tempDir: string, finalDir: string): Promise<void> {
364-
// Remove existing output directory if it exists
365376
if (existsSync(finalDir)) {
366377
await rm(finalDir, { recursive: true });
367378
}
368-
// Move temp to final (atomic on same filesystem)
369379
await rename(tempDir, finalDir);
370380
}
371381

@@ -421,14 +431,21 @@ async function main(): Promise<void> {
421431
} catch (err) {
422432
error(`Fetch failed: ${err}`);
423433

424-
// Try to use stale cache (config already loaded successfully)
434+
// Try to use stale cache
425435
const cache = await loadCache();
426436
if (cache && (await outputExists(config))) {
427437
warn(`Using stale cache from ${cache.fetched_at}`);
428438
return;
429439
}
430440

431-
// No cache available, fail the build
441+
// No cache available
442+
if (!isCI()) {
443+
// Fail open locally - skip skills
444+
warn("No cache available. Skipping skills fetch (local dev).");
445+
return;
446+
}
447+
448+
// CI with no cache - fail the build
432449
process.exit(1);
433450
}
434451
}

0 commit comments

Comments
 (0)