 * 1. Reads configuration from skills.config.json
 * 2. Validates the repo is in the cloudflare/* org
 * 3. Checks cache to avoid unnecessary fetches
 * 4. Resolves the branch to a commit SHA, then uses the Git Trees API to
 *    list all files (1 API call each)
 * 5. Fetches file contents via gh-code proxy (no rate limits)
 * 6. Generates index.json with skill metadata
 * 7. Falls back to stale cache on fetch errors
 * 8. Fails open locally: outside CI, skips skills if the fetch fails and
 *    no cache is available (CI builds fail instead)
 */
1214
1315import { mkdir , readFile , writeFile , rm , rename } from "fs/promises" ;
@@ -40,15 +42,25 @@ interface IndexJson {
4042 skills : SkillMetadata [ ] ;
4143}
4244
43- interface GitHubContent {
44- name : string ;
/**
 * Subset of the GitHub commit response that this script reads.
 * Only the commit SHA is used; all other response fields are ignored.
 */
interface GitCommit {
  /** SHA of the commit the requested ref resolved to. */
  sha: string;
}
48+
/**
 * One entry of a Git tree listing as returned by the GitHub Git Trees API.
 */
interface GitTreeItem {
  /** Path of the entry relative to the repository root. */
  path: string;
  /**
   * Kind of entry: "blob" for files, "tree" for directories, and "commit"
   * for submodule links. Callers in this file only act on "blob" entries.
   */
  type: "blob" | "tree" | "commit";
  /** SHA of the object the entry points at. */
  sha: string;
}
54+
/**
 * Subset of the GitHub Git Trees API response that this script reads.
 */
interface GitTreeResponse {
  /** SHA of the tree that was listed. */
  sha: string;
  /** Entries contained in the tree (all nested entries when listed recursively). */
  tree: GitTreeItem[];
  /** True when GitHub cut the listing short, i.e. `tree` is incomplete. */
  truncated: boolean;
}
4960
// Directory and file name used for the skills fetch cache.
// NOTE(review): presumably resolved relative to process.cwd() — confirm in the cache helpers.
const CACHE_DIR = ".tmp";
const CACHE_FILE = "skills-cache.json";
// Base URL of the gh-code proxy; file contents are requested as
// `${GH_CODE_PROXY}/<repo>/<commit>/<path>` instead of going through the GitHub API.
const GH_CODE_PROXY = "https://gh-code.developers.cloudflare.com";
5264
5365function log ( message : string ) : void {
5466 console . log ( `[fetch-skills] ${ message } ` ) ;
@@ -62,6 +74,10 @@ function error(message: string): void {
6274 console . error ( `[fetch-skills] ERROR: ${ message } ` ) ;
6375}
6476
/**
 * Reports whether the script is running in a CI environment.
 *
 * @returns true when either the `CI` or the `GITHUB_ACTIONS` environment
 *   variable is exactly the string "true"; false otherwise.
 */
function isCI(): boolean {
  return process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true";
}
80+
6581async function loadConfig ( ) : Promise < SkillsConfig > {
6682 const configPath = join ( process . cwd ( ) , "skills.config.json" ) ;
6783 const content = await readFile ( configPath , "utf-8" ) ;
@@ -146,27 +162,17 @@ async function outputExists(config: SkillsConfig): Promise<boolean> {
146162 return existsSync ( indexPath ) ;
147163}
148164
149- function getGitHubHeaders ( ) : Record < string , string > {
150- const headers : Record < string , string > = {
151- Accept : "application/vnd.github.v3+json" ,
152- "User-Agent" : "cloudflare-docs-skills-fetcher" ,
153- } ;
154- // Use GITHUB_TOKEN if available (5000 req/hour vs 60 unauthenticated)
155- if ( process . env . GITHUB_TOKEN ) {
156- headers . Authorization = `Bearer ${ process . env . GITHUB_TOKEN } ` ;
157- }
158- return headers ;
159- }
160-
161165async function fetchGitHubJson < T > ( url : string ) : Promise < T > {
162- const response = await fetch ( url , { headers : getGitHubHeaders ( ) } ) ;
166+ const response = await fetch ( url , {
167+ headers : {
168+ Accept : "application/vnd.github.v3+json" ,
169+ "User-Agent" : "cloudflare-docs-skills-fetcher" ,
170+ } ,
171+ } ) ;
163172
164173 if ( ! response . ok ) {
165174 if ( response . status === 403 || response . status === 429 ) {
166- const authHint = process . env . GITHUB_TOKEN
167- ? ""
168- : " Set GITHUB_TOKEN for higher rate limits." ;
169- throw new Error ( `GitHub API rate limit exceeded.${ authHint } ` ) ;
175+ throw new Error ( "GitHub API rate limit exceeded." ) ;
170176 }
171177 throw new Error (
172178 `GitHub API error: ${ response . status } ${ response . statusText } ` ,
@@ -176,26 +182,12 @@ async function fetchGitHubJson<T>(url: string): Promise<T> {
176182 return response . json ( ) ;
177183}
178184
179- function isValidDownloadUrl ( url : string ) : boolean {
180- try {
181- const parsed = new URL ( url ) ;
182- return (
183- parsed . protocol === "https:" &&
184- parsed . host === "raw.githubusercontent.com"
185- ) ;
186- } catch {
187- return false ;
188- }
189- }
190-
191- async function fetchFileContent ( url : string ) : Promise < string > {
192- // Validate URL points to GitHub raw content (defense-in-depth)
193- if ( ! isValidDownloadUrl ( url ) ) {
194- throw new Error (
195- `Invalid download URL (must be raw.githubusercontent.com): ${ url } ` ,
196- ) ;
197- }
198-
185+ async function fetchFileContent (
186+ repo : string ,
187+ commit : string ,
188+ path : string ,
189+ ) : Promise < string > {
190+ const url = `${ GH_CODE_PROXY } /${ repo } /${ commit } /${ path } ` ;
199191 const response = await fetch ( url , {
200192 headers : {
201193 "User-Agent" : "cloudflare-docs-skills-fetcher" ,
@@ -204,7 +196,7 @@ async function fetchFileContent(url: string): Promise<string> {
204196
205197 if ( ! response . ok ) {
206198 throw new Error (
207- `Failed to fetch file : ${ response . status } ${ response . statusText } ` ,
199+ `Failed to fetch ${ path } : ${ response . status } ${ response . statusText } ` ,
208200 ) ;
209201 }
210202
@@ -233,76 +225,78 @@ function parseFrontmatter(content: string): {
233225 } ;
234226}
235227
236- async function fetchDirectoryRecursive (
237- config : SkillsConfig ,
238- path : string ,
239- ) : Promise < Array < { path : string ; download_url : string } > > {
240- const encodedPath = path . split ( "/" ) . map ( encodeURIComponent ) . join ( "/" ) ;
241- const url = `https://api.github.com/repos/${ config . skills_repo } /contents/${ encodedPath } ?ref=${ encodeURIComponent ( config . branch ) } ` ;
242- const contents = await fetchGitHubJson < GitHubContent [ ] > ( url ) ;
243- const files : Array < { path : string ; download_url : string } > = [ ] ;
244-
245- for ( const item of contents ) {
246- if ( item . type === "file" && item . download_url ) {
247- files . push ( { path : item . path , download_url : item . download_url } ) ;
248- } else if ( item . type === "dir" ) {
249- const subFiles = await fetchDirectoryRecursive ( config , item . path ) ;
250- files . push ( ...subFiles ) ;
251- }
/**
 * Resolves a branch name to a commit SHA via the GitHub commits endpoint.
 *
 * @param repo - Repository in "owner/name" form; interpolated into the URL as-is.
 * @param branch - Branch (ref) to resolve; URL-encoded before use.
 * @returns The `sha` field of the commit GitHub returns for that ref.
 * @throws Error when the request fails (propagated from fetchGitHubJson) or
 *   when the response does not carry a non-empty string `sha`.
 */
async function getCommitSha(repo: string, branch: string): Promise<string> {
  const url = `https://api.github.com/repos/${repo}/commits/${encodeURIComponent(branch)}`;
  const commit = await fetchGitHubJson<GitCommit>(url);

  // The JSON body is not validated upstream; fail here with a clear message
  // rather than letting a missing SHA surface later as an unrelated TypeError.
  if (typeof commit?.sha !== "string" || commit.sha.length === 0) {
    throw new Error(`GitHub API returned no commit SHA for ${repo}@${branch}`);
  }

  return commit.sha;
}
233+
/**
 * Lists every entry of a repository tree with one recursive Git Trees API call.
 *
 * @param repo - Repository in "owner/name" form; interpolated into the URL as-is.
 * @param sha - Commit or tree SHA to list.
 * @returns The `tree` array of the response (files and directories at all depths).
 * @throws Error when the request fails (propagated from fetchGitHubJson).
 */
async function getFileTree(repo: string, sha: string): Promise<GitTreeItem[]> {
  const url = `https://api.github.com/repos/${repo}/git/trees/${sha}?recursive=1`;
  const tree = await fetchGitHubJson<GitTreeResponse>(url);

  // A truncated listing is only reported, not treated as fatal: the caller
  // continues with whatever entries GitHub returned.
  if (tree.truncated) {
    warn("Git tree was truncated - some files may be missing");
  }

  return tree.tree;
}
256244
257245async function fetchSkill (
258246 config : SkillsConfig ,
259247 skillName : string ,
248+ skillFiles : string [ ] ,
249+ commit : string ,
250+ outputDir : string ,
260251) : Promise < SkillMetadata | null > {
261- const skillPath = `${ config . skills_path } /${ skillName } ` ;
262252 log ( `Fetching skill: ${ skillName } ` ) ;
253+ const skillPrefix = `${ config . skills_path } /${ skillName } /` ;
263254
264255 try {
265- // Fetch all files in the skill directory
266- const files = await fetchDirectoryRecursive ( config , skillPath ) ;
267-
268- // Find and parse SKILL.md for metadata
269- const skillMdFile = files . find ( ( f ) => f . path . endsWith ( "SKILL.md" ) ) ;
270- if ( ! skillMdFile ) {
256+ // Find SKILL.md
257+ const skillMdPath = skillFiles . find ( ( f ) => f . endsWith ( "SKILL.md" ) ) ;
258+ if ( ! skillMdPath ) {
271259 warn ( `No SKILL.md found for ${ skillName } , skipping` ) ;
272260 return null ;
273261 }
274262
275- const skillMdContent = await fetchFileContent ( skillMdFile . download_url ) ;
263+ // Fetch and parse SKILL.md for metadata
264+ const skillMdContent = await fetchFileContent (
265+ config . skills_repo ,
266+ commit ,
267+ skillMdPath ,
268+ ) ;
276269 const { name, description } = parseFrontmatter ( skillMdContent ) ;
277270
278271 // Resolve the output directory for path traversal checks
279- const resolvedOutputDir = resolve ( config . output_dir ) ;
272+ const resolvedOutputDir = resolve ( outputDir ) ;
280273
281274 // Write all files to output directory
282275 const relativeFiles : string [ ] = [ ] ;
283- for ( const file of files ) {
284- const relativePath = file . path . replace (
285- `${ config . skills_path } /${ skillName } /` ,
286- "" ,
287- ) ;
276+ for ( const filePath of skillFiles ) {
277+ const relativePath = filePath . replace ( skillPrefix , "" ) ;
288278
289- const outputPath = join ( config . output_dir , skillName , relativePath ) ;
279+ const outputPath = join ( outputDir , skillName , relativePath ) ;
290280 const resolvedOutputPath = resolve ( outputPath ) ;
291281
292- // SECURITY: Validate output path is within output directory (prevent path traversal)
282+ // SECURITY: Validate output path is within output directory
293283 if ( ! resolvedOutputPath . startsWith ( resolvedOutputDir + "/" ) ) {
294- warn ( `Skipping file with path traversal attempt: ${ file . path } ` ) ;
284+ warn ( `Skipping file with path traversal attempt: ${ filePath } ` ) ;
295285 continue ;
296286 }
297287
298288 relativeFiles . push ( relativePath ) ;
299289 await mkdir ( dirname ( outputPath ) , { recursive : true } ) ;
300290
301291 try {
302- const content = await fetchFileContent ( file . download_url ) ;
292+ const content = await fetchFileContent (
293+ config . skills_repo ,
294+ commit ,
295+ filePath ,
296+ ) ;
303297 await writeFile ( outputPath , content ) ;
304298 } catch ( err ) {
305- warn ( `Failed to fetch ${ file . path } : ${ err } ` ) ;
299+ warn ( `Failed to fetch ${ filePath } : ${ err } ` ) ;
306300 }
307301 }
308302
@@ -318,31 +312,49 @@ async function fetchSkill(
318312}
319313
320314async function fetchAllSkills ( config : SkillsConfig ) : Promise < IndexJson > {
321- const encodedPath = config . skills_path
322- . split ( "/" )
323- . map ( encodeURIComponent )
324- . join ( "/" ) ;
325- const url = `https://api.github.com/repos/${ config . skills_repo } /contents/${ encodedPath } ?ref=${ encodeURIComponent ( config . branch ) } ` ;
326- log ( `Fetching skill list from ${ config . skills_repo } ` ) ;
327-
328- const contents = await fetchGitHubJson < GitHubContent [ ] > ( url ) ;
329- const skillDirs = contents . filter ( ( item ) => item . type === "dir" && item . name ) ;
315+ log ( `Fetching skills from ${ config . skills_repo } ` ) ;
316+
317+ // Step 1: Get commit SHA for branch (1 API call)
318+ log ( `Resolving ${ config . branch } to commit SHA...` ) ;
319+ const commit = await getCommitSha ( config . skills_repo , config . branch ) ;
320+ log ( `Resolved to ${ commit . slice ( 0 , 7 ) } ` ) ;
321+
322+ // Step 2: Get full file tree (1 API call)
323+ log ( "Fetching file tree..." ) ;
324+ const tree = await getFileTree ( config . skills_repo , commit ) ;
325+
326+ // Step 3: Filter to skills path and group by skill
327+ const skillsPrefix = `${ config . skills_path } /` ;
328+ const skillFiles = tree
329+ . filter (
330+ ( item ) => item . type === "blob" && item . path . startsWith ( skillsPrefix ) ,
331+ )
332+ . map ( ( item ) => item . path ) ;
333+
334+ // Group files by skill name
335+ const skillGroups = new Map < string , string [ ] > ( ) ;
336+ for ( const filePath of skillFiles ) {
337+ const relativePath = filePath . slice ( skillsPrefix . length ) ;
338+ const skillName = relativePath . split ( "/" ) [ 0 ] ;
339+ if ( ! skillGroups . has ( skillName ) ) {
340+ skillGroups . set ( skillName , [ ] ) ;
341+ }
342+ skillGroups . get ( skillName ) ! . push ( filePath ) ;
343+ }
330344
331- log ( `Found ${ skillDirs . length } skills to fetch` ) ;
345+ log ( `Found ${ skillGroups . size } skills to fetch` ) ;
332346
333- // Write to temp directory first, then swap on success (atomic update)
347+ // Step 4: Create temp directory
334348 const tempDir = `${ config . output_dir } .tmp` ;
335349 if ( existsSync ( tempDir ) ) {
336350 await rm ( tempDir , { recursive : true } ) ;
337351 }
338352 await mkdir ( tempDir , { recursive : true } ) ;
339353
340- // Temporarily override output_dir for fetchSkill calls
341- const tempConfig = { ...config , output_dir : tempDir } ;
342-
354+ // Step 5: Fetch each skill (file contents via proxy - no rate limit)
343355 const skills : SkillMetadata [ ] = [ ] ;
344- for ( const dir of skillDirs ) {
345- const skill = await fetchSkill ( tempConfig , dir . name ) ;
356+ for ( const [ skillName , files ] of skillGroups ) {
357+ const skill = await fetchSkill ( config , skillName , files , commit , tempDir ) ;
346358 if ( skill ) {
347359 skills . push ( skill ) ;
348360 }
@@ -361,11 +373,9 @@ async function writeIndex(outputDir: string, index: IndexJson): Promise<void> {
361373}
362374
/**
 * Replaces `finalDir` with the contents of `tempDir`.
 *
 * Any existing `finalDir` is removed recursively, then `tempDir` is renamed
 * to `finalDir`. Despite the name this is not fully atomic: between the
 * removal and the rename `finalDir` does not exist, and if the rename fails
 * the previous output is already gone.
 *
 * @param tempDir - Fully populated staging directory; no longer exists afterwards.
 * @param finalDir - Destination path; created or replaced.
 * @throws Propagates any error from `rm` or `rename`.
 */
async function atomicSwap(tempDir: string, finalDir: string): Promise<void> {
  // `force: true` makes removal a no-op when finalDir is absent, which avoids
  // a separate exists-then-remove check that can race with other processes.
  await rm(finalDir, { recursive: true, force: true });
  await rename(tempDir, finalDir);
}
371381
@@ -421,14 +431,21 @@ async function main(): Promise<void> {
421431 } catch ( err ) {
422432 error ( `Fetch failed: ${ err } ` ) ;
423433
424- // Try to use stale cache (config already loaded successfully)
434+ // Try to use stale cache
425435 const cache = await loadCache ( ) ;
426436 if ( cache && ( await outputExists ( config ) ) ) {
427437 warn ( `Using stale cache from ${ cache . fetched_at } ` ) ;
428438 return ;
429439 }
430440
431- // No cache available, fail the build
441+ // No cache available
442+ if ( ! isCI ( ) ) {
443+ // Fail open locally - skip skills
444+ warn ( "No cache available. Skipping skills fetch (local dev)." ) ;
445+ return ;
446+ }
447+
448+ // CI with no cache - fail the build
432449 process . exit ( 1 ) ;
433450 }
434451}
0 commit comments