@@ -20,6 +20,21 @@ import config from "../../config";
2020
2121const logger = createLogger ( "gh-stream" ) ;
2222
/**
 * Build the github.com "raw" URL for a file at a given commit.
 *
 * Every dynamic part is percent-encoded with `encodeURIComponent`. The file
 * path is encoded one segment at a time so the `/` separators between
 * segments are preserved while characters such as spaces, `#` or `?` inside
 * a segment are escaped.
 *
 * @param owner - Repository owner (user or organization).
 * @param repo - Repository name.
 * @param commit - Commit identifier placed after `/raw/`.
 * @param filePath - Slash-separated path of the file inside the repository.
 * @returns The URL, containing no unescaped whitespace.
 */
export function githubRawFileUrl(
  owner: string,
  repo: string,
  commit: string,
  filePath: string
): string {
  const encodedPath = filePath
    .split("/")
    .map((segment) => encodeURIComponent(segment))
    .join("/");
  const encodedOwner = encodeURIComponent(owner);
  const encodedRepo = encodeURIComponent(repo);
  const encodedCommit = encodeURIComponent(commit);
  // No whitespace may sit between the placeholders and the "/" separators:
  // anything inside the template literal ends up verbatim in the URL.
  return `https://github.com/${encodedOwner}/${encodedRepo}/raw/${encodedCommit}/${encodedPath}`;
}
37+
2338export default class GitHubStream extends GitHubBase {
2439 type : "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubStream" ;
2540
@@ -64,14 +79,71 @@ export default class GitHubStream extends GitHubBase {
6479 // blob endpoint above returns the raw pointer text instead, so we use this
6580 // as the fallback for LFS files (#95).
private downloadFileViaRaw(token: string, filePath: string) {
  // Single URL declaration: the percent-encoded helper replaces the older
  // hand-interpolated URL, which left the path unescaped.
  const url = githubRawFileUrl(
    this.data.organization,
    this.data.repoName,
    this.data.commit,
    filePath
  );
  logger.debug("downloading via raw URL (LFS)", { url });
  // The request carries the token in the authorization header and follows
  // redirects issued by the raw endpoint.
  return got.stream(url, {
    headers: { authorization: `token ${token}` },
    followRedirect: true,
  });
}
7494
/**
 * Try the blob API first, then fall back to the raw URL on statuses where
 * the path-based endpoint can still succeed. 422 is the blob endpoint's size
 * cap; 404 can happen with stale/invalid blob SHAs while the path still
 * exists at the requested commit.
 *
 * @param token - Token forwarded to both download helpers.
 * @param sha - Blob SHA passed to the blob download.
 * @param filePath - Repository path used for the raw-URL fallback and for
 *   LFS pointer resolution.
 * @returns A promise resolving to exactly one of: the raw-URL stream (after a
 *   404/422), the blob stream wrapped by `resolveLfsPointer` (after a
 *   non-fallback response), or a stream that is destroyed with the original
 *   error (any other failure). Failures are surfaced on the returned stream;
 *   this method never calls `reject` itself.
 */
private downloadWithFallback(
  token: string,
  sha: string,
  filePath: string
): Promise<stream.Readable> {
  return new Promise<stream.Readable>((resolve) => {
    const blobStream = this.downloadFile(token, sha);
    const fallbackStatuses = new Set([404, 422]);
    // Guards against resolving twice when both "error" and "response" fire.
    let settled = false;

    const fallbackToRaw = (statusCode?: number) => {
      settled = true;
      logger.info("blob API failed, falling back to raw URL", {
        filePath,
        statusCode,
      });
      resolve(this.downloadFileViaRaw(token, filePath));
    };

    blobStream.on("error", (err) => {
      if (settled) return;
      const statusCode = (
        err as { response?: { statusCode?: number } }
      )?.response?.statusCode;
      if (statusCode && fallbackStatuses.has(statusCode)) {
        fallbackToRaw(statusCode);
        return;
      }
      // Other errors: let the normal pipeline handle them by handing back a
      // stream that fails with the same error.
      settled = true;
      const failed = new stream.PassThrough();
      resolve(failed);
      // destroy(err) emits "error" asynchronously, and an "error" event with
      // no listener is thrown. Deferring the destroy until after the awaiting
      // caller has resumed gives it a chance to attach its handlers first.
      // NOTE(review): the caller's error handling is not visible from this
      // method — confirm it listens for "error" on the returned stream.
      setImmediate(() => failed.destroy(err));
    });

    blobStream.on("response", (response) => {
      if (settled) return;
      if (fallbackStatuses.has(response.statusCode || 0)) {
        // Stop consuming the blob response before switching endpoints.
        blobStream.destroy();
        fallbackToRaw(response.statusCode);
        return;
      }
      settled = true;
      resolve(this.resolveLfsPointer(blobStream, token, filePath));
    });
  });
}
146+
75147 // Wrap a blob stream so that if its first ~150 bytes look like a Git LFS
76148 // pointer, the bytes are dropped and replaced by a fresh fetch from the
77149 // raw URL endpoint (which resolves LFS automatically). Non-LFS files are
@@ -190,11 +262,14 @@ export default class GitHubStream extends GitHubBase {
190262 } ) ;
191263 }
192264 const token = await this . data . getToken ( ) ;
193- const blobStream = this . downloadFile ( token , expected . sha ) ;
194- // If the blob is a Git LFS pointer, swap to a raw-URL fetch so the
195- // file content (not the pointer text) makes it into the pipeline. See
196- // #95 — Support for Git LFS.
197- const content = this . resolveLfsPointer ( blobStream , token , filePath ) ;
265+
266+ // Try the blob API first, but fall back to the raw URL on recoverable
267+ // blob misses/caps while still preserving LFS pointer handling.
268+ const content = await this . downloadWithFallback (
269+ token ,
270+ expected . sha ,
271+ filePath
272+ ) ;
198273
199274 // duplicate the stream to write it to the storage
200275 const stream1 = content . pipe ( new stream . PassThrough ( ) ) ;
0 commit comments