Skip to content

Commit 67cb253

Browse files
committed
fix old github download repos
1 parent da78708 commit 67cb253

6 files changed

Lines changed: 154 additions & 22 deletions

File tree

public/script/app.js

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -593,7 +593,9 @@ angular
593593
if (dir) {
594594
output += `<a ng-click="openFolder('${path}', $event)">${name}</a>`;
595595
} else {
596-
output += `<a href='/r/${$scope.repoId}${path}'>${name}</a>`;
596+
output += `<a href='/r/${$scope.repoId}${encodePathForUrl(
597+
path
598+
)}'>${name}</a>`;
597599
}
598600
if (truncated) {
599601
output += `<span class="truncated-warning" title="{{ 'WARNINGS.folder_truncated' | translate }}"><i class="fas fa-exclamation-triangle"></i></span>`;
@@ -2255,7 +2257,9 @@ angular
22552257
}
22562258

22572259
// redirect to readme
2258-
$location.url(uri + readmeCandidates[best_match]);
2260+
$location.url(
2261+
uri + encodePathForUrl(readmeCandidates[best_match])
2262+
);
22592263
}
22602264
}
22612265
$scope.getFiles = async function (path) {
@@ -2353,11 +2357,15 @@ angular
23532357
// server returns a fresh ETag on first hit either way.
23542358
const sha = (fileInfo && fileInfo.sha) || "0";
23552359
$http
2356-
.get(`/api/repo/${$scope.repoId}/file/${path}?v=` + sha, {
2357-
transformResponse: (data) => {
2358-
return data;
2359-
},
2360-
})
2360+
.get(
2361+
`/api/repo/${$scope.repoId}/file/${encodePathForUrl(path)}?v=` +
2362+
sha,
2363+
{
2364+
transformResponse: (data) => {
2365+
return data;
2366+
},
2367+
}
2368+
)
23612369
.then(
23622370
(res) => {
23632371
$scope.type = originalType;
@@ -2427,7 +2435,9 @@ angular
24272435
if ($scope.file && $scope.file.sha) {
24282436
fileVersion = $scope.file.sha;
24292437
}
2430-
$scope.url = `/api/repo/${$scope.repoId}/file/${$scope.filePath}?v=${fileVersion}`;
2438+
$scope.url = `/api/repo/${$scope.repoId}/file/${encodePathForUrl(
2439+
$scope.filePath
2440+
)}?v=${fileVersion}`;
24312441

24322442
let extension = $scope.filePath.toLowerCase();
24332443
const extensionIndex = extension.lastIndexOf(".");

public/script/bundle.min.js

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

public/script/utils.js

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,13 @@ function saveFilterPrefs(key, value) {
1515
}
1616
}
1717

18+
function encodePathForUrl(path) {
19+
return String(path || "")
20+
.split("/")
21+
.map((segment) => encodeURIComponent(segment))
22+
.join("/");
23+
}
24+
1825
function humanFileSize(bytes, si = false, dp = 1) {
1926
const thresh = si ? 1000 : 1024;
2027

src/core/Repository.ts

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,21 @@ export default class Repository {
141141
force: false,
142142
}
143143
): Promise<IFile[]> {
144-
const hasFile = await FileModel.exists({ repoId: this.repoId }).exec();
144+
let hasFile = await FileModel.exists({ repoId: this.repoId }).exec();
145+
// Files created by GitHubDownload don't carry a valid 40-char GitHub
146+
// blob SHA. When the source type later switches to GitHubStream the
147+
// stale entries cause blob-API 404s. Detect this by sampling a file
148+
// with a sha and checking its length; force a re-fetch if it doesn't
149+
// look like a GitHub SHA.
150+
if (hasFile && this.source instanceof GitHubStream) {
151+
const sample = await FileModel.findOne(
152+
{ repoId: this.repoId, sha: { $exists: true, $ne: null } },
153+
{ sha: 1 }
154+
).exec();
155+
if (sample?.sha && sample.sha.length !== 40) {
156+
hasFile = null;
157+
}
158+
}
145159
if (!hasFile || opt.force) {
146160
await FileModel.deleteMany({ repoId: this.repoId }).exec();
147161
const files = await this.source.getFiles(opt.progress);

src/core/source/GitHubStream.ts

Lines changed: 81 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,21 @@ import config from "../../config";
2020

2121
const logger = createLogger("gh-stream");
2222

23+
export function githubRawFileUrl(
24+
owner: string,
25+
repo: string,
26+
commit: string,
27+
filePath: string
28+
): string {
29+
const encodedPath = filePath
30+
.split("/")
31+
.map((segment) => encodeURIComponent(segment))
32+
.join("/");
33+
return `https://github.com/${encodeURIComponent(owner)}/${encodeURIComponent(
34+
repo
35+
)}/raw/${encodeURIComponent(commit)}/${encodedPath}`;
36+
}
37+
2338
export default class GitHubStream extends GitHubBase {
2439
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubStream";
2540

@@ -64,14 +79,71 @@ export default class GitHubStream extends GitHubBase {
6479
// blob endpoint above returns the raw pointer text instead, so we use this
6580
// as the fallback for LFS files (#95).
6681
private downloadFileViaRaw(token: string, filePath: string) {
67-
const url = `https://github.com/${this.data.organization}/${this.data.repoName}/raw/${this.data.commit}/${filePath}`;
82+
const url = githubRawFileUrl(
83+
this.data.organization,
84+
this.data.repoName,
85+
this.data.commit,
86+
filePath
87+
);
6888
logger.debug("downloading via raw URL (LFS)", { url });
6989
return got.stream(url, {
7090
headers: { authorization: `token ${token}` },
7191
followRedirect: true,
7292
});
7393
}
7494

95+
// Try the blob API, then fall back to the raw URL on statuses where the
96+
// path-based endpoint can still succeed. 422 is the blob endpoint's size
97+
// cap; 404 can happen with stale/invalid blob SHAs while the path still
98+
// exists at the requested commit.
99+
private downloadWithFallback(
100+
token: string,
101+
sha: string,
102+
filePath: string
103+
): Promise<stream.Readable> {
104+
return new Promise<stream.Readable>((resolve) => {
105+
const blobStream = this.downloadFile(token, sha);
106+
let settled = false;
107+
108+
const fallbackStatuses = new Set([404, 422]);
109+
const fallbackToRaw = (statusCode?: number) => {
110+
settled = true;
111+
logger.info("blob API failed, falling back to raw URL", {
112+
filePath,
113+
statusCode,
114+
});
115+
resolve(this.downloadFileViaRaw(token, filePath));
116+
};
117+
118+
blobStream.on("error", (err) => {
119+
if (settled) return;
120+
const statusCode = (
121+
err as { response?: { statusCode?: number } }
122+
)?.response?.statusCode;
123+
if (statusCode && fallbackStatuses.has(statusCode)) {
124+
fallbackToRaw(statusCode);
125+
return;
126+
}
127+
// Other errors: let the normal pipeline handle them.
128+
settled = true;
129+
const passthrough = new stream.PassThrough();
130+
passthrough.destroy(err);
131+
resolve(passthrough);
132+
});
133+
134+
blobStream.on("response", (response) => {
135+
if (settled) return;
136+
if (fallbackStatuses.has(response.statusCode || 0)) {
137+
blobStream.destroy();
138+
fallbackToRaw(response.statusCode);
139+
return;
140+
}
141+
settled = true;
142+
resolve(this.resolveLfsPointer(blobStream, token, filePath));
143+
});
144+
});
145+
}
146+
75147
// Wrap a blob stream so that if its first ~150 bytes look like a Git LFS
76148
// pointer, the bytes are dropped and replaced by a fresh fetch from the
77149
// raw URL endpoint (which resolves LFS automatically). Non-LFS files are
@@ -190,11 +262,14 @@ export default class GitHubStream extends GitHubBase {
190262
});
191263
}
192264
const token = await this.data.getToken();
193-
const blobStream = this.downloadFile(token, expected.sha);
194-
// If the blob is a Git LFS pointer, swap to a raw-URL fetch so the
195-
// file content (not the pointer text) makes it into the pipeline. See
196-
// #95 — Support for Git LFS.
197-
const content = this.resolveLfsPointer(blobStream, token, filePath);
265+
266+
// Try the blob API first, but fall back to the raw URL on recoverable
267+
// blob misses/caps while still preserving LFS pointer handling.
268+
const content = await this.downloadWithFallback(
269+
token,
270+
expected.sha,
271+
filePath
272+
);
198273

199274
// duplicate the stream to write it to the storage
200275
const stream1 = content.pipe(new stream.PassThrough());

src/server/routes/file.ts

Lines changed: 31 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,40 @@ import { fileETag } from "./file-etag";
66

77
export const router = express.Router();
88

9+
function decodePathSegment(segment: string): string {
10+
try {
11+
return decodeURIComponent(segment);
12+
} catch {
13+
try {
14+
return decodeURI(segment);
15+
} catch {
16+
return segment;
17+
}
18+
}
19+
}
20+
21+
export function filePathFromRequestUrl(
22+
reqUrl: string,
23+
protocol: string,
24+
hostname: string,
25+
repoId: string
26+
): string {
27+
const pathname = new URL(reqUrl, `${protocol}://${hostname}`).pathname;
28+
const prefix = `/${encodeURIComponent(repoId)}/file/`;
29+
const rawPath = pathname.startsWith(prefix)
30+
? pathname.substring(prefix.length)
31+
: pathname.replace(`/${repoId}/file/`, "");
32+
return rawPath.split("/").map(decodePathSegment).join("/");
33+
}
34+
935
router.get(
1036
"/:repoId/file/:path*",
1137
async (req: express.Request, res: express.Response) => {
12-
const anonymizedPath = decodeURI(
13-
new URL(req.url, `${req.protocol}://${req.hostname}`).pathname.replace(
14-
`/${req.params.repoId}/file/`,
15-
""
16-
)
38+
const anonymizedPath = filePathFromRequestUrl(
39+
req.url,
40+
req.protocol,
41+
req.hostname,
42+
req.params.repoId
1743
);
1844
if (anonymizedPath.endsWith("/")) {
1945
return handleError(

0 commit comments

Comments
 (0)