Skip to content

Commit 9812eae

Browse files
committed
feat(catalog): add docset size and tarix
1 parent 930b674 commit 9812eae

12 files changed

Lines changed: 720 additions & 16 deletions

scripts/build-catalog.ts

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ import { processFeeds } from "./process-dash-feeds";
33
import { processContrib } from "./process-contrib";
44
import { processCheatsheets } from "./process-cheatsheets";
55
import { fetchReleases } from "./fetch-releases";
6+
import { diffCatalog, fetchBaseline, type DiffableEntry } from "./catalog-diff";
7+
import { enrichMetadata } from "./probe-metadata";
8+
9+
const DEFAULT_BASELINE_URL = "https://api.zealdocs.org/_api/v1/catalog.json";
610

711
const { values } = parseArgs({
812
args: Bun.argv.slice(2),
@@ -11,6 +15,8 @@ const { values } = parseArgs({
1115
blacklist: { type: "string", default: "blacklist.json" },
1216
"resource-dir": { type: "string" },
1317
"feed-dir": { type: "string" },
18+
// Previously-deployed catalog used as a diff baseline. Pass "" to skip.
19+
baseline: { type: "string", default: DEFAULT_BASELINE_URL },
1420
},
1521
allowPositionals: false,
1622
});
@@ -40,7 +46,49 @@ console.log("\nProcessing cheatsheets...");
4046
const cheatsheetEntries = await processCheatsheets({ resourceDir });
4147
console.log(` ${cheatsheetEntries.length} cheatsheets fetched.`);
4248

43-
// Build com.kapeli legacy merged catalog (official + suffixed contrib + suffixed cheatsheet)
49+
// Full catalog (3 individual sources flat-merged). These objects are enriched
50+
// in place below, before the legacy catalog copies them, so both outputs carry
51+
// the size/tarix metadata.
52+
const catalogEntries = [...officialEntries, ...contribEntries, ...cheatsheetEntries];
53+
54+
// Diff against the previously-deployed catalog so unchanged docsets reuse their
55+
// metadata instead of being re-probed.
56+
let baseline: DiffableEntry[] = [];
57+
if (values.baseline) {
58+
try {
59+
baseline = await fetchBaseline(values.baseline);
60+
} catch (err) {
61+
console.warn(`\nWarning: baseline unavailable, probing all docsets: ${err}`);
62+
}
63+
} else {
64+
console.log("\nBaseline disabled; probing all docsets.");
65+
}
66+
67+
const diff = diffCatalog(catalogEntries, baseline);
68+
console.log(
69+
`\nCatalog diff: ${diff.unchanged.length} unchanged, ${diff.changed.length} changed, ` +
70+
`${diff.added.length} added, ${diff.removed.length} removed`,
71+
);
72+
if (diff.changed.length) console.log(` changed: ${diff.changed.join(", ")}`);
73+
if (diff.added.length) console.log(` added: ${diff.added.join(", ")}`);
74+
if (diff.removed.length) console.log(` removed: ${diff.removed.join(", ")}`);
75+
76+
console.log("\nProbing download sizes and tarix availability...");
77+
let meta = { reused: 0, probed: 0, failed: 0, skipped: 0 };
78+
try {
79+
meta = await enrichMetadata({ entries: catalogEntries, diff, baseline, manifest });
80+
} catch (err) {
81+
console.warn(` Warning: metadata probing failed: ${err}`);
82+
}
83+
console.log(
84+
` ${meta.reused} reused, ${meta.probed} probed, ${meta.failed} failed` +
85+
(meta.skipped ? `, ${meta.skipped} skipped` : ""),
86+
);
87+
88+
catalogEntries.sort((a, b) => a.name.toLowerCase().localeCompare(b.name.toLowerCase()));
89+
90+
// Legacy merged catalog (official + suffixed contrib + suffixed cheatsheet),
91+
// derived after enrichment so the copies inherit size/tarix.
4492
const legacyEntries = [
4593
...officialEntries.map((e) => ({ ...e, sourceId: "com.kapeli" })),
4694
...contribEntries.map((e) => ({
@@ -58,10 +106,6 @@ const legacyEntries = [
58106
];
59107
legacyEntries.sort((a, b) => a.name.toLowerCase().localeCompare(b.name.toLowerCase()));
60108

61-
// Build full catalog (3 individual sources flat-merged)
62-
const catalogEntries = [...officialEntries, ...contribEntries, ...cheatsheetEntries];
63-
catalogEntries.sort((a, b) => a.name.toLowerCase().localeCompare(b.name.toLowerCase()));
64-
65109
console.log("\nFetching Zeal releases from GitHub...");
66110
let releases: Awaited<ReturnType<typeof fetchReleases>> = [];
67111
try {

scripts/catalog-diff.test.ts

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import { describe, it, expect, mock } from "bun:test";
2+
3+
let baselineJson: unknown = [];
4+
mock.module("./fetch-retry", () => ({
5+
fetchWithRetry: async (_url: string): Promise<Response> => ({ json: async () => baselineJson }) as Response,
6+
}));
7+
8+
import { entryKey, fingerprint, diffCatalog, fetchBaseline, type DiffableEntry } from "./catalog-diff";
9+
10+
describe("entryKey", () => {
11+
it("combines sourceId and name", () => {
12+
expect(entryKey({ sourceId: "com.kapeli.dash", name: "Bash" })).toBe("com.kapeli.dash/Bash");
13+
});
14+
15+
it("distinguishes same name across sources", () => {
16+
const a = entryKey({ sourceId: "com.kapeli.dash", name: "Vim" });
17+
const b = entryKey({ sourceId: "com.kapeli.cheatsheet", name: "Vim" });
18+
expect(a).not.toBe(b);
19+
});
20+
});
21+
22+
describe("fingerprint", () => {
23+
it("dash: changes with revision or version list", () => {
24+
const base: DiffableEntry = { name: "Bash", sourceId: "com.kapeli.dash", revision: "9", versions: ["9"] };
25+
expect(fingerprint(base)).toBe(fingerprint({ ...base }));
26+
expect(fingerprint({ ...base, revision: "10" })).not.toBe(fingerprint(base));
27+
expect(fingerprint({ ...base, versions: ["9", "8"] })).not.toBe(fingerprint(base));
28+
});
29+
30+
it("contrib: changes with archive or specific versions", () => {
31+
const base: DiffableEntry = {
32+
name: "Jest",
33+
sourceId: "com.kapeli.contrib",
34+
versions: ["29.0"],
35+
archive: "Jest.tgz",
36+
specificVersions: { "29.0": "versions/29.0/Jest.tgz" },
37+
};
38+
expect(fingerprint(base)).toBe(fingerprint({ ...base }));
39+
expect(fingerprint({ ...base, archive: "Jest-new.tgz" })).not.toBe(fingerprint(base));
40+
expect(fingerprint({ ...base, specificVersions: { "29.0": "x", "28.0": "y" } })).not.toBe(fingerprint(base));
41+
});
42+
43+
it("contrib: specific-version order does not matter", () => {
44+
const a: DiffableEntry = {
45+
name: "Jest",
46+
sourceId: "com.kapeli.contrib",
47+
versions: ["29.0"],
48+
archive: "Jest.tgz",
49+
specificVersions: { "29.0": "a", "28.0": "b" },
50+
};
51+
const b: DiffableEntry = { ...a, specificVersions: { "28.0": "b", "29.0": "a" } };
52+
expect(fingerprint(a)).toBe(fingerprint(b));
53+
});
54+
55+
it("contrib: changes when a specific-version archive path changes", () => {
56+
const a: DiffableEntry = {
57+
name: "Jest",
58+
sourceId: "com.kapeli.contrib",
59+
versions: ["29.0"],
60+
archive: "Jest.tgz",
61+
specificVersions: { "29.0": "versions/29.0/Jest.tgz" },
62+
};
63+
const b: DiffableEntry = { ...a, specificVersions: { "29.0": "versions/29.0/Jest-v2.tgz" } };
64+
expect(fingerprint(a)).not.toBe(fingerprint(b));
65+
});
66+
67+
it("cheatsheet: changes only with version", () => {
68+
const base: DiffableEntry = { name: "Vim", sourceId: "com.kapeli.cheatsheet", versions: ["1"] };
69+
expect(fingerprint(base)).toBe(fingerprint({ ...base }));
70+
expect(fingerprint({ ...base, versions: ["2"] })).not.toBe(fingerprint(base));
71+
});
72+
});
73+
74+
describe("diffCatalog", () => {
75+
const baseline: DiffableEntry[] = [
76+
{ name: "Bash", sourceId: "com.kapeli.dash", revision: "9", versions: ["9"] },
77+
{ name: "Go", sourceId: "com.kapeli.dash", revision: "1", versions: ["1.26"] },
78+
{ name: "Vim", sourceId: "com.kapeli.cheatsheet", versions: ["1"] },
79+
];
80+
81+
it("classifies unchanged, changed, added, removed", () => {
82+
const current: DiffableEntry[] = [
83+
{ name: "Bash", sourceId: "com.kapeli.dash", revision: "9", versions: ["9"] }, // unchanged
84+
{ name: "Go", sourceId: "com.kapeli.dash", revision: "2", versions: ["1.27"] }, // changed
85+
{ name: "Rust", sourceId: "com.kapeli.dash", revision: "1", versions: ["1.0"] }, // added
86+
// Vim cheatsheet dropped -> removed
87+
];
88+
const diff = diffCatalog(current, baseline);
89+
expect(diff.unchanged).toEqual(["com.kapeli.dash/Bash"]);
90+
expect(diff.changed).toEqual(["com.kapeli.dash/Go"]);
91+
expect(diff.added).toEqual(["com.kapeli.dash/Rust"]);
92+
expect(diff.removed).toEqual(["com.kapeli.cheatsheet/Vim"]);
93+
});
94+
95+
it("treats an empty baseline as all-added", () => {
96+
const diff = diffCatalog(baseline, []);
97+
expect(diff.added).toHaveLength(3);
98+
expect(diff.unchanged).toHaveLength(0);
99+
expect(diff.changed).toHaveLength(0);
100+
expect(diff.removed).toHaveLength(0);
101+
});
102+
});
103+
104+
describe("fetchBaseline", () => {
105+
it("drops malformed entries missing name or sourceId", async () => {
106+
baselineJson = [
107+
{ name: "Bash", sourceId: "com.kapeli.dash" },
108+
{ name: "NoSource" },
109+
{ sourceId: "com.kapeli.dash" },
110+
null,
111+
];
112+
const entries = await fetchBaseline("https://example/catalog.json");
113+
expect(entries).toEqual([{ name: "Bash", sourceId: "com.kapeli.dash" }]);
114+
});
115+
116+
it("throws when the payload is not an array", async () => {
117+
baselineJson = { not: "an array" };
118+
await expect(fetchBaseline("https://example/catalog.json")).rejects.toThrow();
119+
});
120+
});

scripts/catalog-diff.ts

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import { fetchWithRetry } from "./fetch-retry";
2+
3+
/**
 * The subset of a docset entry that catalog diffing relies on. It is satisfied
 * both by entries built during the current run and by baseline entries parsed
 * out of the live catalog.json.
 */
export type DiffableEntry = {
  /** Docset name; joined with `sourceId` by `entryKey()`. */
  name: string;
  /** Source identifier; `fingerprint()` switches on it to pick its recipe. */
  sourceId: string;
  /** Read by `fingerprint()` for dash entries and for unknown sources. */
  revision?: string;
  /** Version list; part of the fingerprint for every source. */
  versions?: string[];
  /** Read by `fingerprint()` for contrib entries and for unknown sources. */
  archive?: string;
  /** Map of version to archive path; part of the contrib fingerprint. */
  specificVersions?: Record<string, string>;
  /** Type-checked by `fetchBaseline()` before being reused from a baseline. */
  size?: number;
  /** Type-checked by `fetchBaseline()` before being reused from a baseline. */
  tarix?: boolean;
};
17+
18+
/**
 * Build the identifier used to match a docset between catalogs: its source id
 * and its name separated by a slash, so equal names from different sources
 * stay distinct.
 */
export function entryKey(e: { sourceId: string; name: string }): string {
  const { sourceId, name } = e;
  return sourceId + "/" + name;
}
21+
22+
/** Serialise a version -> archive-path map as sorted `version:path` pairs. */
function specificVersionsSignature(specificVersions: Record<string, string> | undefined): string {
  return Object.entries(specificVersions ?? {})
    .sort(([a], [b]) => a.localeCompare(b))
    .map(([version, archivePath]) => `${version}:${archivePath}`)
    .join(",");
}

/**
 * A fingerprint derived from each source's version/revision signal, used to
 * detect which docsets changed between builds. It catches version/revision
 * changes but not a tarball repacked in place under an unchanged version, so
 * metadata reused on a fingerprint match (e.g. size) is best-effort, not exact.
 *
 * Fingerprints are only meaningful when compared with another fingerprint
 * produced by this same function; the string format is not a stable contract.
 *
 * @param e Entry to fingerprint. May be a baseline entry parsed from JSON, so
 *   a `versions` value that is not an array is treated as "no versions"
 *   instead of throwing.
 * @returns A string that differs whenever the fields relevant to the entry's
 *   source differ.
 */
export function fingerprint(e: DiffableEntry): string {
  // Baseline entries are cast from parsed JSON; never call .join on a non-array.
  const versions = Array.isArray(e.versions) ? e.versions.join(",") : "";
  switch (e.sourceId) {
    case "com.kapeli.dash":
      return `${e.revision ?? "0"}|${versions}`;
    case "com.kapeli.contrib":
      return `${versions}|${e.archive ?? ""}|${specificVersionsSignature(e.specificVersions)}`;
    case "com.kapeli.cheatsheet":
      // Cheatsheet download URLs are unversioned; version is the only signal.
      return versions;
    default:
      // Unknown source: fold in every field (including specificVersions) so
      // any change is caught.
      return (
        `${e.revision ?? "0"}|${versions}|${e.archive ?? ""}|` +
        specificVersionsSignature(e.specificVersions)
      );
  }
}
48+
49+
/**
 * Outcome of comparing the current catalog with a baseline. Each bucket holds
 * entryKey() strings; "added" and "removed" are relative to the baseline.
 */
export type CatalogDiff = Record<"unchanged" | "changed" | "added" | "removed", string[]>;
56+
57+
/**
 * Compare the current catalog with a baseline.
 *
 * Each current entry is matched to the baseline entry with the same
 * entryKey(): no match means "added", an equal fingerprint() means
 * "unchanged", anything else means "changed". Baseline keys that never show up
 * among the current entries are reported as "removed".
 *
 * @param current Entries built during this run.
 * @param baseline Entries from the previously-deployed catalog.
 * @returns Keys grouped by classification; the first three buckets follow the
 *   order of `current`, and `removed` follows the order of `baseline`.
 */
export function diffCatalog(current: DiffableEntry[], baseline: DiffableEntry[]): CatalogDiff {
  const baselineByKey = new Map<string, DiffableEntry>();
  for (const previous of baseline) {
    baselineByKey.set(entryKey(previous), previous);
  }

  const unchanged: string[] = [];
  const changed: string[] = [];
  const added: string[] = [];
  const seen = new Set<string>();

  current.forEach((entry) => {
    const key = entryKey(entry);
    seen.add(key);

    const previous = baselineByKey.get(key);
    if (!previous) {
      added.push(key);
      return;
    }

    const bucket = fingerprint(entry) === fingerprint(previous) ? unchanged : changed;
    bucket.push(key);
  });

  const removed = Array.from(baselineByKey.keys()).filter((key) => !seen.has(key));

  return { unchanged, changed, added, removed };
}
81+
82+
/**
 * Fetch and parse the previously-deployed catalog as a diff baseline.
 *
 * Elements without a string `name` and string `sourceId` are dropped. On the
 * remaining entries, fields that later code relies on structurally are
 * normalised to `undefined` when they have the wrong type, so one malformed
 * entry cannot break diffing or leak bad metadata.
 *
 * @param url Location of the baseline catalog.json.
 * @returns The usable baseline entries, in payload order.
 * @throws Error if the payload is not a JSON array; any error raised by
 *   `fetchWithRetry` or by JSON parsing also propagates.
 */
export async function fetchBaseline(url: string): Promise<DiffableEntry[]> {
  const res = await fetchWithRetry(url);
  const data = await res.json();
  if (!Array.isArray(data)) {
    throw new Error("Baseline catalog is not a JSON array");
  }
  return (data as DiffableEntry[]).flatMap((e) => {
    if (!e || typeof e.name !== "string" || typeof e.sourceId !== "string") return [];
    return [
      {
        ...e,
        // fingerprint() joins `versions`; a non-array value would throw there.
        versions: Array.isArray(e.versions) ? e.versions : undefined,
        // `size`/`tarix` are reused verbatim into output, so reject bad types.
        size: typeof e.size === "number" ? e.size : undefined,
        tarix: typeof e.tarix === "boolean" ? e.tarix : undefined,
      },
    ];
  });
}

0 commit comments

Comments
 (0)