Skip to content

Commit 50c5786

Browse files
authored
feat(#23): kb ingest accepts multiple ids and prints a batch summary (#27)
* feat(#23): kb ingest accepts multiple ids and prints a batch summary

  `paper7 kb ingest` used to take exactly one identifier. Running 10-15 ingests in a research session meant launching the command per paper, then diffing `paper7 kb list` against the expected id list to find the failures, then retrying by hand.

  The argument is now variadic via `Argument.variadic({ min: 1 })`. Single-id behaviour is preserved exactly — the paper's markdown still streams to stdout, so existing pipes keep working. With two or more ids, a new `runKbIngestBatch` path takes over: it ingests serially (arxiv enforces a ~3s rate limit and S2 caps at ~1 req/s on the unauth tier; concurrency buys 429s rather than throughput), and prints one summary block:

      Ingested: N/M papers to <sources-dir>
      Failed:
       <id> — <reason>

  Parse failures, network errors, and cache errors all land in the `Failed:` list with a per-id reason. The batch exits 0 as long as at least one paper landed; if every id failed the new `KbIngestBatchFailed` error fires and the process exits 1 with 'error: all kb ingests failed' on stderr while the summary still goes to stdout.

  The renderer is intentionally terse — soft fallbacks from PR3 (ar5iv → abstract-only) print their own warnings via `Effect.logWarning` during ingest and count toward `Ingested:`, so the summary just reports the final tally.

  Closes #23.

* feat(#23): preserve effect boundaries in kb ingest batch

  Pull rendering and the final fail-decision out of `src/kb.ts` so the domain module returns data and the CLI adapter decides how to present it. `runKbIngestBatch` now returns `KbIngestBatchResult` (attempts + sourcesDir); `src/commands/kb.ts` logs the summary, formats per-id errors, and raises `KbIngestBatchFailed` when every id failed.

  Narrow the per-id error boundary with `Effect.catchTags`. Only the four external fetch failures (`GetArxivError`, `GetAr5ivError`, `GetPubmedError`, `GetCrossrefError`) are converted to per-id `Failed` entries; `KbIoError` and the rest of `GetError` stay in the typed error channel so a wiki write failure / disk-full / permission problem still fails the whole batch loudly instead of being silently reported as a skipped paper.

  `BatchAttempt` now carries the typed `BatchIngestError` payload (new `KbInvalidIdentifier` tag covers unparseable raw ids), and the CLI renderer is the only place that stringifies it. `KbIngestBatchFailed` is raised with bare 'yield* new KbIngestBatchFailed(...)' per repo convention.
1 parent d622521 commit 50c5786

5 files changed

Lines changed: 156 additions & 11 deletions

File tree

src/cli.ts

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -305,11 +305,18 @@ export const makeRootCommand = (loaders?: Partial<CommandLoaders>) => {
305305
)
306306

307307
const kbIngestCommand = Command.make("ingest", {
308-
id: Argument.string("id").pipe(Argument.withDescription("Paper identifier"))
309-
}, (config) =>
310-
parseIdentifierEffect("kb ingest", config.id).pipe(
311-
Effect.flatMap((id) => runCommand({ tag: "kb-ingest", id }))
312-
)).pipe(Command.withShortDescription("Fetch paper into wiki sources"))
308+
ids: Argument.string("id").pipe(
309+
Argument.withDescription("One or more paper identifiers"),
310+
Argument.variadic({ min: 1 })
311+
)
312+
}, (config) => {
313+
if (config.ids.length === 1) {
314+
return parseIdentifierEffect("kb ingest", config.ids[0]).pipe(
315+
Effect.flatMap((id) => runCommand({ tag: "kb-ingest", id }))
316+
)
317+
}
318+
return runCommand({ tag: "kb-ingest-batch", rawIds: config.ids })
319+
}).pipe(Command.withShortDescription("Fetch one or more papers into wiki sources"))
313320

314321
const kbReadCommand = Command.make("read", {
315322
slug: Argument.string("slug").pipe(Argument.withDescription("Page slug, index, or log"))
@@ -476,6 +483,7 @@ function makeRunCommand(loaders: CommandLoaders) {
476483
})
477484
)
478485
case "kb-ingest":
486+
case "kb-ingest-batch":
479487
case "kb-read":
480488
case "kb-write":
481489
case "kb-search":
@@ -506,6 +514,7 @@ const isGetError = (error: KbError | GetError): error is GetError => {
506514
case "KbIoError":
507515
case "KbInvalidSlug":
508516
case "KbGetError":
517+
case "KbIngestBatchFailed":
509518
return false
510519
}
511520
}
@@ -518,6 +527,8 @@ const formatKbError = (error: KbError): string => {
518527
return `error: invalid wiki slug: ${error.slug}`
519528
case "KbGetError":
520529
return formatGetError(error.error)
530+
case "KbIngestBatchFailed":
531+
return `error: ${error.message}`
521532
}
522533
}
523534

src/commands/kb.ts

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,55 @@
11
import { Console, Effect } from "effect"
2-
import { runKb, type KbEnvironment, type KbError } from "../kb.js"
32
import type { GetError } from "../get.js"
3+
import {
4+
type BatchAttempt,
5+
type BatchIngestError,
6+
type KbEnvironment,
7+
type KbError,
8+
KbIngestBatchFailed,
9+
type KbIngestBatchResult,
10+
runKb,
11+
runKbIngestBatch,
12+
} from "../kb.js"
413
import type { CliCommand } from "../parser.js"
514

615
export const runKbCommand = (
716
command: Extract<CliCommand, { readonly tag: `kb-${string}` }>
8-
): Effect.Effect<void, KbError | GetError, KbEnvironment> =>
9-
runKb(command).pipe(Effect.flatMap((output) => Console.log(output)))
17+
): Effect.Effect<void, KbError | GetError, KbEnvironment> => {
18+
if (command.tag === "kb-ingest-batch") {
19+
return Effect.gen(function*() {
20+
const result = yield* runKbIngestBatch(command.rawIds)
21+
yield* Console.log(renderBatchSummary(result))
22+
const ingested = result.attempts.filter((attempt) => attempt._tag === "Ingested").length
23+
if (ingested === 0 && result.attempts.length > 0) {
24+
return yield* new KbIngestBatchFailed({ message: "all kb ingests failed" })
25+
}
26+
})
27+
}
28+
return runKb(command).pipe(Effect.flatMap((output) => Console.log(output)))
29+
}
30+
31+
/**
 * Renders the stdout summary for a multi-id `kb ingest` run.
 *
 * The first line is always the `Ingested: N/M papers to <sourcesDir>` tally.
 * When at least one attempt failed, a `Failed:` heading follows with one
 * `<raw id> — <reason>` line per failed attempt, in attempt order.
 *
 * @param result - The per-id attempts and the sources directory named in the header.
 * @returns The summary text, lines joined with `\n` and no trailing newline.
 */
const renderBatchSummary = (result: KbIngestBatchResult): string => {
  const ingested = result.attempts.filter((attempt) => attempt._tag === "Ingested").length
  const header = `Ingested: ${ingested}/${result.attempts.length} papers to ${result.sourcesDir}`
  const failed = result.attempts.filter(
    (attempt): attempt is Extract<BatchAttempt, { readonly _tag: "Failed" }> => attempt._tag === "Failed"
  )
  // A fully successful batch prints the tally only, with no empty "Failed:" section.
  if (failed.length === 0) return header
  return [
    header,
    "Failed:",
    // The " — " separator keeps the raw id and its reason apart; without it the two
    // run together (e.g. "bogus.idinvalid identifier") and the line cannot be read back.
    ...failed.map((attempt) => ` ${attempt.raw} — ${formatBatchError(attempt.error)}`),
  ].join("\n")
}
44+
45+
/**
 * Produces the short, human-readable reason shown next to a failed id in the
 * batch summary.
 *
 * @param error - The typed failure recorded for one batch attempt.
 * @returns A fixed label for unparseable identifiers, otherwise the message of
 *   the error wrapped by the fetch failure.
 */
const formatBatchError = (error: BatchIngestError): string => {
  // Unparseable ids wrap no underlying error, so they get a fixed label.
  if (error._tag === "KbInvalidIdentifier") return "invalid identifier"

  // Every remaining tag is a fetch failure that wraps the underlying error.
  // The switch stays exhaustive so a newly added tag is flagged at compile time.
  switch (error._tag) {
    case "GetArxivError":
    case "GetAr5ivError":
    case "GetPubmedError":
    case "GetCrossrefError":
      return error.error.message
  }
}

src/kb.ts

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@ import { join } from "node:path"
44
import type { Ar5ivClient } from "./ar5iv.js"
55
import type { ArxivClient } from "./arxiv.js"
66
import type { CrossrefClient } from "./crossref.js"
7-
import { getPaper, type GetError } from "./get.js"
7+
import { type GetAr5ivError, type GetArxivError, type GetCrossrefError, type GetError, getPaper, type GetPubmedError } from "./get.js"
88
import type { CliCommand, PaperIdentifier } from "./parser.js"
9+
import { parsePaperIdentifier } from "./parser.js"
910
import type { PubmedClient } from "./pubmed.js"
1011
import type { SemanticScholarClient } from "./semanticScholar.js"
1112

@@ -22,7 +23,31 @@ export class KbGetError extends Data.TaggedError("KbGetError")<{
2223
readonly error: GetError
2324
}> {}
2425

25-
export type KbError = KbIoError | KbInvalidSlug | KbGetError
26+
/**
 * Tagged error recorded for a batch attempt whose raw identifier could not be
 * parsed into a paper identifier. `raw` is the identifier exactly as supplied.
 */
export class KbInvalidIdentifier extends Data.TaggedError("KbInvalidIdentifier")<{ readonly raw: string }> {}
29+
30+
/**
 * Tagged error signalling that a multi-id ingest produced no successful
 * attempt. `message` is the text the CLI prints after its `error: ` prefix.
 */
export class KbIngestBatchFailed extends Data.TaggedError("KbIngestBatchFailed")<{ readonly message: string }> {}
33+
34+
export type KbError = KbIoError | KbInvalidSlug | KbGetError | KbIngestBatchFailed
35+
36+
export type BatchIngestError =
37+
| KbInvalidIdentifier
38+
| GetArxivError
39+
| GetAr5ivError
40+
| GetPubmedError
41+
| GetCrossrefError
42+
43+
export type BatchAttempt =
44+
| { readonly _tag: "Ingested"; readonly raw: string }
45+
| { readonly _tag: "Failed"; readonly raw: string; readonly error: BatchIngestError }
46+
47+
export type KbIngestBatchResult = {
48+
readonly attempts: ReadonlyArray<BatchAttempt>
49+
readonly sourcesDir: string
50+
}
2651

2752
type WikiPaths = {
2853
readonly root: string
@@ -34,7 +59,7 @@ type WikiPaths = {
3459

3560
export type KbEnvironment = Ar5ivClient | ArxivClient | CrossrefClient | PubmedClient | SemanticScholarClient
3661

37-
export const runKb = (command: Extract<CliCommand, { readonly tag: `kb-${string}` }>): Effect.Effect<string, KbError | GetError, KbEnvironment> => {
62+
export const runKb = (command: Exclude<Extract<CliCommand, { readonly tag: `kb-${string}` }>, { readonly tag: "kb-ingest-batch" }>): Effect.Effect<string, KbError | GetError, KbEnvironment> => {
3863
switch (command.tag) {
3964
case "kb-ingest":
4065
return ingest(command.id)
@@ -51,6 +76,30 @@ export const runKb = (command: Extract<CliCommand, { readonly tag: `kb-${string}
5176
}
5277
}
5378

79+
/**
 * Ingests every raw identifier and reports one attempt per id.
 *
 * The wiki layout is ensured once up front, then each id goes through
 * `ingestOneForBatch`. No concurrency option is passed to `Effect.forEach`,
 * so the ids are processed one after another in the order given.
 *
 * @param rawIds - Identifiers exactly as supplied on the command line (unparsed).
 * @returns The per-id attempts, in input order, together with the sources directory.
 *   Errors that `ingestOneForBatch` does not convert into a `Failed` attempt
 *   remain in the error channel and fail the whole batch.
 */
export const runKbIngestBatch = (rawIds: ReadonlyArray<string>): Effect.Effect<KbIngestBatchResult, KbError | GetError, KbEnvironment> =>
  // Suspend so the wiki paths are resolved when the effect runs, not when it is built.
  Effect.suspend(() => {
    const layout = wikiPaths()
    return ensureWiki(layout).pipe(
      Effect.flatMap(() => Effect.forEach(rawIds, (raw) => ingestOneForBatch(raw))),
      Effect.map((attempts): KbIngestBatchResult => ({ attempts, sourcesDir: layout.sources }))
    )
  })
86+
87+
/**
 * Ingests a single raw identifier on behalf of a batch run, turning the
 * expected per-id failures into data instead of errors.
 *
 * - An identifier that does not parse yields a `Failed` attempt carrying
 *   `KbInvalidIdentifier`, without calling `ingest`.
 * - `GetArxivError`, `GetAr5ivError`, `GetPubmedError` and `GetCrossrefError`
 *   raised by `ingest` yield a `Failed` attempt carrying that error.
 * - Any other error is left in the error channel untouched.
 *
 * @param raw - The identifier exactly as the user typed it; echoed back in the attempt.
 * @returns An `Ingested` attempt on success, or a `Failed` attempt for the cases above.
 */
const ingestOneForBatch = (raw: string): Effect.Effect<BatchAttempt, KbError | GetError, KbEnvironment> => {
  // Single place that builds the Failed attempt for this id.
  const recordFailure = (error: BatchIngestError) =>
    Effect.succeed<BatchAttempt>({ _tag: "Failed", raw, error })

  const parsed = parsePaperIdentifier(raw)
  if (parsed === undefined) return recordFailure(new KbInvalidIdentifier({ raw }))

  return ingest(parsed).pipe(
    // The ingested markdown is discarded; the batch only tracks success per id.
    Effect.map((): BatchAttempt => ({ _tag: "Ingested", raw })),
    // Only these four fetch failures are recoverable per id.
    Effect.catchTags({
      GetArxivError: (error) => recordFailure(error),
      GetAr5ivError: (error) => recordFailure(error),
      GetPubmedError: (error) => recordFailure(error),
      GetCrossrefError: (error) => recordFailure(error),
    })
  )
}
102+
54103
const ingest = (id: PaperIdentifier): Effect.Effect<string, KbError | GetError, KbEnvironment> => {
55104
const paths = wikiPaths()
56105
const sourceName = sourceFileName(id)

src/parser.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ export type CliCommand =
5050
| { readonly tag: "vault-all" }
5151
| { readonly tag: "browse" }
5252
| { readonly tag: "kb-ingest"; readonly id: PaperIdentifier }
53+
| { readonly tag: "kb-ingest-batch"; readonly rawIds: ReadonlyArray<string> }
5354
| { readonly tag: "kb-read"; readonly slug: string }
5455
| { readonly tag: "kb-write"; readonly slug: string }
5556
| { readonly tag: "kb-search"; readonly pattern: string }

tests/kb.test.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,4 +96,42 @@ describe("kb command", () => {
9696
expect(status.stdout).toContain("Pages: 1")
9797
expect(ingest.stdout).toContain("Attention Is All You Need")
9898
})))
99+
100+
it.effect("kb ingest with multiple ids prints a summary instead of paper markdown (issue #23)", () =>
101+
withTempHome(Effect.gen(function*() {
102+
const home = process.env.HOME ?? ""
103+
const sources = join(home, ".paper7", "wiki", "sources")
104+
const result = yield* run(["kb", "ingest", "1706.03762", "2401.04088"])
105+
106+
expect(result.exit._tag).toBe("Success")
107+
expect(result.stderr).toBe("")
108+
expect(result.stdout).toBe(`Ingested: 2/2 papers to ${sources}`)
109+
expect(result.stdout).not.toContain("Attention Is All You Need")
110+
})))
111+
112+
it.effect("kb ingest batch reports invalid identifiers in the Failed section (issue #23)", () =>
113+
withTempHome(Effect.gen(function*() {
114+
const home = process.env.HOME ?? ""
115+
const sources = join(home, ".paper7", "wiki", "sources")
116+
const result = yield* run(["kb", "ingest", "1706.03762", "bogus.id"])
117+
118+
expect(result.exit._tag).toBe("Success")
119+
expect(result.stderr).toBe("")
120+
expect(result.stdout).toContain(`Ingested: 1/2 papers to ${sources}`)
121+
expect(result.stdout).toContain("Failed:")
122+
expect(result.stdout).toContain("bogus.id — invalid identifier")
123+
})))
124+
125+
it.effect("kb ingest batch exits 1 when every id fails (issue #23)", () =>
126+
withTempHome(Effect.gen(function*() {
127+
const home = process.env.HOME ?? ""
128+
const sources = join(home, ".paper7", "wiki", "sources")
129+
const result = yield* run(["kb", "ingest", "bogus.one", "bogus.two"])
130+
131+
expect(result.exit._tag).toBe("Failure")
132+
expect(result.stdout).toContain(`Ingested: 0/2 papers to ${sources}`)
133+
expect(result.stdout).toContain("bogus.one — invalid identifier")
134+
expect(result.stdout).toContain("bogus.two — invalid identifier")
135+
expect(result.stderr).toBe("error: all kb ingests failed")
136+
})))
99137
})

0 commit comments

Comments
 (0)