Skip to content

Commit 686c0d9

Browse files
authored
Fix guaranteed JSON retry recovery (#52)
1 parent 859dc21 commit 686c0d9

5 files changed

Lines changed: 180 additions & 24 deletions

File tree

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "spinedigest",
3-
"version": "0.1.7",
3+
"version": "0.1.8",
44
"description": "CLI-first tool for digesting long-form text and ebooks into compressed text, EPUB, or reusable .sdpub archives.",
55
"license": "Apache-2.0",
66
"type": "module",

src/editor/review.ts

Lines changed: 46 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import { z } from "zod";
22

3+
import { getLogger } from "../common/logging.js";
34
import {
5+
GuaranteedRequestFailureError,
46
requestGuaranteedJson,
57
RESPONSE_INTENT_CLASSIFIER_PROMPT_TEMPLATE,
68
} from "../guaranteed/index.js";
@@ -125,31 +127,54 @@ export class CompressionReviewer<S extends string> {
125127
previousHistory,
126128
);
127129

128-
return await requestGuaranteedJson({
129-
messages,
130-
parse: (data) => ({
131-
rawResponse: JSON.stringify(data),
130+
try {
131+
return await requestGuaranteedJson({
132+
messages,
133+
parse: (data) => ({
134+
rawResponse: JSON.stringify(data),
135+
review: {
136+
clueId: clueReviewer.clueId,
137+
issues: data.issues.map((issue) => ({
138+
...issue,
139+
severity: expectReviewSeverity(issue.severity),
140+
})),
141+
weight: clueReviewer.weight,
142+
},
143+
}),
144+
responseIntentClassifierPrompt: this.#llm.loadSystemPrompt(
145+
RESPONSE_INTENT_CLASSIFIER_PROMPT_TEMPLATE,
146+
),
147+
request: async (retryMessages, retryIndex, retryMax) =>
148+
await this.#llm.request(retryMessages, {
149+
retryIndex,
150+
retryMax,
151+
scope: this.#reviewScope,
152+
useCache: false,
153+
}),
154+
schema: reviewResponseSchema,
155+
});
156+
} catch (error) {
157+
if (!(error instanceof GuaranteedRequestFailureError)) {
158+
throw error;
159+
}
160+
161+
getLogger({
162+
clueId: clueReviewer.clueId,
163+
component: "editor-review",
164+
}).warn(
165+
{ error },
166+
"Compression reviewer failed to produce valid JSON; treating it as no issues.",
167+
);
168+
169+
return {
170+
rawResponse: undefined,
132171
review: {
133172
clueId: clueReviewer.clueId,
134-
issues: data.issues.map((issue) => ({
135-
...issue,
136-
severity: expectReviewSeverity(issue.severity),
137-
})),
173+
issues: [],
138174
weight: clueReviewer.weight,
139175
},
140-
}),
141-
responseIntentClassifierPrompt: this.#llm.loadSystemPrompt(
142-
RESPONSE_INTENT_CLASSIFIER_PROMPT_TEMPLATE,
143-
),
144-
request: async (retryMessages, retryIndex, retryMax) =>
145-
await this.#llm.request(retryMessages, {
146-
retryIndex,
147-
retryMax,
148-
scope: this.#reviewScope,
149-
useCache: false,
150-
}),
151-
schema: reviewResponseSchema,
152-
});
176+
};
177+
}
153178
}),
154179
);
155180
const rawResponses = Object.create(null) as Record<

src/guaranteed/request.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,16 @@ import {
2424
} from "./response.js";
2525
import type { GuaranteedRequestOptions } from "./types.js";
2626

27-
const DEFAULT_MAX_RETRIES = 7;
27+
const DEFAULT_MAX_RETRIES = 12;
28+
const MAX_MALFORMED_JSON_REPAIR_HISTORY_ATTEMPTS = 3;
2829

2930
export async function requestGuaranteedJson<TData, TResult>(
3031
options: GuaranteedRequestOptions<TData, TResult>,
3132
): Promise<TResult> {
3233
const initialMessages = [...options.messages];
3334
let currentMessages = [...options.messages];
3435
let consecutiveProtocolDerailments = 0;
36+
let consecutiveMalformedJsonFailures = 0;
3537
const maxRetries = options.maxRetries ?? DEFAULT_MAX_RETRIES;
3638

3739
for (let index = 0; index <= maxRetries; index += 1) {
@@ -54,6 +56,7 @@ export async function requestGuaranteedJson<TData, TResult>(
5456

5557
if (intent === "natural_language") {
5658
consecutiveProtocolDerailments += 1;
59+
consecutiveMalformedJsonFailures = 0;
5760

5861
if (consecutiveProtocolDerailments >= 2 || index >= maxRetries) {
5962
const reason =
@@ -76,17 +79,20 @@ export async function requestGuaranteedJson<TData, TResult>(
7679
}
7780

7881
consecutiveProtocolDerailments = 0;
82+
consecutiveMalformedJsonFailures += 1;
7983
currentMessages = buildRetryMessages(
8084
initialMessages,
8185
response,
8286
intent === "malformed_json"
8387
? buildMalformedJsonMessage(asSyntaxError(error))
8488
: buildSyntaxErrorMessage(asSyntaxError(error)),
85-
true,
89+
consecutiveMalformedJsonFailures <
90+
MAX_MALFORMED_JSON_REPAIR_HISTORY_ATTEMPTS,
8691
);
8792
continue;
8893
}
8994
consecutiveProtocolDerailments = 0;
95+
consecutiveMalformedJsonFailures = 0;
9096

9197
const validation = await options.schema.safeParseAsync(parsedData);
9298

@@ -98,6 +104,7 @@ export async function requestGuaranteedJson<TData, TResult>(
98104

99105
if (intent === "natural_language") {
100106
consecutiveProtocolDerailments += 1;
107+
consecutiveMalformedJsonFailures = 0;
101108

102109
if (consecutiveProtocolDerailments >= 2 || index >= maxRetries) {
103110
const reason =

test/editor/review.test.ts

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,73 @@ describe("editor/review", () => {
105105
["system", "user", "assistant", "user"],
106106
);
107107
});
108+
109+
it("falls back to no issues when a reviewer cannot produce valid JSON", async () => {
110+
const llm = new ScriptedLLM<SpineDigestScope>(Array(13).fill(""));
111+
const reviewer = new CompressionReviewer(
112+
llm as never,
113+
createSerialFragments(),
114+
{
115+
review: SPINE_DIGEST_EDITOR_SCOPES.review,
116+
reviewGuide: SPINE_DIGEST_EDITOR_SCOPES.reviewGuide,
117+
},
118+
Language.English,
119+
);
120+
121+
const result = await reviewer.reviewCompression(
122+
"Current compressed text",
123+
[
124+
{
125+
clueId: 1,
126+
label: "Alpha clue",
127+
reviewerInfo: "Check continuity",
128+
weight: 0.8,
129+
},
130+
],
131+
{},
132+
);
133+
134+
expect(result.rawResponses["1"]).toBeUndefined();
135+
expect(result.reviews).toStrictEqual([
136+
{
137+
clueId: 1,
138+
issues: [],
139+
weight: 0.8,
140+
},
141+
]);
142+
});
143+
144+
it("does not hide regular LLM request errors during review", async () => {
145+
const llm = new ScriptedLLM<SpineDigestScope>([
146+
() => {
147+
throw new Error("network failed");
148+
},
149+
]);
150+
const reviewer = new CompressionReviewer(
151+
llm as never,
152+
createSerialFragments(),
153+
{
154+
review: SPINE_DIGEST_EDITOR_SCOPES.review,
155+
reviewGuide: SPINE_DIGEST_EDITOR_SCOPES.reviewGuide,
156+
},
157+
Language.English,
158+
);
159+
160+
await expect(
161+
reviewer.reviewCompression(
162+
"Current compressed text",
163+
[
164+
{
165+
clueId: 1,
166+
label: "Alpha clue",
167+
reviewerInfo: "Check continuity",
168+
weight: 0.8,
169+
},
170+
],
171+
{},
172+
),
173+
).rejects.toThrow("network failed");
174+
});
108175
});
109176

110177
function createChunk(

test/guaranteed/request.test.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,21 @@ describe("guaranteed/request", () => {
9191
expect(request).toHaveBeenCalledTimes(2);
9292
});
9393

94+
it("uses twelve retries by default", async () => {
95+
const request = vi.fn(() => Promise.resolve(""));
96+
97+
await expect(
98+
requestGuaranteedJson({
99+
messages: [],
100+
parse: (data) => data.value,
101+
responseIntentClassifierPrompt: "classifier prompt",
102+
request,
103+
schema,
104+
}),
105+
).rejects.toBeInstanceOf(GuaranteedEmptyResponseError);
106+
expect(request).toHaveBeenCalledTimes(13);
107+
});
108+
94109
it("throws a parse validation error after exhausting retries", async () => {
95110
await expect(
96111
requestGuaranteedJson({
@@ -138,6 +153,48 @@ describe("guaranteed/request", () => {
138153
expect(secondCallMessages?.[1]?.content).toContain("malformed JSON");
139154
});
140155

156+
it("drops malformed JSON history after repeated repair failures", async () => {
157+
const request = vi
158+
.fn<
159+
(
160+
messages: readonly LLMessage[],
161+
retryIndex: number,
162+
retryMax: number,
163+
) => Promise<string>
164+
>()
165+
.mockResolvedValueOnce('{"value": "\\uZZZZ"}')
166+
.mockResolvedValueOnce('{"value": "\\uZZZZ"}')
167+
.mockResolvedValueOnce('{"value": "\\uZZZZ"}')
168+
.mockResolvedValueOnce('{"value": 11}');
169+
170+
const result = await requestGuaranteedJson({
171+
messages: [
172+
{
173+
role: "user",
174+
content: "Return JSON",
175+
},
176+
],
177+
parse: (data) => data.value,
178+
responseIntentClassifierPrompt: "classifier prompt",
179+
request,
180+
schema,
181+
});
182+
183+
expect(result).toBe(11);
184+
185+
const fourthCallMessages = request.mock.calls[3]?.[0];
186+
187+
expect(fourthCallMessages).toHaveLength(2);
188+
expect(fourthCallMessages?.[0]).toMatchObject({
189+
role: "user",
190+
content: "Return JSON",
191+
});
192+
expect(fourthCallMessages?.[1]).toMatchObject({
193+
role: "user",
194+
});
195+
expect(fourthCallMessages?.[1]?.content).toContain("malformed JSON");
196+
});
197+
141198
it("uses classifier fallback for ambiguous replies before deciding to keep history", async () => {
142199
const request = vi
143200
.fn<

0 commit comments

Comments
 (0)