Skip to content

Commit bf81fa7

Browse files
authored
feat(read-file): implement incremental token-budgeted file reading (#10052)
1 parent 1ff5d1d commit bf81fa7

File tree

6 files changed

+644
-629
lines changed

6 files changed

+644
-629
lines changed

src/core/tools/ReadFileTool.ts

Lines changed: 74 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import {
2525
processImageFile,
2626
ImageMemoryTracker,
2727
} from "./helpers/imageHelpers"
28-
import { validateFileTokenBudget, truncateFileContent } from "./helpers/fileTokenBudget"
28+
import { FILE_READ_BUDGET_PERCENT, readFileWithTokenBudget } from "./helpers/fileTokenBudget"
2929
import { truncateDefinitionsToLineLimit } from "./helpers/truncateDefinitions"
3030
import { BaseTool, ToolCallbacks } from "./BaseTool"
3131
import type { ToolUse } from "../../shared/tools"
@@ -386,7 +386,38 @@ export class ReadFileTool extends BaseTool<"read_file"> {
386386
}
387387

388388
if (supportedBinaryFormats && supportedBinaryFormats.includes(fileExtension)) {
389-
// Fall through to extractTextFromFile
389+
// Use extractTextFromFile for supported binary formats (PDF, DOCX, etc.)
390+
try {
391+
const content = await extractTextFromFile(fullPath)
392+
const numberedContent = addLineNumbers(content)
393+
const lines = content.split("\n")
394+
const lineCount = lines.length
395+
const lineRangeAttr = lineCount > 0 ? ` lines="1-${lineCount}"` : ""
396+
397+
await task.fileContextTracker.trackFileContext(relPath, "read_tool" as RecordSource)
398+
399+
updateFileResult(relPath, {
400+
xmlContent:
401+
lineCount > 0
402+
? `<file><path>${relPath}</path>\n<content${lineRangeAttr}>\n${numberedContent}</content>\n</file>`
403+
: `<file><path>${relPath}</path>\n<content/><notice>File is empty</notice>\n</file>`,
404+
nativeContent:
405+
lineCount > 0
406+
? `File: ${relPath}\nLines 1-${lineCount}:\n${numberedContent}`
407+
: `File: ${relPath}\nNote: File is empty`,
408+
})
409+
continue
410+
} catch (error) {
411+
const errorMsg = error instanceof Error ? error.message : String(error)
412+
updateFileResult(relPath, {
413+
status: "error",
414+
error: `Error extracting text: ${errorMsg}`,
415+
xmlContent: `<file><path>${relPath}</path><error>Error extracting text: ${errorMsg}</error></file>`,
416+
nativeContent: `File: ${relPath}\nError: Error extracting text: ${errorMsg}`,
417+
})
418+
await task.say("error", `Error extracting text from ${relPath}: ${errorMsg}`)
419+
continue
420+
}
390421
} else {
391422
const fileFormat = fileExtension.slice(1) || "bin"
392423
updateFileResult(relPath, {
@@ -492,48 +523,54 @@ export class ReadFileTool extends BaseTool<"read_file"> {
492523
settings: task.apiConfiguration,
493524
}) ?? ANTHROPIC_DEFAULT_MAX_TOKENS
494525

495-
const budgetResult = await validateFileTokenBudget(
496-
fullPath,
497-
contextWindow - maxOutputTokens,
498-
contextTokens || 0,
499-
)
526+
// Calculate available token budget (60% of remaining context)
527+
const remainingTokens = contextWindow - maxOutputTokens - (contextTokens || 0)
528+
const safeReadBudget = Math.floor(remainingTokens * FILE_READ_BUDGET_PERCENT)
500529

501-
let content = await extractTextFromFile(fullPath)
530+
let content: string
502531
let xmlInfo = ""
503-
504532
let nativeInfo = ""
505533

506-
if (budgetResult.shouldTruncate && budgetResult.maxChars !== undefined) {
507-
const truncateResult = truncateFileContent(
508-
content,
509-
budgetResult.maxChars,
510-
content.length,
511-
budgetResult.isPreview,
512-
)
513-
content = truncateResult.content
514-
515-
let displayedLines = content.length === 0 ? 0 : content.split(/\r?\n/).length
516-
if (displayedLines > 0 && content.endsWith("\n")) {
517-
displayedLines--
518-
}
519-
const lineRangeAttr = displayedLines > 0 ? ` lines="1-${displayedLines}"` : ""
520-
xmlInfo =
521-
content.length > 0 ? `<content${lineRangeAttr}>\n${content}</content>\n` : `<content/>`
522-
xmlInfo += `<notice>${truncateResult.notice}</notice>\n`
523-
524-
nativeInfo =
525-
content.length > 0
526-
? `Lines 1-${displayedLines}:\n${content}\n\nNote: ${truncateResult.notice}`
527-
: `Note: ${truncateResult.notice}`
534+
if (safeReadBudget <= 0) {
535+
// No budget available
536+
content = ""
537+
const notice = "No available context budget for file reading"
538+
xmlInfo = `<content/>\n<notice>${notice}</notice>\n`
539+
nativeInfo = `Note: ${notice}`
528540
} else {
529-
const lineRangeAttr = ` lines="1-${totalLines}"`
530-
xmlInfo = totalLines > 0 ? `<content${lineRangeAttr}>\n${content}</content>\n` : `<content/>`
541+
// Read file with incremental token counting
542+
const result = await readFileWithTokenBudget(fullPath, {
543+
budgetTokens: safeReadBudget,
544+
})
531545

532-
if (totalLines === 0) {
533-
xmlInfo += `<notice>File is empty</notice>\n`
534-
nativeInfo = "Note: File is empty"
546+
content = addLineNumbers(result.content)
547+
548+
if (!result.complete) {
549+
// File was truncated
550+
const notice = `File truncated: showing ${result.lineCount} lines (${result.tokenCount} tokens) due to context budget. Use line_range to read specific sections.`
551+
const lineRangeAttr = result.lineCount > 0 ? ` lines="1-${result.lineCount}"` : ""
552+
xmlInfo =
553+
result.lineCount > 0
554+
? `<content${lineRangeAttr}>\n${content}</content>\n<notice>${notice}</notice>\n`
555+
: `<content/>\n<notice>${notice}</notice>\n`
556+
nativeInfo =
557+
result.lineCount > 0
558+
? `Lines 1-${result.lineCount}:\n${content}\n\nNote: ${notice}`
559+
: `Note: ${notice}`
535560
} else {
536-
nativeInfo = `Lines 1-${totalLines}:\n${content}`
561+
// Full file read
562+
const lineRangeAttr = ` lines="1-${result.lineCount}"`
563+
xmlInfo =
564+
result.lineCount > 0
565+
? `<content${lineRangeAttr}>\n${content}</content>\n`
566+
: `<content/>`
567+
568+
if (result.lineCount === 0) {
569+
xmlInfo += `<notice>File is empty</notice>\n`
570+
nativeInfo = "Note: File is empty"
571+
} else {
572+
nativeInfo = `Lines 1-${result.lineCount}:\n${content}`
573+
}
537574
}
538575
}
539576

src/core/tools/__tests__/readFileTool.spec.ts

Lines changed: 64 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -36,20 +36,29 @@ vi.mock("fs/promises", () => fsPromises)
3636
// Mock input content for tests
3737
let mockInputContent = ""
3838

39+
// Create hoisted mocks that can be used in vi.mock factories
40+
const { addLineNumbersMock, mockReadFileWithTokenBudget } = vi.hoisted(() => {
41+
const addLineNumbersMock = vi.fn().mockImplementation((text: string, startLine = 1) => {
42+
if (!text) return ""
43+
const lines = typeof text === "string" ? text.split("\n") : [text]
44+
return lines.map((line: string, i: number) => `${startLine + i} | ${line}`).join("\n")
45+
})
46+
const mockReadFileWithTokenBudget = vi.fn()
47+
return { addLineNumbersMock, mockReadFileWithTokenBudget }
48+
})
49+
3950
// First create all the mocks
4051
vi.mock("../../../integrations/misc/extract-text", () => ({
4152
extractTextFromFile: vi.fn(),
42-
addLineNumbers: vi.fn(),
53+
addLineNumbers: addLineNumbersMock,
4354
getSupportedBinaryFormats: vi.fn(() => [".pdf", ".docx", ".ipynb"]),
4455
}))
4556
vi.mock("../../../services/tree-sitter")
4657

47-
// Then create the mock functions
48-
const addLineNumbersMock = vi.fn().mockImplementation((text, startLine = 1) => {
49-
if (!text) return ""
50-
const lines = typeof text === "string" ? text.split("\n") : [text]
51-
return lines.map((line, i) => `${startLine + i} | ${line}`).join("\n")
52-
})
58+
// Mock readFileWithTokenBudget - must be mocked to prevent actual file system access
59+
vi.mock("../../../integrations/misc/read-file-with-budget", () => ({
60+
readFileWithTokenBudget: (...args: any[]) => mockReadFileWithTokenBudget(...args),
61+
}))
5362

5463
const extractTextFromFileMock = vi.fn()
5564
const getSupportedBinaryFormatsMock = vi.fn(() => [".pdf", ".docx", ".ipynb"])
@@ -145,6 +154,27 @@ beforeEach(() => {
145154
})
146155
: []
147156
})
157+
158+
// Reset addLineNumbers mock to its default implementation (prevents cross-test pollution)
159+
addLineNumbersMock.mockReset()
160+
addLineNumbersMock.mockImplementation((text: string, startLine = 1) => {
161+
if (!text) return ""
162+
const lines = typeof text === "string" ? text.split("\n") : [text]
163+
return lines.map((line: string, i: number) => `${startLine + i} | ${line}`).join("\n")
164+
})
165+
166+
// Reset readFileWithTokenBudget mock with default implementation
167+
mockReadFileWithTokenBudget.mockClear()
168+
mockReadFileWithTokenBudget.mockImplementation(async (_filePath: string, _options: any) => {
169+
// Default: return mockInputContent as a complete (untruncated) read; lineCount is derived from its content (0 when empty)
170+
const lines = mockInputContent ? mockInputContent.split("\n") : []
171+
return {
172+
content: mockInputContent,
173+
tokenCount: mockInputContent.length / 4, // rough estimate
174+
lineCount: lines.length,
175+
complete: true,
176+
}
177+
})
148178
})
149179

150180
// Mock i18n translation function
@@ -496,7 +526,16 @@ describe("read_file tool with maxReadFileLine setting", () => {
496526
it("should read with extractTextFromFile when file has few lines", async () => {
497527
// Setup
498528
mockedCountFileLines.mockResolvedValue(3) // File shorter than maxReadFileLine
499-
mockInputContent = fileContent
529+
const threeLineContent = "Line 1\nLine 2\nLine 3"
530+
mockInputContent = threeLineContent
531+
532+
// Configure the mock to return the correct content for this test
533+
mockReadFileWithTokenBudget.mockResolvedValueOnce({
534+
content: threeLineContent,
535+
tokenCount: threeLineContent.length / 4,
536+
lineCount: 3,
537+
complete: true,
538+
})
500539

501540
// Execute
502541
const result = await executeReadFileTool({}, { maxReadFileLine: 5, totalLines: 3 })
@@ -656,11 +695,15 @@ describe("read_file tool XML output structure", () => {
656695
it("should produce XML output with no unnecessary indentation", async () => {
657696
// Setup
658697
const numberedContent = "1 | Line 1\n2 | Line 2\n3 | Line 3\n4 | Line 4\n5 | Line 5"
659-
// For XML structure test
660-
mockedExtractTextFromFile.mockImplementation(() => {
661-
addLineNumbersMock(mockInputContent)
662-
return Promise.resolve(numberedContent)
698+
699+
// Configure mockReadFileWithTokenBudget to return the 5-line content
700+
mockReadFileWithTokenBudget.mockResolvedValueOnce({
701+
content: fileContent, // "Line 1\nLine 2\nLine 3\nLine 4\nLine 5"
702+
tokenCount: fileContent.length / 4,
703+
lineCount: 5,
704+
complete: true,
663705
})
706+
664707
mockProvider.getState.mockResolvedValue({
665708
maxReadFileLine: -1,
666709
maxImageFileSize: 20,
@@ -693,7 +736,15 @@ describe("read_file tool XML output structure", () => {
693736
it("should handle empty files correctly", async () => {
694737
// Setup
695738
mockedCountFileLines.mockResolvedValue(0)
696-
mockedExtractTextFromFile.mockResolvedValue("")
739+
740+
// Configure mockReadFileWithTokenBudget to return empty content
741+
mockReadFileWithTokenBudget.mockResolvedValueOnce({
742+
content: "",
743+
tokenCount: 0,
744+
lineCount: 0,
745+
complete: true,
746+
})
747+
697748
mockProvider.getState.mockResolvedValue({
698749
maxReadFileLine: -1,
699750
maxImageFileSize: 20,

0 commit comments

Comments
 (0)