Skip to content

Commit 8d94198

Browse files
committed
Add new Web Fetch tool
1 parent cfbfc78 commit 8d94198

11 files changed

Lines changed: 717 additions & 826 deletions

File tree

bun.lock

Lines changed: 59 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "dexter-ts",
3-
"version": "2026.5.30",
3+
"version": "2026.6.3",
44
"description": "Dexter - AI agent for deep financial research.",
55
"type": "module",
66
"main": "src/index.tsx",
@@ -28,6 +28,7 @@
2828
"@mariozechner/pi-tui": "^0.52.12",
2929
"@mozilla/readability": "^0.6.0",
3030
"@whiskeysockets/baileys": "7.0.0-rc.9",
31+
"axios": "^1.17.0",
3132
"better-sqlite3": "^12.8.0",
3233
"croner": "^9.1.0",
3334
"diff": "^8.0.4",
@@ -36,8 +37,10 @@
3637
"gray-matter": "^4.0.3",
3738
"langsmith": "^0.4.12",
3839
"linkedom": "^0.18.12",
40+
"lru-cache": "^11.5.1",
3941
"playwright": "^1.58.2",
4042
"qrcode-terminal": "^0.12.0",
43+
"turndown": "^7.2.4",
4144
"zod": "^4.3.6"
4245
},
4346
"devDependencies": {
@@ -47,6 +50,7 @@
4750
"@types/bun": "latest",
4851
"@types/jest": "^29.5.14",
4952
"@types/qrcode-terminal": "^0.12.2",
53+
"@types/turndown": "^5.0.6",
5054
"babel-jest": "^30.3.0",
5155
"jest": "^29.7.0",
5256
"ts-jest": "^29.4.6",

src/tools/fetch/binary-storage.ts

Lines changed: 105 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,105 @@
1+
/**
2+
* Binary content persistence for web_fetch.
3+
*
4+
* When a fetched URL returns binary data (PDFs, images, archives, etc.) the
5+
* raw bytes are written to disk with a mime-derived extension so the agent can
6+
* inspect the file later. The decoded text is still summarized inline; the
7+
* saved file is a supplement, not a replacement.
8+
*/
9+
import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
10+
import { dirname, join } from 'node:path';
11+
import { dexterPath } from '../../utils/paths.js';
12+
13+
/** Directory, resolved through `dexterPath`, that receives persisted binary downloads. */
const WEB_FETCH_OUTPUT_DIR = dexterPath('web-fetch');
15+
16+
// Content-type prefixes that are textual and therefore never treated as
// binary, even though their top-level type may not be `text/*`.
const TEXTUAL_CONTENT_TYPES: readonly string[] = [
  'text/',
  'application/json',
  'application/xml',
  'application/xhtml+xml',
  'application/javascript',
  'application/ecmascript',
  'application/ld+json',
  'application/rss+xml',
  'application/atom+xml',
  'image/svg+xml',
];

// Structured-syntax suffixes whose payload is text regardless of the prefix in
// front of them (e.g. `application/problem+json`, `application/soap+xml`).
// Such types cannot all be enumerated by prefix, so they are matched by suffix.
const TEXTUAL_STRUCTURED_SUFFIXES: readonly string[] = ['+json', '+xml'];

// Mime type -> file extension. Gives persisted binaries a real extension so
// downstream tools (and the user) can open them.
const MIME_TO_EXTENSION: Record<string, string> = {
  'application/pdf': 'pdf',
  'application/json': 'json',
  'text/csv': 'csv',
  'text/plain': 'txt',
  'text/html': 'html',
  'text/markdown': 'md',
  'application/zip': 'zip',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
  'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
  'application/msword': 'doc',
  'application/vnd.ms-excel': 'xls',
  'audio/mpeg': 'mp3',
  'audio/wav': 'wav',
  'audio/ogg': 'ogg',
  'video/mp4': 'mp4',
  'video/webm': 'webm',
  'image/png': 'png',
  'image/jpeg': 'jpg',
  'image/gif': 'gif',
  'image/webp': 'webp',
  'image/svg+xml': 'svg',
};

/**
 * Reduce a Content-Type header value to its bare media type: everything from
 * the first `;` onwards is dropped, surrounding whitespace is trimmed and the
 * result is lower-cased.
 *
 * @param contentType Raw Content-Type value, possibly with parameters.
 * @returns The lower-cased media type, or an empty string for blank input.
 */
function normalizeContentType(contentType: string): string {
  // `split` always yields at least one element; the default only satisfies
  // the type checker without resorting to a non-null assertion.
  const [mediaType = ''] = contentType.split(';');
  return mediaType.trim().toLowerCase();
}

/**
 * Whether a content type should be treated as binary (saved to disk) rather
 * than decoded as text.
 *
 * @param contentType Raw Content-Type value, possibly with parameters.
 * @returns `false` for an empty type, for any type starting with one of the
 *   textual prefixes, and for any type ending in a textual structured suffix
 *   (`+json`, `+xml`); `true` for everything else.
 */
export function isBinaryContentType(contentType: string): boolean {
  const normalized = normalizeContentType(contentType);
  if (!normalized) {
    return false;
  }
  const isTextual =
    TEXTUAL_CONTENT_TYPES.some((prefix) => normalized.startsWith(prefix)) ||
    TEXTUAL_STRUCTURED_SUFFIXES.some((suffix) => normalized.endsWith(suffix));
  return !isTextual;
}

/**
 * Map a content type to the file extension used when persisting it.
 *
 * @param contentType Raw Content-Type value, possibly with parameters.
 * @returns The extension (without a leading dot) registered for the media
 *   type, or `bin` when the type is not in the table.
 */
function extensionForContentType(contentType: string): string {
  const normalized = normalizeContentType(contentType);
  // The value originates from a remote server. Without an own-property check,
  // a type such as `constructor` or `__proto__` would resolve to an inherited
  // Object.prototype member instead of falling back to `bin`.
  if (!Object.prototype.hasOwnProperty.call(MIME_TO_EXTENSION, normalized)) {
    return 'bin';
  }
  return MIME_TO_EXTENSION[normalized] ?? 'bin';
}
81+
82+
/** Outcome of a persist attempt: the written path and byte count, or an error message. */
export type PersistResult = { filepath: string; size: number } | { error: string };

/**
 * Write raw bytes into the web-fetch output directory as `<id>.<extension>`,
 * where the extension is derived from the content type.
 *
 * The parent directory of the target file is created first when it does not
 * exist. Any exception raised along the way is caught and reported through the
 * `error` variant instead of being thrown.
 *
 * @param buffer Bytes to write.
 * @param contentType Content type used to choose the file extension.
 * @param id Base name for the file, used as given.
 * @returns `{ filepath, size }` on success, `{ error }` on failure.
 */
export function persistBinaryContent(
  buffer: Buffer,
  contentType: string,
  id: string,
): PersistResult {
  try {
    const filename = `${id}.${extensionForContentType(contentType)}`;
    const target = join(WEB_FETCH_OUTPUT_DIR, filename);
    const parent = dirname(target);
    const parentMissing = !existsSync(parent);
    if (parentMissing) {
      mkdirSync(parent, { recursive: true });
    }
    writeFileSync(target, buffer);
    return { filepath: target, size: buffer.length };
  } catch (cause: unknown) {
    const message = cause instanceof Error ? cause.message : String(cause);
    return { error: message };
  }
}

src/tools/fetch/cache.ts

Lines changed: 0 additions & 95 deletions
This file was deleted.

0 commit comments

Comments
 (0)