-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path image-analysis.ts
More file actions
88 lines (78 loc) · 2.44 KB
/
image-analysis.ts
File metadata and controls
88 lines (78 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
/**
* Image Analysis Example
*
* Demonstrates:
* - Building base64 image content blocks with `images()`
* - Sending multimodal messages to a vision-capable LLM
* - Structured extraction from image analysis
*
* Usage: bun run dev image-analysis <path-to-image> [low|mid|high|raw|<px>]
* Example: bun run dev image-analysis photo.png high
*/
import { z } from "zod";
import {
createLLM,
images,
resizeImage,
s,
StructuredParseError,
type ImageSize
} from "@/index";
// --- CLI argument parsing -------------------------------------------------
// argv layout: [bun, script, "image-analysis", <path-to-image>, <size>]
const filePath = process.argv[3];
// Validate the required argument before doing any further work.
if (!filePath) {
  console.error("Usage: bun run dev image-analysis <path-to-image> [low|mid|high|raw|<px>]");
  console.error("Example: bun run dev image-analysis photo.png high");
  process.exit(1);
}
const rawSize = process.argv[4] ?? "mid";
// Accept either a named preset or a positive pixel count. Reject anything
// else up front instead of unsafely casting an arbitrary string to ImageSize.
const sizePresets = ["low", "mid", "high", "raw"];
let sizeArg: ImageSize;
if (/^\d+$/.test(rawSize)) {
  const px = parseInt(rawSize, 10);
  if (px <= 0) {
    console.error(`Invalid pixel size "${rawSize}": must be a positive integer.`);
    process.exit(1);
  }
  sizeArg = px;
} else if (sizePresets.includes(rawSize)) {
  sizeArg = rawSize as ImageSize;
} else {
  console.error(`Invalid size "${rawSize}". Use low|mid|high|raw or a pixel count.`);
  process.exit(1);
}
// --- LLM client configuration ---------------------------------------------
// Everything is overridable via environment variables so the example can run
// against any OpenAI- or Anthropic-compatible endpoint.
const env = process.env;
const provider = (env.LLM_PROVIDER ?? "openai-compatible") as
  | "openai-compatible"
  | "anthropic-compatible";
const model = env.LLM_MODEL ?? "gpt-4o-mini";
const baseURL = env.LLM_BASE_URL;
const apiKey = env.LLM_API_KEY;
// Opt-in verbose logging for structured-output debugging.
const debugEnabled = env.STRUCTURED_DEBUG === "1";

const llm = createLLM({
  provider,
  model,
  transport: { baseURL, apiKey },
  defaults: { mode: "loose", selfHeal: 1, debug: debugEnabled },
});
// Zod shape describing the structured result we ask the model to produce.
// The .describe() strings are part of the prompt contract — do not edit them.
const imageAnalysisShape = z.object({
  description: s.string().min(1).describe("What is visible in the image."),
  colors: s.array(s.string()).describe("Dominant colors present."),
  objects: s.array(s.string()).describe("Main objects or subjects detected."),
  mood: s.string().describe("Overall mood or atmosphere of the image."),
});

const ImageAnalysisSchema = s.schema("ImageAnalysis", imageAnalysisShape);
// Resize up front so the payload sent to the model matches the requested size.
const imageInput = await resizeImage(filePath, sizeArg);

// One multimodal user message: a text instruction followed by the image
// content block(s) produced by `images()`.
const userMessage = {
  role: "user" as const,
  content: [
    { type: "text" as const, text: "Analyze this image and return structured data." },
    ...images(imageInput),
  ],
};

try {
  const analysis = await llm.structured(
    ImageAnalysisSchema,
    { messages: [userMessage] },
    // Abort the request if the provider takes longer than five minutes.
    { request: { signal: AbortSignal.timeout(300_000) } },
  );
  console.log("Image analysis:");
  console.log(JSON.stringify(analysis.data, null, 2));
  console.log("Usage:", analysis.usage ?? {});
} catch (error) {
  // Schema-validation failures get a readable report; anything else is an
  // unexpected state and is rethrown untouched.
  if (error instanceof StructuredParseError) {
    console.error("Structured parsing failed.");
    console.error("Zod issues:", error.zodIssues ?? []);
    process.exit(1);
  }
  throw error;
}