From 5a040b27eef3edeb8f12cecbb1a21c1670b09864 Mon Sep 17 00:00:00 2001 From: ComputelessComputer <63365510+ComputelessComputer@users.noreply.github.com> Date: Mon, 1 Jun 2026 12:36:59 +0900 Subject: [PATCH] fix(enhance): include note images in summaries Read referenced image attachments for multimodal summary models and send them as AI SDK image parts. Add fs-sync attachment reading support and coverage. --- .../ai/shared/model-capabilities.test.ts | 33 ++ .../settings/ai/shared/model-capabilities.ts | 28 ++ .../task-configs/enhance-images.test.ts | 121 +++++++ .../ai-task/task-configs/enhance-images.ts | 332 ++++++++++++++++++ .../task-configs/enhance-transform.test.ts | 83 ++++- .../ai-task/task-configs/enhance-transform.ts | 20 ++ .../ai-task/task-configs/enhance-workflow.ts | 66 +++- .../zustand/ai-task/task-configs/index.ts | 6 +- crates/fs-sync-core/src/lib.rs | 24 ++ plugins/fs-sync/build.rs | 1 + plugins/fs-sync/js/bindings.gen.ts | 8 + .../commands/attachment_read.toml | 13 + .../permissions/autogenerated/reference.md | 27 ++ plugins/fs-sync/permissions/default.toml | 1 + .../fs-sync/permissions/schemas/schema.json | 16 +- plugins/fs-sync/src/commands.rs | 14 + plugins/fs-sync/src/ext.rs | 8 + plugins/fs-sync/src/lib.rs | 1 + 18 files changed, 788 insertions(+), 14 deletions(-) create mode 100644 apps/desktop/src/settings/ai/shared/model-capabilities.test.ts create mode 100644 apps/desktop/src/settings/ai/shared/model-capabilities.ts create mode 100644 apps/desktop/src/store/zustand/ai-task/task-configs/enhance-images.test.ts create mode 100644 apps/desktop/src/store/zustand/ai-task/task-configs/enhance-images.ts create mode 100644 plugins/fs-sync/permissions/autogenerated/commands/attachment_read.toml diff --git a/apps/desktop/src/settings/ai/shared/model-capabilities.test.ts b/apps/desktop/src/settings/ai/shared/model-capabilities.test.ts new file mode 100644 index 0000000000..81988cd282 --- /dev/null +++ b/apps/desktop/src/settings/ai/shared/model-capabilities.test.ts @@ -0,0 +1,33 @@ +import { describe, expect, it } from "vitest"; + +import { modelSupportsImageInput } from "./model-capabilities"; + +describe("modelSupportsImageInput", () => { + it("allows known multimodal hosted models", () => { + expect(modelSupportsImageInput("hyprnote", "Auto")).toBe(true); + expect(modelSupportsImageInput("openai", "gpt-4o")).toBe(true); + expect(modelSupportsImageInput("anthropic", "claude-3-5-sonnet")).toBe( + true, + ); + expect( + modelSupportsImageInput("google_generative_ai", "gemini-2.5-pro"), + ).toBe(true); + }); + + it("blocks known text-only or non-chat models", () => { + expect(modelSupportsImageInput("openai", "gpt-3.5-turbo")).toBe(false); + expect(modelSupportsImageInput("openai", "gpt-4")).toBe(false); + expect(modelSupportsImageInput("anthropic", "claude-2.1")).toBe(false); + expect(modelSupportsImageInput("anthropic", "custom-text-model")).toBe( + false, + ); + expect(modelSupportsImageInput("openai", "text-embedding-3-large")).toBe( + false, + ); + }); + + it("requires a vision-like model name for unknown local providers", () => { + expect(modelSupportsImageInput("ollama", "llava:latest")).toBe(true); + expect(modelSupportsImageInput("custom", "llama-3.1-8b")).toBe(false); + }); +}); diff --git a/apps/desktop/src/settings/ai/shared/model-capabilities.ts b/apps/desktop/src/settings/ai/shared/model-capabilities.ts new file mode 100644 index 0000000000..359d206665 --- /dev/null +++ b/apps/desktop/src/settings/ai/shared/model-capabilities.ts @@ -0,0 +1,28 @@ +const TEXT_ONLY_MODEL_RE = + /(?:^|[/:\-.])(?:gpt-3\.5|claude-2|claude-instant|davinci|babbage|curie|ada|dall-e|sora|gpt-image|image-generation|embed|embedding|whisper|tts|transcribe|moderation|realtime|computer)(?:$|[/:\-.])/i; + +const IMAGE_INPUT_MODEL_RE = + /(?:gpt-4o|gpt-4\.1|gpt-5|claude-3|claude-sonnet|claude-opus|claude-haiku|gemini|pixtral|vision|vl|llava|llama-3\.2-vision|llama3\.2-vision|moondream|minicpm-v|internvl|qwen(?:2|2\.5|3)?-vl|gemma-3|gemma3)/i; + +export function modelSupportsImageInput( + providerId: string | undefined, + modelId: string | undefined, +): boolean { + if (!providerId || !modelId) { + return false; + } + + if (TEXT_ONLY_MODEL_RE.test(modelId)) { + return false; + } + + if (providerId === "hyprnote" && modelId === "Auto") { + return true; + } + + if (IMAGE_INPUT_MODEL_RE.test(modelId)) { + return true; + } + + return false; +} diff --git a/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-images.test.ts b/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-images.test.ts new file mode 100644 index 0000000000..c5a7d5ca01 --- /dev/null +++ b/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-images.test.ts @@ -0,0 +1,121 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +import { + collectEnhanceImageContext, + collectImageReferences, + getBase64ByteLength, +} from "./enhance-images"; + +const fsSyncMocks = vi.hoisted(() => ({ + attachmentList: vi.fn(), + attachmentRead: vi.fn(), +})); + +vi.mock("@hypr/plugin-fs-sync", () => ({ + commands: fsSyncMocks, +})); + +describe("enhance image context", () => { + beforeEach(() => { + vi.clearAllMocks(); + fsSyncMocks.attachmentList.mockResolvedValue({ + status: "ok", + data: [ + { + attachmentId: "diagram.png", + path: "/vault/sessions/session-1/attachments/diagram.png", + extension: "png", + modifiedAt: "", + }, + { + attachmentId: "stale.png", + path: "/vault/sessions/session-1/attachments/stale.png", + extension: "png", + modifiedAt: "", + }, + ], + }); + fsSyncMocks.attachmentRead.mockResolvedValue({ + status: "ok", + data: [104, 101, 108, 108, 111], + }); + }); + + it("reads only image attachments referenced by note JSON", async () => { + const rawContent = JSON.stringify({ + type: "doc", + content: [ + { + type: "image", + attrs: { + src: "asset://localhost/%2Fvault%2Fsessions%2Fsession-1%2Fattachments%2Fdiagram.png", + attachmentId: "diagram.png", + }, + }, + ], + }); + + const images = await collectEnhanceImageContext("session-1", rawContent); + + expect(images).toEqual([ + { + base64: "aGVsbG8=", + mimeType: "image/png", + filename: "diagram.png", + }, + ]); + expect(fsSyncMocks.attachmentRead).toHaveBeenCalledWith( + "session-1", + "diagram.png", + ); + }); + + it("extracts markdown image filenames from asset URLs", () => { + expect( + collectImageReferences( + "![diagram](asset://localhost/%2Fvault%2Fsessions%2Fsession-1%2Fattachments%2Fdiagram.png)", + ), + ).toEqual([{ filename: "diagram.png" }]); + }); + + it("does not treat remote markdown images as local attachments", () => { + expect( + collectImageReferences("![diagram](https://example.com/diagram.png)"), + ).toEqual([{ filename: undefined }]); + }); + + it("keeps base64 data URL images without reading attachments", async () => { + const images = await collectEnhanceImageContext( + "session-1", + "![pasted](data:image/png;base64,abc123)", + ); + + expect(images).toEqual([{ base64: "abc123", mimeType: "image/png" }]); + expect(fsSyncMocks.attachmentList).not.toHaveBeenCalled(); + }); + + it("does not load an attachment again for a node that already has a data URL", async () => { + const rawContent = JSON.stringify({ + type: "doc", + content: [ + { + type: "image", + attrs: { + src: "data:image/png;base64,abc123", + attachmentId: "diagram.png", + }, + }, + ], + }); + + const images = await collectEnhanceImageContext("session-1", rawContent); + + expect(images).toEqual([{ base64: "abc123", mimeType: "image/png" }]); + expect(fsSyncMocks.attachmentList).not.toHaveBeenCalled(); + }); + + it("computes decoded base64 byte length before applying the data URL cap", () => { + expect(getBase64ByteLength("aGVsbG8=")).toBe(5); + expect(getBase64ByteLength("YW55IGNhcm5hbCBwbGVhcw==")).toBe(16); + }); +}); diff --git a/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-images.ts b/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-images.ts new file mode 100644 index 0000000000..6da4a35c0b --- /dev/null +++ b/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-images.ts @@ -0,0 +1,332 @@ +import { + type AttachmentInfo, + commands as fsSyncCommands, +} from "@hypr/plugin-fs-sync"; + +export type EnhanceImageContext = { + base64: string; + mimeType: string; + filename?: string; +}; + +type ImageReference = { + attachmentId?: string; + filename?: string; + dataUrl?: { base64: string; mimeType: string }; +}; + +const MAX_IMAGE_COUNT = 10; +const MAX_IMAGE_BYTES = 8 * 1024 * 1024; + +const EXTENSION_TO_MIME: Record = { + gif: "image/gif", + jpeg: "image/jpeg", + jpg: "image/jpeg", + png: "image/png", + webp: "image/webp", +}; + +const MARKDOWN_IMAGE_RE = /!\[[^\]]*]\((<[^>]+>|[^)\s]+)(?:\s+"[^"]*")?\)/g; + +export async function collectEnhanceImageContext( + sessionId: string, + rawContent: string | string[], +): Promise { + const references = Array.isArray(rawContent) + ? rawContent.flatMap(collectImageReferences) + : collectImageReferences(rawContent); + const images: EnhanceImageContext[] = []; + + for (const ref of references) { + if (!ref.dataUrl) { + continue; + } + + images.push(ref.dataUrl); + if (images.length >= MAX_IMAGE_COUNT) { + return images; + } + } + + const attachmentRefs = references.filter( + (ref) => !ref.dataUrl && (ref.attachmentId || ref.filename), + ); + if (attachmentRefs.length === 0) { + return images; + } + + const listResult = await fsSyncCommands.attachmentList(sessionId); + if (listResult.status === "error") { + console.warn( + "[enhance] failed to list image attachments", + listResult.error, + ); + return images; + } + + const attachmentsById = new Map( + listResult.data.map((attachment) => [attachment.attachmentId, attachment]), + ); + const attachmentsByFilename = new Map( + listResult.data.map((attachment) => [ + getPathFilename(attachment.path) || attachment.attachmentId, + attachment, + ]), + ); + const seen = new Set(); + + for (const ref of attachmentRefs) { + const attachment = + (ref.attachmentId ? attachmentsById.get(ref.attachmentId) : undefined) ?? + (ref.filename ? attachmentsById.get(ref.filename) : undefined) ?? + (ref.filename ? attachmentsByFilename.get(ref.filename) : undefined); + + if (!attachment || seen.has(attachment.attachmentId)) { + continue; + } + + seen.add(attachment.attachmentId); + const image = await readImageAttachment(sessionId, attachment); + if (!image) { + continue; + } + + images.push(image); + if (images.length >= MAX_IMAGE_COUNT) { + return images; + } + } + + return images; +} + +export function collectImageReferences(rawContent: string): ImageReference[] { + const trimmed = rawContent.trim(); + if (!trimmed) { + return []; + } + + if (trimmed.startsWith("{")) { + try { + return collectJsonImageReferences(JSON.parse(trimmed)); + } catch { + return []; + } + } + + return collectMarkdownImageReferences(trimmed); +} + +async function readImageAttachment( + sessionId: string, + attachment: AttachmentInfo, +): Promise { + const mimeType = getImageMimeType(attachment.extension); + if (!mimeType) { + return null; + } + + const readResult = await fsSyncCommands.attachmentRead( + sessionId, + attachment.attachmentId, + ); + if (readResult.status === "error") { + console.warn("[enhance] failed to read image attachment", readResult.error); + return null; + } + + if (readResult.data.length > MAX_IMAGE_BYTES) { + return null; + } + + return { + base64: bytesToBase64(readResult.data), + mimeType, + filename: attachment.attachmentId, + }; +} + +function collectJsonImageReferences(node: unknown): ImageReference[] { + const references: ImageReference[] = []; + + function visit(value: unknown) { + if (!value || typeof value !== "object") { + return; + } + + const node = value as { + type?: unknown; + attrs?: Record; + content?: unknown[]; + }; + if (node.type === "image" || node.type === "fileAttachment") { + const mimeType = + typeof node.attrs?.mimeType === "string" ? node.attrs.mimeType : ""; + const src = typeof node.attrs?.src === "string" ? node.attrs.src : ""; + const isImage = + node.type === "image" || + mimeType.startsWith("image/") || + !!getImageMimeType(getPathExtension(src)); + + if (isImage) { + references.push(referenceFromAttrs(node.attrs)); + } + } + + node.content?.forEach(visit); + } + + visit(node); + return references; +} + +function collectMarkdownImageReferences(markdown: string): ImageReference[] { + const references: ImageReference[] = []; + for (const match of markdown.matchAll(MARKDOWN_IMAGE_RE)) { + const src = unwrapMarkdownUrl(match[1]); + references.push(referenceFromSrc(src)); + } + return references; +} + +function referenceFromAttrs( + attrs: Record | undefined, +): ImageReference { + const attachmentId = + typeof attrs?.attachmentId === "string" && attrs.attachmentId.length > 0 + ? attrs.attachmentId + : undefined; + const src = typeof attrs?.src === "string" ? attrs.src : ""; + const path = typeof attrs?.path === "string" ? attrs.path : ""; + const srcRef = referenceFromSrc(src); + + return { + attachmentId, + dataUrl: srcRef.dataUrl, + filename: srcRef.filename ?? getAttachmentFilename(path), + }; +} + +function referenceFromSrc(src: string): ImageReference { + const dataUrl = parseImageDataUrl(src); + if (dataUrl) { + return { dataUrl }; + } + + return { filename: getAttachmentFilename(src) }; +} + +function parseImageDataUrl(src: string): EnhanceImageContext | null { + const match = src.match( + /^data:(image\/(?:gif|jpe?g|png|webp));base64,(.+)$/i, + ); + if (!match) { + return null; + } + + const mimeType = + match[1].toLowerCase() === "image/jpg" ? "image/jpeg" : match[1]; + const base64 = match[2]; + if (getBase64ByteLength(base64) > MAX_IMAGE_BYTES) { + return null; + } + + return { base64, mimeType }; +} + +function unwrapMarkdownUrl(src: string): string { + const unwrapped = + src.startsWith("<") && src.endsWith(">") ? src.slice(1, -1) : src; + return unwrapped.replace(/\\([()])/g, "$1"); +} + +function getImageMimeType(extension: string | undefined): string | null { + if (!extension) { + return null; + } + + return EXTENSION_TO_MIME[extension.toLowerCase()] ?? null; +} + +function getPathExtension(path: string): string | undefined { + const filename = getPathFilename(path); + const dotIndex = filename?.lastIndexOf(".") ?? -1; + return dotIndex >= 0 ? filename?.slice(dotIndex + 1) : undefined; +} + +function getPathFilename(path: string): string | undefined { + const normalized = normalizePathLike(path); + const filename = normalized.split(/[\\/]/).filter(Boolean).pop(); + return filename ? decodePathPart(filename) : undefined; +} + +function getAttachmentFilename(src: string): string | undefined { + const trimmed = src.trim(); + if (!trimmed) { + return undefined; + } + + try { + const url = new URL(trimmed); + if (url.protocol === "asset:" || url.protocol === "file:") { + return getPathFilename(trimmed); + } + + return undefined; + } catch {} + + const normalized = normalizePathLike(trimmed); + if ( + !normalized.includes("/attachments/") && + !normalized.includes("\\attachments\\") + ) { + return undefined; + } + + return getPathFilename(normalized); +} + +function normalizePathLike(path: string): string { + const trimmed = path.trim(); + if (!trimmed) { + return ""; + } + + try { + const url = new URL(trimmed); + if (url.protocol === "asset:" || url.protocol === "file:") { + return decodePathPart(url.pathname); + } + } catch {} + + return decodePathPart(trimmed); +} + +function decodePathPart(value: string): string { + try { + return decodeURIComponent(value); + } catch { + return value; + } +} + +export function getBase64ByteLength(base64: string): number { + const normalized = base64.replace(/\s/g, ""); + const padding = normalized.endsWith("==") + ? 2 + : normalized.endsWith("=") + ? 1 + : 0; + return Math.floor((normalized.length * 3) / 4) - padding; +} + +function bytesToBase64(bytes: number[]): string { + let binary = ""; + const chunkSize = 0x8000; + + for (let i = 0; i < bytes.length; i += chunkSize) { + binary += String.fromCharCode(...bytes.slice(i, i + chunkSize)); + } + + return btoa(binary); +} diff --git a/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-transform.test.ts b/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-transform.test.ts index c71449d803..6f77ca8cf6 100644 --- a/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-transform.test.ts +++ b/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-transform.test.ts @@ -2,10 +2,22 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { enhanceTransform } from "./enhance-transform"; -const getTemplateByIdMock = vi.hoisted(() => vi.fn()); +const mocks = vi.hoisted(() => ({ + collectEnhanceImageContext: vi.fn(), + getTemplateById: vi.fn(), +})); + +vi.mock("./enhance-images", () => ({ + collectEnhanceImageContext: mocks.collectEnhanceImageContext, +})); vi.mock("~/templates/queries", () => ({ - getTemplateById: getTemplateByIdMock, + getTemplateById: mocks.getTemplateById, +})); + +vi.mock("~/stt/render-transcript", () => ({ + buildRenderTranscriptRequestFromStore: vi.fn(() => null), + renderTranscriptSegments: vi.fn(), })); function createStore() { @@ -39,7 +51,8 @@ describe("enhanceTransform.transformArgs", () => { beforeEach(() => { vi.clearAllMocks(); - getTemplateByIdMock.mockResolvedValue(null); + mocks.collectEnhanceImageContext.mockResolvedValue([]); + mocks.getTemplateById.mockResolvedValue(null); consoleError = vi.spyOn(console, "error").mockImplementation(() => {}); }); @@ -48,7 +61,7 @@ describe("enhanceTransform.transformArgs", () => { }); it("uses the selected template when it can be loaded", async () => { - getTemplateByIdMock.mockResolvedValue({ + mocks.getTemplateById.mockResolvedValue({ title: "Standup", description: "Daily sync", sections: [{ title: "Updates", description: null }], @@ -72,7 +85,7 @@ describe("enhanceTransform.transformArgs", () => { }); it("falls back to generic enhancement when template loading fails", async () => { - getTemplateByIdMock.mockRejectedValue(new Error("Failed query")); + mocks.getTemplateById.mockRejectedValue(new Error("Failed query")); const result = await enhanceTransform.transformArgs( { @@ -91,4 +104,64 @@ describe("enhanceTransform.transformArgs", () => { expect.any(Error), ); }); + + it("collects image context from pre- and post-meeting memo content", async () => { + const store = createStore(); + store.forEachRow.mockImplementation( + (tableId: string, callback: (rowId: string) => void) => { + if (tableId === "transcripts") { + callback("transcript-1"); + } + }, + ); + store.getCell.mockImplementation( + (tableId: string, _rowId: string, cellId: string) => { + if (tableId === "sessions" && cellId === "title") { + return "Weekly Review"; + } + if (tableId === "sessions" && cellId === "raw_md") { + return "![post](asset://localhost/post.png)"; + } + if (tableId === "transcripts" && cellId === "session_id") { + return "session-1"; + } + if (tableId === "transcripts" && cellId === "started_at") { + return 100; + } + if (tableId === "transcripts" && cellId === "memo_md") { + return "![pre](asset://localhost/pre.png)"; + } + + return ""; + }, + ); + + await enhanceTransform.transformArgs( + { + sessionId: "session-1", + enhancedNoteId: "note-1", + }, + store, + { + getValue: vi.fn((valueId: string) => { + if (valueId === "current_llm_provider") { + return "openai"; + } + if (valueId === "current_llm_model") { + return "gpt-4o"; + } + if (valueId === "ai_language") { + return "en"; + } + + return ""; + }), + } as any, + ); + + expect(mocks.collectEnhanceImageContext).toHaveBeenCalledWith("session-1", [ + "![pre](asset://localhost/pre.png)", + "![post](asset://localhost/post.png)", + ]); + }); }); diff --git a/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-transform.ts b/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-transform.ts index dcbff231cf..307d4bfbce 100644 --- a/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-transform.ts +++ b/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-transform.ts @@ -6,8 +6,10 @@ import type { } from "@hypr/plugin-template"; import type { TaskArgsMap, TaskArgsMapTransformed, TaskConfig } from "."; +import { collectEnhanceImageContext } from "./enhance-images"; import { getSessionEventById } from "~/session/utils"; +import { modelSupportsImageInput } from "~/settings/ai/shared/model-capabilities"; import type { Store as MainStore } from "~/store/tinybase/store/main"; import type { Store as SettingsStore } from "~/store/tinybase/store/settings"; import { @@ -56,6 +58,15 @@ async function transformArgs( sessionContext.transcriptsMeta, store, ); + const imageContext = modelSupportsImageInput( + getOptionalSettingsValue(settingsStore, "current_llm_provider"), + getOptionalSettingsValue(settingsStore, "current_llm_model"), + ) + ? await collectEnhanceImageContext(sessionId, [ + sessionContext.preMeetingMemo, + sessionContext.postMeetingMemo, + ]) + : []; return { language, @@ -65,6 +76,7 @@ async function transformArgs( preMeetingMemo: sessionContext.preMeetingMemo, postMeetingMemo: sessionContext.postMeetingMemo, transcripts: formatTranscripts(segments, sessionContext.transcriptsMeta), + imageContext, }; } @@ -117,6 +129,14 @@ function getLanguage(settingsStore: SettingsStore): string | null { return typeof value === "string" && value.length > 0 ? value : null; } +function getOptionalSettingsValue( + settingsStore: SettingsStore, + valueId: string, +): string | undefined { + const value = settingsStore.getValue(valueId as any); + return typeof value === "string" && value.length > 0 ? value : undefined; +} + function getSessionContext(sessionId: string, store: MainStore) { const transcriptsMeta = collectTranscripts(sessionId, store); const rawMd = getStringCell(store, "sessions", sessionId, "raw_md"); diff --git a/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-workflow.ts b/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-workflow.ts index 62bbfc8f45..35966931f4 100644 --- a/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-workflow.ts +++ b/apps/desktop/src/store/zustand/ai-task/task-configs/enhance-workflow.ts @@ -1,9 +1,11 @@ import { generateText, + type ImagePart, type LanguageModel, Output, smoothStream, streamText, + type TextPart, } from "ai"; import { z } from "zod"; @@ -14,6 +16,7 @@ import { import { templateSectionSchema } from "@hypr/store"; import type { TaskArgsMapTransformed, TaskConfig } from "."; +import type { EnhanceImageContext } from "./enhance-images"; import { createEnhanceValidator } from "./enhance-validator"; import type { Store } from "~/store/tinybase/store/main"; @@ -24,6 +27,8 @@ import { assertCanonicalTemplateSections } from "~/templates/codec"; const AI_GENERATION_MAX_RETRIES = 4; const TEMPLATE_MAX_OUTPUT_TOKENS = 2048; const SUMMARY_MAX_OUTPUT_TOKENS = 8192; +const IMAGE_CONTEXT_NOTE = + "Attached note images are included as visual context. Use visible text, diagrams, screenshots, and other image content when it materially improves the summary."; export const enhanceWorkflow: Pick< TaskConfig<"enhance">, @@ -58,7 +63,10 @@ async function* executeWorkflow(params: { }; const system = await getSystemPrompt(argsWithTemplate); - const prompt = await getUserPrompt(argsWithTemplate, store); + const prompt = withImageContextNote( + await getUserPrompt(argsWithTemplate, store), + argsWithTemplate.imageContext.length, + ); yield* generateSummary({ model, @@ -137,13 +145,17 @@ async function generateTemplateIfNeeded(params: { onProgress({ type: "analyzing" }); const schema = z.object({ sections: z.array(templateSectionSchema) }); - const userPrompt = await getUserPrompt(args, store); + const userPrompt = withImageContextNote( + await getUserPrompt(args, store), + args.imageContext.length, + ); const result = await generateStructuredOutput({ model, schema, signal, prompt: createTemplatePrompt(userPrompt, schema), + imageContext: args.imageContext, }); if (!result) { @@ -189,8 +201,9 @@ async function generateStructuredOutput(params: { schema: T; signal: AbortSignal; prompt: string; + imageContext: EnhanceImageContext[]; }): Promise | null> { - const { model, schema, signal, prompt } = params; + const { model, schema, signal, prompt, imageContext } = params; try { const result = await generateText({ @@ -200,7 +213,7 @@ async function generateStructuredOutput(params: { abortSignal: signal, maxRetries: AI_GENERATION_MAX_RETRIES, maxOutputTokens: TEMPLATE_MAX_OUTPUT_TOKENS, - prompt, + ...createPromptInput(prompt, imageContext), }); if (!result.output) { @@ -216,7 +229,7 @@ async function generateStructuredOutput(params: { abortSignal: signal, maxRetries: AI_GENERATION_MAX_RETRIES, maxOutputTokens: TEMPLATE_MAX_OUTPUT_TOKENS, - prompt, + ...createPromptInput(prompt, imageContext), }); const jsonMatch = fallbackResult.text.match(/\{[\s\S]*\}/); @@ -268,7 +281,7 @@ IMPORTANT: Previous attempt failed. ${previousFeedback}`; const result = streamText({ model, system, - prompt: enhancedPrompt, + ...createPromptInput(enhancedPrompt, args.imageContext), abortSignal: combinedController.signal, maxRetries: AI_GENERATION_MAX_RETRIES, maxOutputTokens: SUMMARY_MAX_OUTPUT_TOKENS, @@ -296,3 +309,44 @@ IMPORTANT: Previous attempt failed. ${previousFeedback}`; }, ); } + +function withImageContextNote(prompt: string, imageCount: number): string { + if (imageCount === 0) { + return prompt; + } + + return `${prompt} + +${IMAGE_CONTEXT_NOTE}`; +} + +function createPromptInput( + prompt: string, + imageContext: EnhanceImageContext[], +): + | { prompt: string } + | { + messages: Array<{ role: "user"; content: Array }>; + } { + if (imageContext.length === 0) { + return { prompt }; + } + + return { + messages: [ + { + role: "user", + content: [ + { type: "text", text: prompt }, + ...imageContext.map( + (image): ImagePart => ({ + type: "image", + image: image.base64, + mediaType: image.mimeType, + }), + ), + ], + }, + ], + }; +} diff --git a/apps/desktop/src/store/zustand/ai-task/task-configs/index.ts b/apps/desktop/src/store/zustand/ai-task/task-configs/index.ts index 4edd4d4400..e09d7cc3cb 100644 --- a/apps/desktop/src/store/zustand/ai-task/task-configs/index.ts +++ b/apps/desktop/src/store/zustand/ai-task/task-configs/index.ts @@ -7,6 +7,7 @@ import type { TitleUser, } from "@hypr/plugin-template"; +import type { EnhanceImageContext } from "./enhance-images"; import { enhanceSuccess } from "./enhance-success"; import { enhanceTransform } from "./enhance-transform"; import { enhanceWorkflow } from "./enhance-workflow"; @@ -27,7 +28,10 @@ export interface TaskArgsMap { } export interface TaskArgsMapTransformed { - enhance: EnhanceSystem & EnhanceUser; + enhance: EnhanceSystem & + EnhanceUser & { + imageContext: EnhanceImageContext[]; + }; title: TitleSystem & TitleUser; } diff --git a/crates/fs-sync-core/src/lib.rs b/crates/fs-sync-core/src/lib.rs index e29362a7c0..f5bd61cc51 100644 --- a/crates/fs-sync-core/src/lib.rs +++ b/crates/fs-sync-core/src/lib.rs @@ -250,6 +250,14 @@ impl FsSyncCore { Ok(attachments) } + pub fn attachment_read(&self, session_id: &str, attachment_id: &str) -> Result> { + let session_dir = self.resolve_session_dir(session_id); + let attachments_dir = session_dir.join("attachments"); + let safe_attachment_id = sanitize_filename(attachment_id)?; + + Ok(std::fs::read(attachments_dir.join(safe_attachment_id))?) + } + pub fn attachment_remove(&self, session_id: &str, attachment_id: &str) -> Result<()> { let session_dir = self.resolve_session_dir(session_id); let attachments_dir = session_dir.join("attachments"); @@ -576,6 +584,22 @@ mod tests { .assert(predicates::path::exists()); } + #[test] + fn attachment_read_returns_saved_bytes() { + let temp = TempDir::new().unwrap(); + temp.child("sessions") + .child(UUID_1) + .create_dir_all() + .unwrap(); + + let core = FsSyncCore::new(temp.path().to_path_buf()); + core.attachment_save(UUID_1, b"hello", "image.png").unwrap(); + + let bytes = core.attachment_read(UUID_1, "image.png").unwrap(); + + assert_eq!(bytes, b"hello"); + } + #[test] fn attachment_remove_missing_noop() { let temp = TempDir::new().unwrap(); diff --git a/plugins/fs-sync/build.rs b/plugins/fs-sync/build.rs index 754ad4ed54..97eaf56129 100644 --- a/plugins/fs-sync/build.rs +++ b/plugins/fs-sync/build.rs @@ -22,6 +22,7 @@ const COMMANDS: &[&str] = &[ "entity_dir", "attachment_save", "attachment_list", + "attachment_read", "attachment_remove", ]; diff --git a/plugins/fs-sync/js/bindings.gen.ts b/plugins/fs-sync/js/bindings.gen.ts index da22bd3ebf..fccfcc6293 100644 --- a/plugins/fs-sync/js/bindings.gen.ts +++ b/plugins/fs-sync/js/bindings.gen.ts @@ -190,6 +190,14 @@ async attachmentList(sessionId: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:fs-sync|attachment_read", { sessionId, attachmentId }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} +}, async attachmentRemove(sessionId: string, attachmentId: string) : Promise> { try { return { status: "ok", data: await TAURI_INVOKE("plugin:fs-sync|attachment_remove", { sessionId, attachmentId }) }; diff --git a/plugins/fs-sync/permissions/autogenerated/commands/attachment_read.toml b/plugins/fs-sync/permissions/autogenerated/commands/attachment_read.toml new file mode 100644 index 0000000000..6cc69d0580 --- /dev/null +++ b/plugins/fs-sync/permissions/autogenerated/commands/attachment_read.toml @@ -0,0 +1,13 @@ +# Automatically generated - DO NOT EDIT! + +"$schema" = "../../schemas/schema.json" + +[[permission]] +identifier = "allow-attachment-read" +description = "Enables the attachment_read command without any pre-configured scope." +commands.allow = ["attachment_read"] + +[[permission]] +identifier = "deny-attachment-read" +description = "Denies the attachment_read command without any pre-configured scope." +commands.deny = ["attachment_read"] diff --git a/plugins/fs-sync/permissions/autogenerated/reference.md b/plugins/fs-sync/permissions/autogenerated/reference.md index a5e9c0678a..97b7734809 100644 --- a/plugins/fs-sync/permissions/autogenerated/reference.md +++ b/plugins/fs-sync/permissions/autogenerated/reference.md @@ -27,6 +27,7 @@ Default permissions for the fs-sync plugin - `allow-entity-dir` - `allow-attachment-save` - `allow-attachment-list` +- `allow-attachment-read` - `allow-attachment-remove` ## Permission Table @@ -67,6 +68,32 @@ Denies the attachment_list command without any pre-configured scope. +`fs-sync:allow-attachment-read` + + + + +Enables the attachment_read command without any pre-configured scope. + + + + + + + +`fs-sync:deny-attachment-read` + + + + +Denies the attachment_read command without any pre-configured scope. + + + + + + + `fs-sync:allow-attachment-remove` diff --git a/plugins/fs-sync/permissions/default.toml b/plugins/fs-sync/permissions/default.toml index c3bb37530c..3d6ae2005c 100644 --- a/plugins/fs-sync/permissions/default.toml +++ b/plugins/fs-sync/permissions/default.toml @@ -24,5 +24,6 @@ permissions = [ "allow-entity-dir", "allow-attachment-save", "allow-attachment-list", + "allow-attachment-read", "allow-attachment-remove", ] diff --git a/plugins/fs-sync/permissions/schemas/schema.json b/plugins/fs-sync/permissions/schemas/schema.json index 27bc032138..658f3b9955 100644 --- a/plugins/fs-sync/permissions/schemas/schema.json +++ b/plugins/fs-sync/permissions/schemas/schema.json @@ -306,6 +306,18 @@ "const": "deny-attachment-list", "markdownDescription": "Denies the attachment_list command without any pre-configured scope." }, + { + "description": "Enables the attachment_read command without any pre-configured scope.", + "type": "string", + "const": "allow-attachment-read", + "markdownDescription": "Enables the attachment_read command without any pre-configured scope." + }, + { + "description": "Denies the attachment_read command without any pre-configured scope.", + "type": "string", + "const": "deny-attachment-read", + "markdownDescription": "Denies the attachment_read command without any pre-configured scope." + }, { "description": "Enables the attachment_remove command without any pre-configured scope.", "type": "string", @@ -595,10 +607,10 @@ "markdownDescription": "Denies the write_json_batch command without any pre-configured scope." }, { - "description": "Default permissions for the fs-sync plugin\n#### This default permission set includes:\n\n- `allow-deserialize`\n- `allow-write-json-batch`\n- `allow-write-document-batch`\n- `allow-read-document-batch`\n- `allow-list-folders`\n- `allow-move-session`\n- `allow-create-folder`\n- `allow-rename-folder`\n- `allow-delete-folder`\n- `allow-audio-exist`\n- `allow-audio-delete`\n- `allow-audio-delete-orphaned-expired`\n- `allow-audio-import`\n- `allow-audio-source-metadata`\n- `allow-audio-path`\n- `allow-session-dir`\n- `allow-load-session-content`\n- `allow-delete-session-folder`\n- `allow-scan-and-read`\n- `allow-chat-dir`\n- `allow-entity-dir`\n- `allow-attachment-save`\n- `allow-attachment-list`\n- `allow-attachment-remove`", + "description": "Default permissions for the fs-sync plugin\n#### This default permission set includes:\n\n- `allow-deserialize`\n- `allow-write-json-batch`\n- `allow-write-document-batch`\n- `allow-read-document-batch`\n- `allow-list-folders`\n- `allow-move-session`\n- `allow-create-folder`\n- `allow-rename-folder`\n- `allow-delete-folder`\n- `allow-audio-exist`\n- `allow-audio-delete`\n- `allow-audio-delete-orphaned-expired`\n- `allow-audio-import`\n- `allow-audio-source-metadata`\n- `allow-audio-path`\n- `allow-session-dir`\n- `allow-load-session-content`\n- `allow-delete-session-folder`\n- `allow-scan-and-read`\n- `allow-chat-dir`\n- `allow-entity-dir`\n- `allow-attachment-save`\n- `allow-attachment-list`\n- `allow-attachment-read`\n- `allow-attachment-remove`", "type": "string", "const": "default", - "markdownDescription": "Default permissions for the fs-sync plugin\n#### This default permission set includes:\n\n- `allow-deserialize`\n- `allow-write-json-batch`\n- `allow-write-document-batch`\n- `allow-read-document-batch`\n- `allow-list-folders`\n- `allow-move-session`\n- `allow-create-folder`\n- `allow-rename-folder`\n- `allow-delete-folder`\n- `allow-audio-exist`\n- `allow-audio-delete`\n- `allow-audio-delete-orphaned-expired`\n- `allow-audio-import`\n- `allow-audio-source-metadata`\n- `allow-audio-path`\n- `allow-session-dir`\n- `allow-load-session-content`\n- `allow-delete-session-folder`\n- `allow-scan-and-read`\n- `allow-chat-dir`\n- `allow-entity-dir`\n- `allow-attachment-save`\n- `allow-attachment-list`\n- `allow-attachment-remove`" + "markdownDescription": "Default permissions for the fs-sync plugin\n#### This default permission set includes:\n\n- `allow-deserialize`\n- `allow-write-json-batch`\n- `allow-write-document-batch`\n- `allow-read-document-batch`\n- `allow-list-folders`\n- `allow-move-session`\n- `allow-create-folder`\n- `allow-rename-folder`\n- `allow-delete-folder`\n- `allow-audio-exist`\n- `allow-audio-delete`\n- `allow-audio-delete-orphaned-expired`\n- `allow-audio-import`\n- `allow-audio-source-metadata`\n- `allow-audio-path`\n- `allow-session-dir`\n- `allow-load-session-content`\n- `allow-delete-session-folder`\n- `allow-scan-and-read`\n- `allow-chat-dir`\n- `allow-entity-dir`\n- `allow-attachment-save`\n- `allow-attachment-list`\n- `allow-attachment-read`\n- `allow-attachment-remove`" } ] } diff --git a/plugins/fs-sync/src/commands.rs b/plugins/fs-sync/src/commands.rs index a56bf9fe80..bb34e360b7 100644 --- a/plugins/fs-sync/src/commands.rs +++ b/plugins/fs-sync/src/commands.rs @@ -358,6 +358,20 @@ pub(crate) async fn attachment_list( }) } +#[tauri::command] +#[specta::specta] +pub(crate) async fn attachment_read( + app: tauri::AppHandle, + session_id: String, + attachment_id: String, +) -> Result, String> { + spawn_blocking!({ + app.fs_sync() + .attachment_read(&session_id, &attachment_id) + .map_err(|e| e.to_string()) + }) +} + #[tauri::command] #[specta::specta] pub(crate) async fn attachment_remove( diff --git a/plugins/fs-sync/src/ext.rs b/plugins/fs-sync/src/ext.rs index a22e7e0229..9257156090 100644 --- a/plugins/fs-sync/src/ext.rs +++ b/plugins/fs-sync/src/ext.rs @@ -64,6 +64,14 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager> FsSync<'a, R, M> { self.core()?.attachment_list(session_id) } + pub fn attachment_read( + &self, + session_id: &str, + attachment_id: &str, + ) -> Result, crate::Error> { + self.core()?.attachment_read(session_id, attachment_id) + } + pub fn attachment_remove( &self, session_id: &str, diff --git a/plugins/fs-sync/src/lib.rs b/plugins/fs-sync/src/lib.rs index 12be111dbe..181dc5ec41 100644 --- a/plugins/fs-sync/src/lib.rs +++ b/plugins/fs-sync/src/lib.rs @@ -34,6 +34,7 @@ fn make_specta_builder() -> tauri_specta::Builder { commands::entity_dir::, commands::attachment_save::, commands::attachment_list::, + commands::attachment_read::, commands::attachment_remove::, ]) .events(tauri_specta::collect_events![AudioImportEvent])