From 7b0e99629fd44636241edd02958537aee3ca215c Mon Sep 17 00:00:00 2001 From: Daniel van Strien Date: Mon, 27 Apr 2026 10:55:19 +0100 Subject: [PATCH 1/2] Pi: emit "input": ["text", "image"] for vision-capable models When the model has pipeline_tag === "image-text-to-text", the Pi "Use this model" snippet now writes the required input field into the generated ~/.pi/agent/models.json, so users get a working config without having to read the docs to discover the flag. Pre-existing behavior is unchanged for text-only models. Cross-ref: huggingface/hub-docs#2408 (and @gary149's review suggestion there to surface this in the snippet directly). Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/tasks/src/local-apps.spec.ts | 19 +++++++++++++++++++ packages/tasks/src/local-apps.ts | 7 ++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts index 15a562ff97..f0d5d6641b 100644 --- a/packages/tasks/src/local-apps.spec.ts +++ b/packages/tasks/src/local-apps.spec.ts @@ -138,6 +138,25 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\ expect(snippet[2].content).toContain("pi"); }); + it("pi - vision", async () => { + const { snippet: snippetFunc } = LOCAL_APPS["pi"]; + const model: ModelData = { + id: "unsloth/Qwen3.6-35B-A3B-GGUF", + pipeline_tag: "image-text-to-text", + tags: ["conversational"], + gguf: { total: 1, context_length: 4096, chat_template: "{% if tools %}" }, + inference: "", + }; + const snippet = snippetFunc(model); + + expect(snippet[0].content).toContain(`llama-server -hf unsloth/Qwen3.6-35B-A3B-GGUF:{{QUANT_TAG}} --jinja`); + expect(snippet[1].setup).toContain("npm install -g @mariozechner/pi-coding-agent"); + expect(snippet[1].content).toContain(`"id": "Qwen3.6-35B-A3B-GGUF"`); + expect(snippet[1].content).toContain(`"input"`); + expect(snippet[1].content).toContain(`"image"`); + expect(snippet[2].content).toContain("pi"); + }); + it("pi - mlx", 
async () => { const { snippet: snippetFunc } = LOCAL_APPS["pi"]; const model: ModelData = { diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts index 63fc3c45cf..5f15e3b739 100644 --- a/packages/tasks/src/local-apps.ts +++ b/packages/tasks/src/local-apps.ts @@ -466,6 +466,7 @@ const snippetMlxLm = (model: ModelData): LocalAppSnippet[] => { const snippetPi = (model: ModelData, filepath?: string): LocalAppSnippet[] => { const modelName = model.id.split("/").pop() ?? model.id; const isMLX = isMlxModel(model); + const isVision = model.pipeline_tag === "image-text-to-text"; // Step 1: Server — differs by backend const serverStep: LocalAppSnippet = isMLX @@ -481,6 +482,10 @@ const snippetPi = (model: ModelData, filepath?: string): LocalAppSnippet[] => { }; // Step 2: Pi config — port and provider name differ + const modelEntry: Record<string, unknown> = { id: isMLX ? model.id : modelName }; + if (isVision) { + modelEntry.input = ["text", "image"]; + } const modelsJson = JSON.stringify( { providers: { @@ -488,7 +493,7 @@ const snippetPi = (model: ModelData, filepath?: string): LocalAppSnippet[] => { baseUrl: "http://localhost:8080/v1", api: "openai-completions", apiKey: "none", - models: [{ id: isMLX ? model.id : modelName }], + models: [modelEntry], }, }, }, From c4a9eb0745ec61ad6b148af096893cd6e7c73bab Mon Sep 17 00:00:00 2001 From: Daniel van Strien Date: Mon, 27 Apr 2026 11:21:28 +0100 Subject: [PATCH 2/2] Pi vision test: also assert "text" in input array Per Cursor Bugbot review: a regression dropping "text" from ["text", "image"] would have gone undetected. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/tasks/src/local-apps.spec.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts index f0d5d6641b..acfe4bd6ca 100644 --- a/packages/tasks/src/local-apps.spec.ts +++ b/packages/tasks/src/local-apps.spec.ts @@ -153,6 +153,7 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\ expect(snippet[1].setup).toContain("npm install -g @mariozechner/pi-coding-agent"); expect(snippet[1].content).toContain(`"id": "Qwen3.6-35B-A3B-GGUF"`); expect(snippet[1].content).toContain(`"input"`); + expect(snippet[1].content).toContain(`"text"`); expect(snippet[1].content).toContain(`"image"`); expect(snippet[2].content).toContain("pi"); });