Skip to content

Commit 86ec4ae

Browse files
AlexsJones and claude
committed
feat: add llama-server as a first-class AI provider
Add llama-server (llama.cpp's official server binary) as an AI provider following the same pattern as LM Studio across TUI onboarding, BubbleTea wizards, web UI, agent-runner, and apiserver. The existing llama-cpp node-probe target on port 8080 already discovers llama-server instances, so the UI maps llama-server → llama-cpp for node detection. Also changes the local dev API_ADDR default from :8080 to :8081 to fix an existing Vite proxy mismatch and free port 8080 for inference servers during local development. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 37d05b6 commit 86ec4ae

10 files changed

Lines changed: 193 additions & 18 deletions

File tree

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ web-clean: ## Remove frontend build artifacts
208208

209209
SYMPOZIUM_TOKEN ?= $(shell t=$$(kubectl get secret -n sympozium-system -l app.kubernetes.io/component=apiserver -o jsonpath='{.items[0].data.token}' 2>/dev/null | base64 -d 2>/dev/null); [ -n "$$t" ] && echo "$$t" || echo dev-token)
210210
SYMPOZIUM_NAMESPACE ?= sympozium-system
211-
API_ADDR ?= :8080
211+
API_ADDR ?= :8081
212212
VITE_PORT ?= 5173
213213

214214
API_LOCAL_PORT ?= 8081

cmd/agent-runner/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func init() {
6464
// (single-GPU, request queuing) where per-request timeouts matter.
6565
func isLocalProvider(provider string) bool {
6666
switch provider {
67-
case "ollama", "lm-studio", "unsloth", "vllm", "llamacpp", "local":
67+
case "ollama", "lm-studio", "llama-server", "unsloth", "vllm", "llamacpp", "local":
6868
return true
6969
}
7070
return false

cmd/agent-runner/provider_openai.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ func newOpenAIProvider(provider, apiKey, baseURL, model, systemPrompt, task stri
6161
opts = append(opts, openaioption.WithBaseURL("http://ollama.default.svc:11434/v1"))
6262
} else if provider == "lm-studio" {
6363
opts = append(opts, openaioption.WithBaseURL("http://localhost:1234/v1"))
64+
} else if provider == "llama-server" {
65+
opts = append(opts, openaioption.WithBaseURL("http://localhost:8080/v1"))
6466
} else if provider == "unsloth" {
6567
opts = append(opts, openaioption.WithBaseURL("http://localhost:8080/v1"))
6668
}

cmd/sympozium/main.go

Lines changed: 90 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -616,9 +616,10 @@ func runOnboard() error {
616616
fmt.Println(" 3) Azure OpenAI")
617617
fmt.Println(" 4) Ollama (local, no API key needed)")
618618
fmt.Println(" 5) LM Studio (local, optional API key)")
619-
fmt.Println(" 6) AWS Bedrock (Claude, Nova, etc.)")
620-
fmt.Println(" 7) Other / OpenAI-compatible")
621-
providerChoice := prompt(reader, " Choice [1-7]", "1")
619+
fmt.Println(" 6) llama-server (local, no API key needed)")
620+
fmt.Println(" 7) AWS Bedrock (Claude, Nova, etc.)")
621+
fmt.Println(" 8) Other / OpenAI-compatible")
622+
providerChoice := prompt(reader, " Choice [1-8]", "1")
622623

623624
var providerName, secretEnvKey, modelName, baseURL string
624625
switch providerChoice {
@@ -644,6 +645,12 @@ func runOnboard() error {
644645
modelName = prompt(reader, " Model name", "")
645646
fmt.Println(" 💡 No API key needed for LM Studio.")
646647
case "6":
648+
providerName = "llama-server"
649+
secretEnvKey = ""
650+
baseURL = prompt(reader, " llama-server URL", "http://localhost:8080/v1")
651+
modelName = prompt(reader, " Model name", "")
652+
fmt.Println(" 💡 No API key needed for llama-server.")
653+
case "7":
647654
providerName = "bedrock"
648655
secretEnvKey = "" // Bedrock uses multiple AWS credential keys, handled below.
649656
awsRegion := prompt(reader, " AWS Region", "us-east-1")
@@ -675,7 +682,7 @@ func runOnboard() error {
675682
if err := kubectl(args...); err != nil {
676683
return fmt.Errorf("create Bedrock provider secret: %w", err)
677684
}
678-
case "7":
685+
case "8":
679686
providerName = prompt(reader, " Provider name", "custom")
680687
secretEnvKey = prompt(reader, " API key env var name (empty if none)", "API_KEY")
681688
baseURL = prompt(reader, " API base URL", "")
@@ -1823,6 +1830,7 @@ var providerSuggestions = []suggestion{
18231830
{"anthropic", "Anthropic (Claude)"},
18241831
{"azure-openai", "Azure OpenAI Service"},
18251832
{"ollama", "Ollama (local)"},
1833+
{"llama-server", "llama-server (llama.cpp local)"},
18261834
{"bedrock", "AWS Bedrock (Claude, Nova, etc.)"},
18271835
{"openai-compatible", "OpenAI-compatible endpoint"},
18281836
}
@@ -2206,6 +2214,7 @@ const (
22062214
wizStepWhatsAppQR // auto — stream QR from pod logs
22072215
wizStepDone // auto — show result
22082216
wizStepLMStudioAPIKeyRequired // y/n: LM Studio requires API key?
2217+
wizStepLlamaServerAPIKeyRequired // y/n: llama-server requires API key?
22092218
wizStepAWSRegion // text: AWS region for Bedrock
22102219
wizStepAWSAccessKeyID // text: AWS Access Key ID
22112220
wizStepAWSSecretAccessKey // text: AWS Secret Access Key
@@ -2215,7 +2224,8 @@ const (
22152224
wizStepPersonaPick // menu: select a persona pack
22162225
wizStepPersonaProvider // menu 1-6: provider
22172226
wizStepPersonaBaseURL // text: base URL
2218-
wizStepPersonaLMStudioAPIKeyRequired // y/n: LM Studio requires API key?
2227+
wizStepPersonaLMStudioAPIKeyRequired // y/n: LM Studio requires API key?
2228+
wizStepPersonaLlamaServerAPIKeyRequired // y/n: llama-server requires API key?
22192229
wizStepPersonaAPIKey // text: API key
22202230
wizStepPersonaModel // text: model name
22212231
wizStepPersonaGithubRepo // text: GitHub repo (owner/repo)
@@ -6042,6 +6052,9 @@ func resolveInstanceProvider(inst sympoziumv1alpha1.SympoziumInstance) string {
60426052
if strings.Contains(u, "lm-studio") || strings.Contains(u, ":1234") {
60436053
return "lm-studio"
60446054
}
6055+
if strings.Contains(u, "llama-server") {
6056+
return "llama-server"
6057+
}
60456058
return "custom"
60466059
}
60476060
return "-"
@@ -8019,6 +8032,8 @@ func tuiCreateRun(ns, instance, task string) (string, error) {
80198032
provider = "ollama"
80208033
} else if strings.Contains(inst.Spec.Agents.Default.BaseURL, "lm-studio") || strings.Contains(inst.Spec.Agents.Default.BaseURL, ":1234") {
80218034
provider = "lm-studio"
8035+
} else if strings.Contains(inst.Spec.Agents.Default.BaseURL, "llama-server") {
8036+
provider = "llama-server"
80228037
} else {
80238038
provider = "custom"
80248039
}
@@ -8692,7 +8707,7 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
86928707
}
86938708
w.instanceName = val
86948709
w.step = wizStepProvider
8695-
m.input.Placeholder = "Choice [1-7] (default: 1 — OpenAI)"
8710+
m.input.Placeholder = "Choice [1-8] (default: 1 — OpenAI)"
86968711
return m, nil
86978712

86988713
case wizStepProvider:
@@ -8727,12 +8742,18 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
87278742
m.input.Placeholder = "LM Studio URL (default: http://localhost:1234/v1)"
87288743
return m, nil
87298744
case "6":
8745+
w.providerName = "llama-server"
8746+
w.secretEnvKey = ""
8747+
w.step = wizStepBaseURL
8748+
m.input.Placeholder = "llama-server URL (default: http://localhost:8080/v1)"
8749+
return m, nil
8750+
case "7":
87308751
w.providerName = "bedrock"
87318752
w.secretEnvKey = ""
87328753
w.step = wizStepAWSRegion
87338754
m.input.Placeholder = "AWS Region (default: us-east-1)"
87348755
return m, nil
8735-
case "7":
8756+
case "8":
87368757
w.providerName = "custom"
87378758
w.secretEnvKey = "API_KEY"
87388759
w.step = wizStepBaseURL
@@ -8754,13 +8775,22 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
87548775
if val == "" && w.providerName == "lm-studio" {
87558776
val = "http://localhost:1234/v1"
87568777
}
8778+
if val == "" && w.providerName == "llama-server" {
8779+
val = "http://localhost:8080/v1"
8780+
}
87578781
w.baseURL = val
87588782
if w.providerName == "lm-studio" {
87598783
// LM Studio — ask if API key is required.
87608784
w.step = wizStepLMStudioAPIKeyRequired
87618785
m.input.Placeholder = "Does LM Studio require an API key? [Y/n]"
87628786
return m, nil
87638787
}
8788+
if w.providerName == "llama-server" {
8789+
// llama-server — ask if API key is required.
8790+
w.step = wizStepLlamaServerAPIKeyRequired
8791+
m.input.Placeholder = "Does llama-server require an API key? [Y/n]"
8792+
return m, nil
8793+
}
87648794
if w.secretEnvKey == "" {
87658795
// Ollama — no API key, go straight to model.
87668796
w.step = wizStepModel
@@ -8787,6 +8817,21 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
87878817
}
87888818
return m, nil
87898819

8820+
case wizStepLlamaServerAPIKeyRequired:
8821+
w.step = wizStepModel // default fallback
8822+
switch strings.ToLower(val) {
8823+
case "y", "yes":
8824+
w.secretEnvKey = "API_KEY"
8825+
w.step = wizStepAPIKey
8826+
m.input.Placeholder = "Please enter the API key for llama-server:"
8827+
default:
8828+
// User skips API key - show warning
8829+
m.addLog(tuiErrorStyle.Render("⚠ Warning: Ensure your llama-server is running without authentication"))
8830+
w.step = wizStepModel
8831+
m.input.Placeholder = "Model name (default: llama3)"
8832+
}
8833+
return m, nil
8834+
87908835
case wizStepAWSRegion:
87918836
if val == "" {
87928837
val = "us-east-1"
@@ -9069,7 +9114,7 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
90699114
}
90709115

90719116
w.step = wizStepPersonaProvider
9072-
m.input.Placeholder = "Choice [1-7] (default: 1 — OpenAI)"
9117+
m.input.Placeholder = "Choice [1-8] (default: 1 — OpenAI)"
90739118
return m, nil
90749119

90759120
case wizStepPersonaProvider:
@@ -9103,12 +9148,18 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
91039148
m.input.Placeholder = "LM Studio URL (default: http://localhost:1234/v1)"
91049149
return m, nil
91059150
case "6":
9151+
w.providerName = "llama-server"
9152+
w.secretEnvKey = ""
9153+
w.step = wizStepPersonaBaseURL
9154+
m.input.Placeholder = "llama-server URL (default: http://localhost:8080/v1)"
9155+
return m, nil
9156+
case "7":
91069157
w.providerName = "bedrock"
91079158
w.secretEnvKey = ""
91089159
w.step = wizStepAWSRegion
91099160
m.input.Placeholder = "AWS Region (default: us-east-1)"
91109161
return m, nil
9111-
case "7":
9162+
case "8":
91129163
w.providerName = "custom"
91139164
w.secretEnvKey = "API_KEY"
91149165
w.step = wizStepPersonaBaseURL
@@ -9129,13 +9180,22 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
91299180
if val == "" && w.providerName == "lm-studio" {
91309181
val = "http://localhost:1234/v1"
91319182
}
9183+
if val == "" && w.providerName == "llama-server" {
9184+
val = "http://localhost:8080/v1"
9185+
}
91329186
w.baseURL = val
91339187
if w.providerName == "lm-studio" {
91349188
// LM Studio — ask if API key is required.
91359189
w.step = wizStepPersonaLMStudioAPIKeyRequired
91369190
m.input.Placeholder = "Does LM Studio require an API key? [Y/n]"
91379191
return m, nil
91389192
}
9193+
if w.providerName == "llama-server" {
9194+
// llama-server — ask if API key is required.
9195+
w.step = wizStepPersonaLlamaServerAPIKeyRequired
9196+
m.input.Placeholder = "Does llama-server require an API key? [Y/n]"
9197+
return m, nil
9198+
}
91399199
if w.secretEnvKey == "" {
91409200
// Ollama — no key needed, skip to model.
91419201
w.step = wizStepPersonaModel
@@ -9161,6 +9221,21 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
91619221
}
91629222
return m, nil
91639223

9224+
case wizStepPersonaLlamaServerAPIKeyRequired:
9225+
w.step = wizStepPersonaModel // default fallback
9226+
switch strings.ToLower(val) {
9227+
case "y", "yes":
9228+
w.secretEnvKey = "API_KEY"
9229+
w.step = wizStepPersonaAPIKey
9230+
m.input.Placeholder = "Please enter the API key for llama-server:"
9231+
default:
9232+
// User skips API key - show warning
9233+
m.addLog(tuiErrorStyle.Render("⚠ Warning: Ensure your llama-server is running without authentication"))
9234+
w.step = wizStepPersonaModel
9235+
m.input.Placeholder = "Model name (default: llama3)"
9236+
}
9237+
return m, nil
9238+
91649239
case wizStepPersonaAPIKey:
91659240
w.apiKey = val
91669241
if w.apiKey == "" && w.secretEnvKey != "" {
@@ -9492,8 +9567,9 @@ func (m tuiModel) renderWizardPanel(h int) string {
94929567
lines = append(lines, menuNumStyle.Render(" 3)")+menuStyle.Render(" Azure OpenAI"))
94939568
lines = append(lines, menuNumStyle.Render(" 4)")+menuStyle.Render(" Ollama (local, no API key needed)"))
94949569
lines = append(lines, menuNumStyle.Render(" 5)")+menuStyle.Render(" LM Studio (local, optional API key)"))
9495-
lines = append(lines, menuNumStyle.Render(" 6)")+menuStyle.Render(" AWS Bedrock (Claude, Nova, etc.)"))
9496-
lines = append(lines, menuNumStyle.Render(" 7)")+menuStyle.Render(" Other / OpenAI-compatible"))
9570+
lines = append(lines, menuNumStyle.Render(" 6)")+menuStyle.Render(" llama-server (local, no API key needed)"))
9571+
lines = append(lines, menuNumStyle.Render(" 7)")+menuStyle.Render(" AWS Bedrock (Claude, Nova, etc.)"))
9572+
lines = append(lines, menuNumStyle.Render(" 8)")+menuStyle.Render(" Other / OpenAI-compatible"))
94979573

94989574
case wizStepBaseURL:
94999575
lines = append(lines, stepStyle.Render(" 📋 Step 3/9 — AI Provider (continued)"))
@@ -9873,8 +9949,9 @@ func (m tuiModel) renderPersonaWizardPanel(h int,
98739949
lines = append(lines, menuNumStyle.Render(" [3]")+menuStyle.Render(" Azure OpenAI")+hintStyle.Render(" — Enterprise Azure"))
98749950
lines = append(lines, menuNumStyle.Render(" [4]")+menuStyle.Render(" Ollama")+hintStyle.Render(" — Local models"))
98759951
lines = append(lines, menuNumStyle.Render(" [5]")+menuStyle.Render(" LM Studio")+hintStyle.Render(" — Local models"))
9876-
lines = append(lines, menuNumStyle.Render(" [6]")+menuStyle.Render(" AWS Bedrock")+hintStyle.Render(" — Claude, Nova, etc."))
9877-
lines = append(lines, menuNumStyle.Render(" [7]")+menuStyle.Render(" Custom")+hintStyle.Render(" — Any OpenAI-compatible API"))
9952+
lines = append(lines, menuNumStyle.Render(" [6]")+menuStyle.Render(" llama-server")+hintStyle.Render(" — Local models (llama.cpp)"))
9953+
lines = append(lines, menuNumStyle.Render(" [7]")+menuStyle.Render(" AWS Bedrock")+hintStyle.Render(" — Claude, Nova, etc."))
9954+
lines = append(lines, menuNumStyle.Render(" [8]")+menuStyle.Render(" Custom")+hintStyle.Render(" — Any OpenAI-compatible API"))
98789955
lines = append(lines, "")
98799956

98809957
case wizStepPersonaBaseURL:

internal/apiserver/server.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -816,6 +816,8 @@ func (s *Server) createRun(w http.ResponseWriter, r *http.Request) {
816816
provider = "ollama"
817817
} else if strings.Contains(inst.Spec.Agents.Default.BaseURL, "lm-studio") || strings.Contains(inst.Spec.Agents.Default.BaseURL, ":1234") {
818818
provider = "lm-studio"
819+
} else if strings.Contains(inst.Spec.Agents.Default.BaseURL, "llama-server") {
820+
provider = "llama-server"
819821
} else {
820822
provider = "custom"
821823
}
@@ -1766,6 +1768,8 @@ func defaultProviderBaseURL(provider string) string {
17661768
return "http://ollama.default.svc:11434/v1"
17671769
case "lm-studio":
17681770
return "http://localhost:1234/v1"
1771+
case "llama-server":
1772+
return "http://localhost:8080/v1"
17691773
default:
17701774
return ""
17711775
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Test: create an ad-hoc instance using llama-server.
2+
3+
const INSTANCE = `cypress-llama-server-${Date.now()}`;
4+
5+
describe("Create Instance — llama-server", () => {
6+
after(() => {
7+
cy.deleteInstance(INSTANCE);
8+
});
9+
10+
it("walks through the wizard and creates the instance", () => {
11+
cy.visit("/instances");
12+
13+
cy.contains("button", "Create Instance", { timeout: 20000 }).click();
14+
15+
// ── Step 1: Name ──────────────────────────────────────────
16+
cy.get("[role='dialog']").find("input[placeholder='my-agent']").clear().type(INSTANCE);
17+
cy.wizardNext();
18+
19+
// ── Step 2: Provider — select llama-server ────────────────
20+
cy.get("[role='dialog']").find("button[role='combobox']").click({ force: true });
21+
cy.get("[data-radix-popper-content-wrapper]")
22+
.contains("llama-server")
23+
.click({ force: true });
24+
cy.wizardNext();
25+
26+
// ── Step 3: Auth ──────────────────────────────────────────
27+
// llama-server needs no API key — go straight through.
28+
cy.wizardNext();
29+
30+
// ── Step 4: Model ─────────────────────────────────────────
31+
cy.get("[role='dialog']").find("input[placeholder='gpt-4o']").clear().type("default");
32+
cy.wizardNext();
33+
34+
// ── Step 5: Skills ────────────────────────────────────────
35+
cy.wizardNext();
36+
37+
// ── Step 6: Heartbeat ─────────────────────────────────────
38+
cy.get("[role='dialog']").contains("button", "No heartbeat").click({ force: true });
39+
cy.wizardNext();
40+
41+
// ── Step 7: Channels ──────────────────────────────────────
42+
cy.wizardNext();
43+
44+
// ── Step 8: Confirm ───────────────────────────────────────
45+
cy.get("[role='dialog']").contains(INSTANCE);
46+
cy.get("[role='dialog']").contains("llama-server");
47+
cy.get("[role='dialog']").contains("default");
48+
cy.get("[role='dialog']").contains("button", "Create").click({ force: true });
49+
50+
// Wait for the dialog to close (instance was created).
51+
cy.get("[role='dialog']").should("not.exist", { timeout: 20000 });
52+
53+
// ── Verify instance appears in the list ───────────────────
54+
cy.contains(INSTANCE, { timeout: 20000 }).should("be.visible");
55+
});
56+
});

web/cypress/support/e2e.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ declare global {
3232
name: string,
3333
opts?: { skills?: string[] },
3434
): Chainable<void>;
35+
/** Create a minimal llama-server SympoziumInstance via API. */
36+
createLlamaServerInstance(
37+
name: string,
38+
opts?: { skills?: string[] },
39+
): Chainable<void>;
3540
/** Dispatch an ad-hoc run against an instance via API. Returns the created run name. */
3641
dispatchRun(
3742
instanceRef: string,
@@ -128,6 +133,31 @@ Cypress.Commands.add("createLMStudioInstance", (name: string, opts) => {
128133
});
129134
});
130135

136+
Cypress.Commands.add("createLlamaServerInstance", (name: string, opts) => {
137+
const body: Record<string, unknown> = {
138+
name,
139+
provider: "llama-server",
140+
model: "default",
141+
baseURL: "http://host.docker.internal:8080/v1",
142+
};
143+
if (opts?.skills?.length) {
144+
body.skills = opts.skills.map((s) => ({ skillPackRef: s }));
145+
}
146+
cy.request({
147+
method: "POST",
148+
url: "/api/v1/instances?namespace=default",
149+
headers: authHeaders(),
150+
body,
151+
failOnStatusCode: false,
152+
}).then((resp) => {
153+
if (resp.status >= 400 && resp.status !== 409) {
154+
throw new Error(
155+
`createLlamaServerInstance failed (${resp.status}): ${JSON.stringify(resp.body)}`,
156+
);
157+
}
158+
});
159+
});
160+
131161
Cypress.Commands.add("dispatchRun", (instanceRef: string, task: string, opts) => {
132162
return cy
133163
.request({

0 commit comments

Comments
 (0)