Skip to content

Commit 86ec4ae

Browse files
AlexsJones and claude
committed
feat: add llama-server as a first-class AI provider
Add llama-server (llama.cpp's official server binary) as an AI provider following the same pattern as LM Studio across TUI onboarding, BubbleTea wizards, web UI, agent-runner, and apiserver. The existing llama-cpp node-probe target on port 8080 already discovers llama-server instances, so the UI maps llama-server → llama-cpp for node detection. Also changes the local dev API_ADDR default from :8080 to :8081 to fix an existing Vite proxy mismatch and free port 8080 for inference servers during local development. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 37d05b6 commit 86ec4ae

10 files changed

Lines changed: 193 additions & 18 deletions

File tree

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ web-clean: ## Remove frontend build artifacts
208208

209209
SYMPOZIUM_TOKEN ?= $(shell t=$$(kubectl get secret -n sympozium-system -l app.kubernetes.io/component=apiserver -o jsonpath='{.items[0].data.token}' 2>/dev/null | base64 -d 2>/dev/null); [ -n "$$t" ] && echo "$$t" || echo dev-token)
210210
SYMPOZIUM_NAMESPACE ?= sympozium-system
211-
API_ADDR ?= :8080
211+
API_ADDR ?= :8081
212212
VITE_PORT ?= 5173
213213

214214
API_LOCAL_PORT ?= 8081

cmd/agent-runner/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func init() {
6464
// (single-GPU, request queuing) where per-request timeouts matter.
6565
func isLocalProvider(provider string) bool {
6666
switch provider {
67-
case "ollama", "lm-studio", "unsloth", "vllm", "llamacpp", "local":
67+
case "ollama", "lm-studio", "llama-server", "unsloth", "vllm", "llamacpp", "local":
6868
return true
6969
}
7070
return false

cmd/agent-runner/provider_openai.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ func newOpenAIProvider(provider, apiKey, baseURL, model, systemPrompt, task stri
6161
opts = append(opts, openaioption.WithBaseURL("http://ollama.default.svc:11434/v1"))
6262
} else if provider == "lm-studio" {
6363
opts = append(opts, openaioption.WithBaseURL("http://localhost:1234/v1"))
64+
} else if provider == "llama-server" {
65+
opts = append(opts, openaioption.WithBaseURL("http://localhost:8080/v1"))
6466
} else if provider == "unsloth" {
6567
opts = append(opts, openaioption.WithBaseURL("http://localhost:8080/v1"))
6668
}

cmd/sympozium/main.go

Lines changed: 90 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -616,9 +616,10 @@ func runOnboard() error {
616616
fmt.Println(" 3) Azure OpenAI")
617617
fmt.Println(" 4) Ollama (local, no API key needed)")
618618
fmt.Println(" 5) LM Studio (local, optional API key)")
619-
fmt.Println(" 6) AWS Bedrock (Claude, Nova, etc.)")
620-
fmt.Println(" 7) Other / OpenAI-compatible")
621-
providerChoice := prompt(reader, " Choice [1-7]", "1")
619+
fmt.Println(" 6) llama-server (local, no API key needed)")
620+
fmt.Println(" 7) AWS Bedrock (Claude, Nova, etc.)")
621+
fmt.Println(" 8) Other / OpenAI-compatible")
622+
providerChoice := prompt(reader, " Choice [1-8]", "1")
622623

623624
var providerName, secretEnvKey, modelName, baseURL string
624625
switch providerChoice {
@@ -644,6 +645,12 @@ func runOnboard() error {
644645
modelName = prompt(reader, " Model name", "")
645646
fmt.Println(" 💡 No API key needed for LM Studio.")
646647
case "6":
648+
providerName = "llama-server"
649+
secretEnvKey = ""
650+
baseURL = prompt(reader, " llama-server URL", "http://localhost:8080/v1")
651+
modelName = prompt(reader, " Model name", "")
652+
fmt.Println(" 💡 No API key needed for llama-server.")
653+
case "7":
647654
providerName = "bedrock"
648655
secretEnvKey = "" // Bedrock uses multiple AWS credential keys, handled below.
649656
awsRegion := prompt(reader, " AWS Region", "us-east-1")
@@ -675,7 +682,7 @@ func runOnboard() error {
675682
if err := kubectl(args...); err != nil {
676683
return fmt.Errorf("create Bedrock provider secret: %w", err)
677684
}
678-
case "7":
685+
case "8":
679686
providerName = prompt(reader, " Provider name", "custom")
680687
secretEnvKey = prompt(reader, " API key env var name (empty if none)", "API_KEY")
681688
baseURL = prompt(reader, " API base URL", "")
@@ -1823,6 +1830,7 @@ var providerSuggestions = []suggestion{
18231830
{"anthropic", "Anthropic (Claude)"},
18241831
{"azure-openai", "Azure OpenAI Service"},
18251832
{"ollama", "Ollama (local)"},
1833+
{"llama-server", "llama-server (llama.cpp local)"},
18261834
{"bedrock", "AWS Bedrock (Claude, Nova, etc.)"},
18271835
{"openai-compatible", "OpenAI-compatible endpoint"},
18281836
}
@@ -2206,6 +2214,7 @@ const (
22062214
wizStepWhatsAppQR // auto — stream QR from pod logs
22072215
wizStepDone // auto — show result
22082216
wizStepLMStudioAPIKeyRequired // y/n: LM Studio requires API key?
2217+
wizStepLlamaServerAPIKeyRequired // y/n: llama-server requires API key?
22092218
wizStepAWSRegion // text: AWS region for Bedrock
22102219
wizStepAWSAccessKeyID // text: AWS Access Key ID
22112220
wizStepAWSSecretAccessKey // text: AWS Secret Access Key
@@ -2215,7 +2224,8 @@ const (
22152224
wizStepPersonaPick // menu: select a persona pack
22162225
wizStepPersonaProvider // menu 1-6: provider
22172226
wizStepPersonaBaseURL // text: base URL
2218-
wizStepPersonaLMStudioAPIKeyRequired // y/n: LM Studio requires API key?
2227+
wizStepPersonaLMStudioAPIKeyRequired // y/n: LM Studio requires API key?
2228+
wizStepPersonaLlamaServerAPIKeyRequired // y/n: llama-server requires API key?
22192229
wizStepPersonaAPIKey // text: API key
22202230
wizStepPersonaModel // text: model name
22212231
wizStepPersonaGithubRepo // text: GitHub repo (owner/repo)
@@ -6042,6 +6052,9 @@ func resolveInstanceProvider(inst sympoziumv1alpha1.SympoziumInstance) string {
60426052
if strings.Contains(u, "lm-studio") || strings.Contains(u, ":1234") {
60436053
return "lm-studio"
60446054
}
6055+
if strings.Contains(u, "llama-server") {
6056+
return "llama-server"
6057+
}
60456058
return "custom"
60466059
}
60476060
return "-"
@@ -8019,6 +8032,8 @@ func tuiCreateRun(ns, instance, task string) (string, error) {
80198032
provider = "ollama"
80208033
} else if strings.Contains(inst.Spec.Agents.Default.BaseURL, "lm-studio") || strings.Contains(inst.Spec.Agents.Default.BaseURL, ":1234") {
80218034
provider = "lm-studio"
8035+
} else if strings.Contains(inst.Spec.Agents.Default.BaseURL, "llama-server") {
8036+
provider = "llama-server"
80228037
} else {
80238038
provider = "custom"
80248039
}
@@ -8692,7 +8707,7 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
86928707
}
86938708
w.instanceName = val
86948709
w.step = wizStepProvider
8695-
m.input.Placeholder = "Choice [1-7] (default: 1 — OpenAI)"
8710+
m.input.Placeholder = "Choice [1-8] (default: 1 — OpenAI)"
86968711
return m, nil
86978712

86988713
case wizStepProvider:
@@ -8727,12 +8742,18 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
87278742
m.input.Placeholder = "LM Studio URL (default: http://localhost:1234/v1)"
87288743
return m, nil
87298744
case "6":
8745+
w.providerName = "llama-server"
8746+
w.secretEnvKey = ""
8747+
w.step = wizStepBaseURL
8748+
m.input.Placeholder = "llama-server URL (default: http://localhost:8080/v1)"
8749+
return m, nil
8750+
case "7":
87308751
w.providerName = "bedrock"
87318752
w.secretEnvKey = ""
87328753
w.step = wizStepAWSRegion
87338754
m.input.Placeholder = "AWS Region (default: us-east-1)"
87348755
return m, nil
8735-
case "7":
8756+
case "8":
87368757
w.providerName = "custom"
87378758
w.secretEnvKey = "API_KEY"
87388759
w.step = wizStepBaseURL
@@ -8754,13 +8775,22 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
87548775
if val == "" && w.providerName == "lm-studio" {
87558776
val = "http://localhost:1234/v1"
87568777
}
8778+
if val == "" && w.providerName == "llama-server" {
8779+
val = "http://localhost:8080/v1"
8780+
}
87578781
w.baseURL = val
87588782
if w.providerName == "lm-studio" {
87598783
// LM Studio — ask if API key is required.
87608784
w.step = wizStepLMStudioAPIKeyRequired
87618785
m.input.Placeholder = "Does LM Studio require an API key? [Y/n]"
87628786
return m, nil
87638787
}
8788+
if w.providerName == "llama-server" {
8789+
// llama-server — ask if API key is required.
8790+
w.step = wizStepLlamaServerAPIKeyRequired
8791+
m.input.Placeholder = "Does llama-server require an API key? [Y/n]"
8792+
return m, nil
8793+
}
87648794
if w.secretEnvKey == "" {
87658795
// Ollama — no API key, go straight to model.
87668796
w.step = wizStepModel
@@ -8787,6 +8817,21 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
87878817
}
87888818
return m, nil
87898819

8820+
case wizStepLlamaServerAPIKeyRequired:
8821+
w.step = wizStepModel // default fallback
8822+
switch strings.ToLower(val) {
8823+
case "y", "yes":
8824+
w.secretEnvKey = "API_KEY"
8825+
w.step = wizStepAPIKey
8826+
m.input.Placeholder = "Please enter the API key for llama-server:"
8827+
default:
8828+
// User skips API key - show warning
8829+
m.addLog(tuiErrorStyle.Render("⚠ Warning: Ensure your llama-server is running without authentication"))
8830+
w.step = wizStepModel
8831+
m.input.Placeholder = "Model name (default: llama3)"
8832+
}
8833+
return m, nil
8834+
87908835
case wizStepAWSRegion:
87918836
if val == "" {
87928837
val = "us-east-1"
@@ -9069,7 +9114,7 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
90699114
}
90709115

90719116
w.step = wizStepPersonaProvider
9072-
m.input.Placeholder = "Choice [1-7] (default: 1 — OpenAI)"
9117+
m.input.Placeholder = "Choice [1-8] (default: 1 — OpenAI)"
90739118
return m, nil
90749119

90759120
case wizStepPersonaProvider:
@@ -9103,12 +9148,18 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
91039148
m.input.Placeholder = "LM Studio URL (default: http://localhost:1234/v1)"
91049149
return m, nil
91059150
case "6":
9151+
w.providerName = "llama-server"
9152+
w.secretEnvKey = ""
9153+
w.step = wizStepPersonaBaseURL
9154+
m.input.Placeholder = "llama-server URL (default: http://localhost:8080/v1)"
9155+
return m, nil
9156+
case "7":
91069157
w.providerName = "bedrock"
91079158
w.secretEnvKey = ""
91089159
w.step = wizStepAWSRegion
91099160
m.input.Placeholder = "AWS Region (default: us-east-1)"
91109161
return m, nil
9111-
case "7":
9162+
case "8":
91129163
w.providerName = "custom"
91139164
w.secretEnvKey = "API_KEY"
91149165
w.step = wizStepPersonaBaseURL
@@ -9129,13 +9180,22 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
91299180
if val == "" && w.providerName == "lm-studio" {
91309181
val = "http://localhost:1234/v1"
91319182
}
9183+
if val == "" && w.providerName == "llama-server" {
9184+
val = "http://localhost:8080/v1"
9185+
}
91329186
w.baseURL = val
91339187
if w.providerName == "lm-studio" {
91349188
// LM Studio — ask if API key is required.
91359189
w.step = wizStepPersonaLMStudioAPIKeyRequired
91369190
m.input.Placeholder = "Does LM Studio require an API key? [Y/n]"
91379191
return m, nil
91389192
}
9193+
if w.providerName == "llama-server" {
9194+
// llama-server — ask if API key is required.
9195+
w.step = wizStepPersonaLlamaServerAPIKeyRequired
9196+
m.input.Placeholder = "Does llama-server require an API key? [Y/n]"
9197+
return m, nil
9198+
}
91399199
if w.secretEnvKey == "" {
91409200
// Ollama — no key needed, skip to model.
91419201
w.step = wizStepPersonaModel
@@ -9161,6 +9221,21 @@ func (m tuiModel) advanceWizard(val string) (tea.Model, tea.Cmd) {
91619221
}
91629222
return m, nil
91639223

9224+
case wizStepPersonaLlamaServerAPIKeyRequired:
9225+
w.step = wizStepPersonaModel // default fallback
9226+
switch strings.ToLower(val) {
9227+
case "y", "yes":
9228+
w.secretEnvKey = "API_KEY"
9229+
w.step = wizStepPersonaAPIKey
9230+
m.input.Placeholder = "Please enter the API key for llama-server:"
9231+
default:
9232+
// User skips API key - show warning
9233+
m.addLog(tuiErrorStyle.Render("⚠ Warning: Ensure your llama-server is running without authentication"))
9234+
w.step = wizStepPersonaModel
9235+
m.input.Placeholder = "Model name (default: llama3)"
9236+
}
9237+
return m, nil
9238+
91649239
case wizStepPersonaAPIKey:
91659240
w.apiKey = val
91669241
if w.apiKey == "" && w.secretEnvKey != "" {
@@ -9492,8 +9567,9 @@ func (m tuiModel) renderWizardPanel(h int) string {
94929567
lines = append(lines, menuNumStyle.Render(" 3)")+menuStyle.Render(" Azure OpenAI"))
94939568
lines = append(lines, menuNumStyle.Render(" 4)")+menuStyle.Render(" Ollama (local, no API key needed)"))
94949569
lines = append(lines, menuNumStyle.Render(" 5)")+menuStyle.Render(" LM Studio (local, optional API key)"))
9495-
lines = append(lines, menuNumStyle.Render(" 6)")+menuStyle.Render(" AWS Bedrock (Claude, Nova, etc.)"))
9496-
lines = append(lines, menuNumStyle.Render(" 7)")+menuStyle.Render(" Other / OpenAI-compatible"))
9570+
lines = append(lines, menuNumStyle.Render(" 6)")+menuStyle.Render(" llama-server (local, no API key needed)"))
9571+
lines = append(lines, menuNumStyle.Render(" 7)")+menuStyle.Render(" AWS Bedrock (Claude, Nova, etc.)"))
9572+
lines = append(lines, menuNumStyle.Render(" 8)")+menuStyle.Render(" Other / OpenAI-compatible"))
94979573

94989574
case wizStepBaseURL:
94999575
lines = append(lines, stepStyle.Render(" 📋 Step 3/9 — AI Provider (continued)"))
@@ -9873,8 +9949,9 @@ func (m tuiModel) renderPersonaWizardPanel(h int,
98739949
lines = append(lines, menuNumStyle.Render(" [3]")+menuStyle.Render(" Azure OpenAI")+hintStyle.Render(" — Enterprise Azure"))
98749950
lines = append(lines, menuNumStyle.Render(" [4]")+menuStyle.Render(" Ollama")+hintStyle.Render(" — Local models"))
98759951
lines = append(lines, menuNumStyle.Render(" [5]")+menuStyle.Render(" LM Studio")+hintStyle.Render(" — Local models"))
9876-
lines = append(lines, menuNumStyle.Render(" [6]")+menuStyle.Render(" AWS Bedrock")+hintStyle.Render(" — Claude, Nova, etc."))
9877-
lines = append(lines, menuNumStyle.Render(" [7]")+menuStyle.Render(" Custom")+hintStyle.Render(" — Any OpenAI-compatible API"))
9952+
lines = append(lines, menuNumStyle.Render(" [6]")+menuStyle.Render(" llama-server")+hintStyle.Render(" — Local models (llama.cpp)"))
9953+
lines = append(lines, menuNumStyle.Render(" [7]")+menuStyle.Render(" AWS Bedrock")+hintStyle.Render(" — Claude, Nova, etc."))
9954+
lines = append(lines, menuNumStyle.Render(" [8]")+menuStyle.Render(" Custom")+hintStyle.Render(" — Any OpenAI-compatible API"))
98789955
lines = append(lines, "")
98799956

98809957
case wizStepPersonaBaseURL:

internal/apiserver/server.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -816,6 +816,8 @@ func (s *Server) createRun(w http.ResponseWriter, r *http.Request) {
816816
provider = "ollama"
817817
} else if strings.Contains(inst.Spec.Agents.Default.BaseURL, "lm-studio") || strings.Contains(inst.Spec.Agents.Default.BaseURL, ":1234") {
818818
provider = "lm-studio"
819+
} else if strings.Contains(inst.Spec.Agents.Default.BaseURL, "llama-server") {
820+
provider = "llama-server"
819821
} else {
820822
provider = "custom"
821823
}
@@ -1766,6 +1768,8 @@ func defaultProviderBaseURL(provider string) string {
17661768
return "http://ollama.default.svc:11434/v1"
17671769
case "lm-studio":
17681770
return "http://localhost:1234/v1"
1771+
case "llama-server":
1772+
return "http://localhost:8080/v1"
17691773
default:
17701774
return ""
17711775
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Test: create an ad-hoc instance using llama-server.
2+
3+
const INSTANCE = `cypress-llama-server-${Date.now()}`;
4+
5+
describe("Create Instance — llama-server", () => {
6+
after(() => {
7+
cy.deleteInstance(INSTANCE);
8+
});
9+
10+
it("walks through the wizard and creates the instance", () => {
11+
cy.visit("/instances");
12+
13+
cy.contains("button", "Create Instance", { timeout: 20000 }).click();
14+
15+
// ── Step 1: Name ──────────────────────────────────────────
16+
cy.get("[role='dialog']").find("input[placeholder='my-agent']").clear().type(INSTANCE);
17+
cy.wizardNext();
18+
19+
// ── Step 2: Provider — select llama-server ────────────────
20+
cy.get("[role='dialog']").find("button[role='combobox']").click({ force: true });
21+
cy.get("[data-radix-popper-content-wrapper]")
22+
.contains("llama-server")
23+
.click({ force: true });
24+
cy.wizardNext();
25+
26+
// ── Step 3: Auth ──────────────────────────────────────────
27+
// llama-server needs no API key — go straight through.
28+
cy.wizardNext();
29+
30+
// ── Step 4: Model ─────────────────────────────────────────
31+
cy.get("[role='dialog']").find("input[placeholder='gpt-4o']").clear().type("default");
32+
cy.wizardNext();
33+
34+
// ── Step 5: Skills ────────────────────────────────────────
35+
cy.wizardNext();
36+
37+
// ── Step 6: Heartbeat ─────────────────────────────────────
38+
cy.get("[role='dialog']").contains("button", "No heartbeat").click({ force: true });
39+
cy.wizardNext();
40+
41+
// ── Step 7: Channels ──────────────────────────────────────
42+
cy.wizardNext();
43+
44+
// ── Step 8: Confirm ───────────────────────────────────────
45+
cy.get("[role='dialog']").contains(INSTANCE);
46+
cy.get("[role='dialog']").contains("llama-server");
47+
cy.get("[role='dialog']").contains("default");
48+
cy.get("[role='dialog']").contains("button", "Create").click({ force: true });
49+
50+
// Wait for the dialog to close (instance was created).
51+
cy.get("[role='dialog']").should("not.exist", { timeout: 20000 });
52+
53+
// ── Verify instance appears in the list ───────────────────
54+
cy.contains(INSTANCE, { timeout: 20000 }).should("be.visible");
55+
});
56+
});

web/cypress/support/e2e.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ declare global {
3232
name: string,
3333
opts?: { skills?: string[] },
3434
): Chainable<void>;
35+
/** Create a minimal llama-server SympoziumInstance via API. */
36+
createLlamaServerInstance(
37+
name: string,
38+
opts?: { skills?: string[] },
39+
): Chainable<void>;
3540
/** Dispatch an ad-hoc run against an instance via API. Returns the created run name. */
3641
dispatchRun(
3742
instanceRef: string,
@@ -128,6 +133,31 @@ Cypress.Commands.add("createLMStudioInstance", (name: string, opts) => {
128133
});
129134
});
130135

136+
Cypress.Commands.add("createLlamaServerInstance", (name: string, opts) => {
137+
const body: Record<string, unknown> = {
138+
name,
139+
provider: "llama-server",
140+
model: "default",
141+
baseURL: "http://host.docker.internal:8080/v1",
142+
};
143+
if (opts?.skills?.length) {
144+
body.skills = opts.skills.map((s) => ({ skillPackRef: s }));
145+
}
146+
cy.request({
147+
method: "POST",
148+
url: "/api/v1/instances?namespace=default",
149+
headers: authHeaders(),
150+
body,
151+
failOnStatusCode: false,
152+
}).then((resp) => {
153+
if (resp.status >= 400 && resp.status !== 409) {
154+
throw new Error(
155+
`createLlamaServerInstance failed (${resp.status}): ${JSON.stringify(resp.body)}`,
156+
);
157+
}
158+
});
159+
});
160+
131161
Cypress.Commands.add("dispatchRun", (instanceRef: string, task: string, opts) => {
132162
return cy
133163
.request({

0 commit comments

Comments
 (0)