Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
62722de
fix(ui): honor custom agent model_id when session is at DB default (#…
itomek Apr 20, 2026
4acfd40
fix(ui): extract _build_create_kwargs/_effective_model, import SESSIO…
itomek Apr 20, 2026
8f5c762
fix(ui): restore intent-key for agent cache store to fix miss regress…
itomek-amd Apr 22, 2026
a0fdb10
docs(plans): fix broken CMU link to EMNLP 2004 Email Speech Acts pape…
kovtcharov Apr 20, 2026
3b51ca9
style(mcp): apply Black formatting to mcp_bridge.py (CI lint fix)
itomek Apr 22, 2026
0a4f260
feat(llm): add Gemma 4 E4B as default and native tool_calls priority
itomek Apr 24, 2026
7b03ee8
chore: merge main into branch, resolve SESSION_DEFAULT_MODEL conflict
itomek Apr 24, 2026
d71cd91
feat(llm): bump Lemonade default port 8000 -> 13305, require v10.1.0+
itomek Apr 24, 2026
da0c9ae
test(eval): post-swap Gemma-4-E4B baseline (Lemonade v10.2.0, ctx=409…
itomek Apr 24, 2026
6864cc3
test(eval): update Gemma baseline with ctx=32768 re-runs
itomek Apr 24, 2026
7492d41
docs(cpp): bump Lemonade version references to v10.1.0
itomek Apr 24, 2026
e9ef966
feat(llm): bump Lemonade version requirement to 10.2.0
itomek Apr 24, 2026
9ad8a11
ci(workflows): port 8000 -> 13305; tests: update Qwen->Gemma & 10.1.0…
itomek Apr 25, 2026
b80b048
ci(workflows): fix server start port — start-lemonade on 13305, not 8000
itomek Apr 25, 2026
75f6d84
fix(check-doc-links): treat connection reset as warning, not broken link
itomek Apr 25, 2026
f04c9e6
test: update hardcoded port 8000 → 13305 in lemonade client tests
itomek Apr 25, 2026
67e4b6a
style: black format test_lemonade_client.py
itomek Apr 25, 2026
942591c
test(agent-sdk): use GAIA_TEST_MODEL env var; set Llama in CI
itomek Apr 25, 2026
fae37a8
fix(ci): allow Linux test port to be overridden via LEMONADE_PORT env…
itomek Apr 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/build_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -293,21 +293,21 @@ jobs:
timeout-minutes: 30
env:
GAIA_CPP_TEST_MODEL: Qwen3-4B-Instruct-2507-GGUF
GAIA_CPP_BASE_URL: http://localhost:8000/api/v1
GAIA_CPP_BASE_URL: http://localhost:13305/api/v1
run: |
try {
# Start Lemonade with Qwen3-4B-GGUF
.\installer\scripts\start-lemonade.ps1 -ModelName "Qwen3-4B-Instruct-2507-GGUF" -Port 8000 -CtxSize 16384 -InitWaitTime 15
.\installer\scripts\start-lemonade.ps1 -ModelName "Qwen3-4B-Instruct-2507-GGUF" -Port 13305 -CtxSize 16384 -InitWaitTime 15

# Verify health
$health = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 10
$health = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 10
if ($health.status -ne "ok") { throw "Lemonade health check failed" }
Write-Host "[OK] Lemonade Server ready with Qwen3-4B-Instruct-2507-GGUF"

# Run all C++ integration tests (LLM + MCP + WiFi + Health)
Write-Host "=== Running C++ Integration Tests (LLM + MCP + WiFi + Health) ==="
$env:GAIA_CPP_TEST_MODEL = "Qwen3-4B-Instruct-2507-GGUF"
$env:GAIA_CPP_BASE_URL = "http://localhost:8000/api/v1"
$env:GAIA_CPP_BASE_URL = "http://localhost:13305/api/v1"
# -j 1: run tests sequentially so they don't compete for the single LLM server
ctest --test-dir cpp/build-integration -C Release --output-on-failure -j 1
if ($LASTEXITCODE -ne 0) { throw "C++ integration tests failed" }
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/test_agent_sdk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ jobs:
# Start the server in the background as a process (not PowerShell job)
Write-Host "Starting lemonade-server in background..."
# Start the server as a background process
$serverProcess = Start-Process -FilePath "lemonade-server" -ArgumentList "serve", "--no-tray" -PassThru -WindowStyle Hidden
$serverProcess = Start-Process -FilePath "lemonade-server" -ArgumentList "serve", "--no-tray", "--port", "13305" -PassThru -WindowStyle Hidden
Write-Host "Started lemonade-server process with ID: $($serverProcess.Id)"

# Wait for server to start up
Expand All @@ -97,7 +97,7 @@ jobs:
$waitTime += 2

try {
$response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 5
$response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 5
Write-Host "Server is ready and responding to health checks"
$serverReady = $true
} catch {
Expand Down Expand Up @@ -145,6 +145,8 @@ jobs:

REM Run the comprehensive integration test suite
set PYTHONIOENCODING=utf-8
REM Use the model that was pulled above (overrides DEFAULT_MODEL_NAME=Gemma-4-E4B)
set GAIA_TEST_MODEL=Llama-3.2-3B-Instruct-Hybrid
python tests\test_agent_sdk.py
set integration_exit=%ERRORLEVEL%

Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/test_api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ jobs:
$serverJob = Start-Job -ScriptBlock {
# Workaround for Issue #612: Disable Vulkan cooperative matrix optimization
$env:GGML_VK_DISABLE_COOPMAT = "1"
& lemonade-server serve --ctx-size 8192 --host localhost --port 8000 --no-tray 2>&1
& lemonade-server serve --ctx-size 8192 --host localhost --port 13305 --no-tray 2>&1
}
Write-Host "Started Lemonade server job with ID: $($serverJob.Id)"
$env:LEMONADE_JOB_ID = $serverJob.Id
Expand All @@ -93,7 +93,7 @@ jobs:
$waitTime += 2

try {
$response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 5
$response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 5
Write-Host "[OK] Lemonade server is ready"
Write-Host "Health response: $($response | ConvertTo-Json -Compress)"
$serverReady = $true
Expand All @@ -112,7 +112,7 @@ jobs:
Write-Host "Pulling Qwen3-0.6B-GGUF..."
try {
$body = @{ model_name = "Qwen3-0.6B-GGUF" } | ConvertTo-Json
$response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
$response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/pull" `
-Method POST -ContentType "application/json" -Body $body -TimeoutSec 600
Write-Host " [OK] Qwen3-0.6B-GGUF pull initiated"
} catch {
Expand All @@ -128,7 +128,7 @@ jobs:
try {
$loadRequest = @{ model_name = "Qwen3-0.6B-GGUF" } | ConvertTo-Json
Write-Host "Loading model: Qwen3-0.6B-GGUF"
$loadResponse = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/load" `
$loadResponse = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/load" `
-Method POST -Body $loadRequest -ContentType "application/json" -TimeoutSec 120
Write-Host "[OK] Model loaded successfully: $($loadResponse | ConvertTo-Json -Compress)"
} catch {
Expand All @@ -144,7 +144,7 @@ jobs:

# Verify models
try {
$models = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/models" -Method GET
$models = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/models" -Method GET
Write-Host "`n[OK] Available models:"
$models.data | ForEach-Object { Write-Host " - $($_.id)" }
} catch {
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/test_embeddings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ jobs:
$serverJob = Start-Job -ScriptBlock {
# Workaround for Issue #612: Disable Vulkan cooperative matrix optimization
$env:GGML_VK_DISABLE_COOPMAT = "1"
& lemonade-server serve --host localhost --port 8000 --no-tray 2>&1
& lemonade-server serve --host localhost --port 13305 --no-tray 2>&1
}
Write-Host "Started Lemonade server job with ID: $($serverJob.Id)"
$env:LEMONADE_JOB_ID = $serverJob.Id
Expand All @@ -84,7 +84,7 @@ jobs:
$waitTime += 2

try {
$response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 5
$response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 5
Write-Host "[OK] Lemonade server is ready"
Write-Host "Health response: $($response | ConvertTo-Json -Compress)"
$serverReady = $true
Expand Down Expand Up @@ -115,7 +115,7 @@ jobs:
Write-Host "Pulling nomic-embed-text-v2-moe-GGUF..."
try {
$body = @{ model_name = "nomic-embed-text-v2-moe-GGUF" } | ConvertTo-Json
$response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
$response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/pull" `
-Method POST -ContentType "application/json" -Body $body -TimeoutSec 600
Write-Host " [OK] Model pull initiated"
} catch {
Expand All @@ -130,7 +130,7 @@ jobs:
} | ConvertTo-Json

Write-Host "Loading model: nomic-embed-text-v2-moe-GGUF"
$loadResponse = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/load" `
$loadResponse = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/load" `
-Method POST -Body $loadRequest -ContentType "application/json" -TimeoutSec 60
Write-Host "[OK] Model loaded successfully: $($loadResponse | ConvertTo-Json -Compress)"
} catch {
Expand All @@ -147,7 +147,7 @@ jobs:

# Verify model is available
try {
$models = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/models" -Method GET
$models = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/models" -Method GET
Write-Host "`n[OK] Available models:"
$models.data | ForEach-Object { Write-Host " - $($_.id)" }
} catch {
Expand All @@ -157,7 +157,7 @@ jobs:
# Verify server is still responding before embeddings test
Write-Host "`n=== Verifying Server Health ==="
try {
$health = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 10
$health = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 10
Write-Host "[OK] Server responding: $($health | ConvertTo-Json -Compress)"
} catch {
Write-Host "[ERROR] Server health check failed: $($_.Exception.Message)"
Expand All @@ -179,7 +179,7 @@ jobs:
try {
$testBody = @{ input = @("test embedding"); model = "nomic-embed-text-v2-moe-GGUF" } | ConvertTo-Json
# Use localhost consistently and increased timeout for first embedding request
$response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/embeddings" `
$response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/embeddings" `
-Method POST -ContentType "application/json" -Body $testBody -TimeoutSec 300
Write-Host "[OK] Embedding model verified successfully"
$modelReady = $true
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/test_gaia_cli_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ jobs:
echo "=== Listing Available Models ==="
curl -s http://localhost:8000/api/v1/models | jq '.' || echo "Could not list models"

# Python lemonade-server-dev runs on port 8000; tell GAIA CLI where to connect
export LEMONADE_BASE_URL=http://localhost:8000/api/v1

echo "=== Testing Core GAIA CLI Commands with Lemonade ==="

# Test chat command with Qwen model (should now work with Lemonade)
Expand Down Expand Up @@ -191,7 +194,8 @@ jobs:
echo "Testing LemonadeClient API with running server"

# Run the lemonade client integration tests (skip hybrid NPU test - no NPU on Linux)
GAIA_TEST_MODEL="Qwen3-0.6B-GGUF" python -m pytest tests/test_lemonade_client.py -vs --tb=short -k "Integration and not hybrid" || LEMONADE_TEST_EXIT=$?
# LEMONADE_PORT=8000: lemonade-server-dev always binds to 8000 (no --port flag)
LEMONADE_PORT=8000 GAIA_TEST_MODEL="Qwen3-0.6B-GGUF" python -m pytest tests/test_lemonade_client.py -vs --tb=short -k "Integration and not hybrid" || LEMONADE_TEST_EXIT=$?

if [ "${LEMONADE_TEST_EXIT:-0}" -eq 0 ]; then
echo "✅ Lemonade client integration tests passed successfully!"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test_gaia_cli_windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
# Start the server in the background as a process (not PowerShell job)
Write-Host "Starting lemonade-server in background..."
# Start the server as a background process
$serverProcess = Start-Process -FilePath "lemonade-server" -ArgumentList "serve", "--no-tray" -PassThru -WindowStyle Hidden
$serverProcess = Start-Process -FilePath "lemonade-server" -ArgumentList "serve", "--no-tray", "--port", "13305" -PassThru -WindowStyle Hidden
Write-Host "Started lemonade-server process with ID: $($serverProcess.Id)"

# Wait for server to start up
Expand All @@ -106,7 +106,7 @@ jobs:
$waitTime += 2

try {
$response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 5
$response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 5
Write-Host "Server is ready and responding to health checks"
$serverReady = $true
} catch {
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/test_lemonade_server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ jobs:
run: |
try {
# Start server and load model (all in one session)
.\installer\scripts\start-lemonade.ps1 -ModelName "Qwen3-4B-Instruct-2507-GGUF" -Port 8000 -CtxSize 32768 -InitWaitTime 10
.\installer\scripts\start-lemonade.ps1 -ModelName "Qwen3-4B-Instruct-2507-GGUF" -Port 13305 -CtxSize 32768 -InitWaitTime 10

# Verify health endpoint
Write-Host "=== Verifying Health Endpoint ==="
$health = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 10
$health = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 10
Write-Host "Health response: $($health | ConvertTo-Json -Compress)"

if ($health.status -ne "ok") {
Expand Down Expand Up @@ -93,7 +93,7 @@ jobs:
max_tokens = 10
} | ConvertTo-Json

$completion = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/completions" `
$completion = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/completions" `
-Method POST -ContentType "application/json" -Body $testBody -TimeoutSec 30

Write-Host "[OK] Completion successful"
Expand Down
18 changes: 9 additions & 9 deletions .github/workflows/test_rag.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ jobs:
$serverJob = Start-Job -ScriptBlock {
# Workaround for Issue #612: Disable Vulkan cooperative matrix optimization
$env:GGML_VK_DISABLE_COOPMAT = "1"
& lemonade-server serve --host localhost --port 8000 --ctx-size 8192 --no-tray 2>&1
& lemonade-server serve --host localhost --port 13305 --ctx-size 8192 --no-tray 2>&1
}
Write-Host "Started Lemonade server job with ID: $($serverJob.Id)"
$env:LEMONADE_JOB_ID = $serverJob.Id
Expand All @@ -126,7 +126,7 @@ jobs:
$waitTime += 2

try {
$response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 5
$response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 5
Write-Host "[OK] Lemonade server is ready"
Write-Host "Health response: $($response | ConvertTo-Json -Compress)"
$serverReady = $true
Expand Down Expand Up @@ -159,7 +159,7 @@ jobs:
Write-Host "Pulling Qwen3-4B-Instruct-2507-GGUF..."
try {
$body = @{ model_name = "Qwen3-4B-Instruct-2507-GGUF" } | ConvertTo-Json
$response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
$response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/pull" `
-Method POST -ContentType "application/json" -Body $body -TimeoutSec 600
Write-Host " [OK] Qwen3-4B-Instruct-2507-GGUF pull initiated"
} catch {
Expand All @@ -170,7 +170,7 @@ jobs:
Write-Host "Pulling nomic-embed-text-v2-moe-GGUF..."
try {
$body = @{ model_name = "nomic-embed-text-v2-moe-GGUF" } | ConvertTo-Json
$response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
$response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/pull" `
-Method POST -ContentType "application/json" -Body $body -TimeoutSec 600
Write-Host " [OK] nomic-embed-text-v2-moe-GGUF pull initiated"
} catch {
Expand All @@ -181,7 +181,7 @@ jobs:
Write-Host "Pulling Qwen3-VL-4B-Instruct-GGUF..."
try {
$body = @{ model_name = "Qwen3-VL-4B-Instruct-GGUF" } | ConvertTo-Json
$response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
$response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/pull" `
-Method POST -ContentType "application/json" -Body $body -TimeoutSec 1200
Write-Host " [OK] Qwen3-VL-4B-Instruct-GGUF pull initiated"
} catch {
Expand All @@ -196,7 +196,7 @@ jobs:
} | ConvertTo-Json

Write-Host "Loading model: nomic-embed-text-v2-moe-GGUF"
$loadResponse = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/load" `
$loadResponse = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/load" `
-Method POST -Body $loadRequest -ContentType "application/json" -TimeoutSec 60
Write-Host "[OK] Model loaded successfully: $($loadResponse | ConvertTo-Json -Compress)"
} catch {
Expand All @@ -213,7 +213,7 @@ jobs:

# Verify models
try {
$models = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/models" -Method GET
$models = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/models" -Method GET
Write-Host "`n[OK] Available models:"
$models.data | ForEach-Object { Write-Host " - $($_.id)" }
} catch {
Expand All @@ -223,7 +223,7 @@ jobs:
# Verify server is still responding before embeddings test
Write-Host "`n=== Verifying Server Health ==="
try {
$health = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 10
$health = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 10
Write-Host "[OK] Server responding: $($health | ConvertTo-Json -Compress)"
} catch {
Write-Host "[ERROR] Server health check failed: $($_.Exception.Message)"
Expand All @@ -245,7 +245,7 @@ jobs:
try {
$testBody = @{ input = @("test embedding"); model = "nomic-embed-text-v2-moe-GGUF" } | ConvertTo-Json
# Use localhost consistently and increased timeout for first embedding request
$response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/embeddings" `
$response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/embeddings" `
-Method POST -ContentType "application/json" -Body $testBody -TimeoutSec 300
Write-Host "[OK] Embedding model verified successfully"
$modelReady = $true
Expand Down
10 changes: 5 additions & 5 deletions cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,23 @@ Included demos:

The agent connects to an OpenAI-compatible LLM server at `http://localhost:8000/api/v1` by default. The reference backend is [Lemonade Server](https://github.com/lemonade-sdk/lemonade), which runs models locally on AMD hardware.

Download and install Lemonade Server v10.0.0, then start it:
Download and install Lemonade Server v10.2.0, then start it:

**Windows:**
```powershell
# Download and run the MSI installer
curl -L -o lemonade-server-minimal.msi https://github.com/lemonade-sdk/lemonade/releases/download/v10.0.0/lemonade-server-minimal.msi
curl -L -o lemonade-server-minimal.msi https://github.com/lemonade-sdk/lemonade/releases/download/v10.2.0/lemonade-server-minimal.msi
msiexec /i lemonade-server-minimal.msi
```

**Linux:**
```bash
# Download and install the .deb package
curl -L -o lemonade-server_10.0.0_amd64.deb https://github.com/lemonade-sdk/lemonade/releases/download/v10.0.0/lemonade-server_10.0.0_amd64.deb
sudo dpkg -i lemonade-server_10.0.0_amd64.deb
curl -L -o lemonade-server_10.2.0_amd64.deb https://github.com/lemonade-sdk/lemonade/releases/download/v10.2.0/lemonade-server_10.2.0_amd64.deb
sudo dpkg -i lemonade-server_10.2.0_amd64.deb
```

Or download directly from the [Lemonade v10.0.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.0.0).
Or download directly from the [Lemonade v10.2.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.2.0).

After installation, start the server:
```bash
Expand Down
Loading
Loading