amd
diff --git a/.github/workflows/build_cpp.yml b/.github/workflows/build_cpp.yml
Lines changed: 4 additions & 4 deletions
diff --git a/.github/workflows/test_agent_sdk.yml b/.github/workflows/test_agent_sdk.yml
Lines changed: 4 additions & 2 deletions
diff --git a/.github/workflows/test_api.yml b/.github/workflows/test_api.yml
Lines changed: 5 additions & 5 deletions
diff --git a/.github/workflows/test_embeddings.yml b/.github/workflows/test_embeddings.yml
Lines changed: 7 additions & 7 deletions
diff --git a/.github/workflows/test_gaia_cli_linux.yml b/.github/workflows/test_gaia_cli_linux.yml
Lines changed: 5 additions & 1 deletion
diff --git a/.github/workflows/test_gaia_cli_windows.yml b/.github/workflows/test_gaia_cli_windows.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/test_lemonade_server.yml b/.github/workflows/test_lemonade_server.yml
Lines changed: 3 additions & 3 deletions
diff --git a/.github/workflows/test_rag.yml b/.github/workflows/test_rag.yml
Lines changed: 9 additions & 9 deletions
diff --git a/cpp/README.md b/cpp/README.md
Lines changed: 5 additions & 5 deletions
@@ -293,21 +293,21 @@ jobs:
         timeout-minutes: 30
         env:
           GAIA_CPP_TEST_MODEL: Qwen3-4B-Instruct-2507-GGUF
-          GAIA_CPP_BASE_URL: http://localhost:8000/api/v1
+          GAIA_CPP_BASE_URL: http://localhost:13305/api/v1
         run: |
           try {
               # Start Lemonade with Qwen3-4B-GGUF
-              .\installer\scripts\start-lemonade.ps1 -ModelName "Qwen3-4B-Instruct-2507-GGUF" -Port 8000 -CtxSize 16384 -InitWaitTime 15
+              .\installer\scripts\start-lemonade.ps1 -ModelName "Qwen3-4B-Instruct-2507-GGUF" -Port 13305 -CtxSize 16384 -InitWaitTime 15
 
               # Verify health
-              $health = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 10
+              $health = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 10
               if ($health.status -ne "ok") { throw "Lemonade health check failed" }
               Write-Host "[OK] Lemonade Server ready with Qwen3-4B-Instruct-2507-GGUF"
 
               # Run all C++ integration tests (LLM + MCP + WiFi + Health)
               Write-Host "=== Running C++ Integration Tests (LLM + MCP + WiFi + Health) ==="
               $env:GAIA_CPP_TEST_MODEL = "Qwen3-4B-Instruct-2507-GGUF"
-              $env:GAIA_CPP_BASE_URL   = "http://localhost:8000/api/v1"
+              $env:GAIA_CPP_BASE_URL   = "http://localhost:13305/api/v1"
               # -j 1: run tests sequentially so they don't compete for the single LLM server
               ctest --test-dir cpp/build-integration -C Release --output-on-failure -j 1
               if ($LASTEXITCODE -ne 0) { throw "C++ integration tests failed" }
 
@@ -83,7 +83,7 @@ jobs:
           # Start the server in the background as a process (not PowerShell job)
           Write-Host "Starting lemonade-server in background..."
           # Start the server as a background process
-          $serverProcess = Start-Process -FilePath "lemonade-server" -ArgumentList "serve", "--no-tray" -PassThru -WindowStyle Hidden
+          $serverProcess = Start-Process -FilePath "lemonade-server" -ArgumentList "serve", "--no-tray", "--port", "13305" -PassThru -WindowStyle Hidden
           Write-Host "Started lemonade-server process with ID: $($serverProcess.Id)"
 
           # Wait for server to start up
@@ -97,7 +97,7 @@ jobs:
               $waitTime += 2
 
               try {
-                  $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 5
+                  $response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 5
                   Write-Host "Server is ready and responding to health checks"
                   $serverReady = $true
               } catch {
@@ -145,6 +145,8 @@ jobs:
 
           REM Run the comprehensive integration test suite
           set PYTHONIOENCODING=utf-8
+          REM Use the model that was pulled above (overrides DEFAULT_MODEL_NAME=Gemma-4-E4B)
+          set GAIA_TEST_MODEL=Llama-3.2-3B-Instruct-Hybrid
           python tests\test_agent_sdk.py
           set integration_exit=%ERRORLEVEL%
 
 
@@ -77,7 +77,7 @@ jobs:
               $serverJob = Start-Job -ScriptBlock {
                   # Workaround for Issue #612: Disable Vulkan cooperative matrix optimization
                   $env:GGML_VK_DISABLE_COOPMAT = "1"
-                  & lemonade-server serve --ctx-size 8192 --host localhost --port 8000 --no-tray 2>&1
+                  & lemonade-server serve --ctx-size 8192 --host localhost --port 13305 --no-tray 2>&1
               }
               Write-Host "Started Lemonade server job with ID: $($serverJob.Id)"
               $env:LEMONADE_JOB_ID = $serverJob.Id
@@ -93,7 +93,7 @@ jobs:
                   $waitTime += 2
 
                   try {
-                      $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 5
+                      $response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 5
                       Write-Host "[OK] Lemonade server is ready"
                       Write-Host "Health response: $($response | ConvertTo-Json -Compress)"
                       $serverReady = $true
@@ -112,7 +112,7 @@ jobs:
               Write-Host "Pulling Qwen3-0.6B-GGUF..."
               try {
                   $body = @{ model_name = "Qwen3-0.6B-GGUF" } | ConvertTo-Json
-                  $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
+                  $response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/pull" `
                       -Method POST -ContentType "application/json" -Body $body -TimeoutSec 600
                   Write-Host "   [OK] Qwen3-0.6B-GGUF pull initiated"
               } catch {
@@ -128,7 +128,7 @@ jobs:
               try {
                   $loadRequest = @{ model_name = "Qwen3-0.6B-GGUF" } | ConvertTo-Json
                   Write-Host "Loading model: Qwen3-0.6B-GGUF"
-                  $loadResponse = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/load" `
+                  $loadResponse = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/load" `
                       -Method POST -Body $loadRequest -ContentType "application/json" -TimeoutSec 120
                   Write-Host "[OK] Model loaded successfully: $($loadResponse | ConvertTo-Json -Compress)"
               } catch {
@@ -144,7 +144,7 @@ jobs:
 
               # Verify models
               try {
-                  $models = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/models" -Method GET
+                  $models = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/models" -Method GET
                   Write-Host "`n[OK] Available models:"
                   $models.data | ForEach-Object { Write-Host "   - $($_.id)" }
               } catch {
 
@@ -68,7 +68,7 @@ jobs:
             $serverJob = Start-Job -ScriptBlock {
                 # Workaround for Issue #612: Disable Vulkan cooperative matrix optimization
                 $env:GGML_VK_DISABLE_COOPMAT = "1"
-                & lemonade-server serve --host localhost --port 8000 --no-tray 2>&1
+                & lemonade-server serve --host localhost --port 13305 --no-tray 2>&1
             }
             Write-Host "Started Lemonade server job with ID: $($serverJob.Id)"
             $env:LEMONADE_JOB_ID = $serverJob.Id
@@ -84,7 +84,7 @@ jobs:
                 $waitTime += 2
 
                 try {
-                    $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 5
+                    $response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 5
                     Write-Host "[OK] Lemonade server is ready"
                     Write-Host "Health response: $($response | ConvertTo-Json -Compress)"
                     $serverReady = $true
@@ -115,7 +115,7 @@ jobs:
             Write-Host "Pulling nomic-embed-text-v2-moe-GGUF..."
             try {
                 $body = @{ model_name = "nomic-embed-text-v2-moe-GGUF" } | ConvertTo-Json
-                $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
+                $response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/pull" `
                     -Method POST -ContentType "application/json" -Body $body -TimeoutSec 600
                 Write-Host "   [OK] Model pull initiated"
             } catch {
@@ -130,7 +130,7 @@ jobs:
                 } | ConvertTo-Json
 
                 Write-Host "Loading model: nomic-embed-text-v2-moe-GGUF"
-                $loadResponse = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/load" `
+                $loadResponse = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/load" `
                     -Method POST -Body $loadRequest -ContentType "application/json" -TimeoutSec 60
                 Write-Host "[OK] Model loaded successfully: $($loadResponse | ConvertTo-Json -Compress)"
             } catch {
@@ -147,7 +147,7 @@ jobs:
 
             # Verify model is available
             try {
-                $models = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/models" -Method GET
+                $models = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/models" -Method GET
                 Write-Host "`n[OK] Available models:"
                 $models.data | ForEach-Object { Write-Host "   - $($_.id)" }
             } catch {
@@ -157,7 +157,7 @@ jobs:
             # Verify server is still responding before embeddings test
             Write-Host "`n=== Verifying Server Health ==="
             try {
-                $health = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 10
+                $health = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 10
                 Write-Host "[OK] Server responding: $($health | ConvertTo-Json -Compress)"
             } catch {
                 Write-Host "[ERROR] Server health check failed: $($_.Exception.Message)"
@@ -179,7 +179,7 @@ jobs:
                 try {
                     $testBody = @{ input = @("test embedding"); model = "nomic-embed-text-v2-moe-GGUF" } | ConvertTo-Json
                     # Use localhost consistently and increased timeout for first embedding request
-                    $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/embeddings" `
+                    $response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/embeddings" `
                         -Method POST -ContentType "application/json" -Body $testBody -TimeoutSec 300
                     Write-Host "[OK] Embedding model verified successfully"
                     $modelReady = $true
 
@@ -138,6 +138,9 @@ jobs:
           echo "=== Listing Available Models ==="
           curl -s http://localhost:8000/api/v1/models | jq '.' || echo "Could not list models"
 
+          # Python lemonade-server-dev runs on port 8000; tell GAIA CLI where to connect
+          export LEMONADE_BASE_URL=http://localhost:8000/api/v1
+
           echo "=== Testing Core GAIA CLI Commands with Lemonade ==="
 
           # Test chat command with Qwen model (should now work with Lemonade)
@@ -191,7 +194,8 @@ jobs:
           echo "Testing LemonadeClient API with running server"
 
           # Run the lemonade client integration tests (skip hybrid NPU test - no NPU on Linux)
-          GAIA_TEST_MODEL="Qwen3-0.6B-GGUF" python -m pytest tests/test_lemonade_client.py -vs --tb=short -k "Integration and not hybrid" || LEMONADE_TEST_EXIT=$?
+          # LEMONADE_PORT=8000: lemonade-server-dev always binds to 8000 (no --port flag)
+          LEMONADE_PORT=8000 GAIA_TEST_MODEL="Qwen3-0.6B-GGUF" python -m pytest tests/test_lemonade_client.py -vs --tb=short -k "Integration and not hybrid" || LEMONADE_TEST_EXIT=$?
 
           if [ "${LEMONADE_TEST_EXIT:-0}" -eq 0 ]; then
             echo "✅ Lemonade client integration tests passed successfully!"
 
@@ -92,7 +92,7 @@ jobs:
           # Start the server in the background as a process (not PowerShell job)
           Write-Host "Starting lemonade-server in background..."
           # Start the server as a background process
-          $serverProcess = Start-Process -FilePath "lemonade-server" -ArgumentList "serve", "--no-tray" -PassThru -WindowStyle Hidden
+          $serverProcess = Start-Process -FilePath "lemonade-server" -ArgumentList "serve", "--no-tray", "--port", "13305" -PassThru -WindowStyle Hidden
           Write-Host "Started lemonade-server process with ID: $($serverProcess.Id)"
 
           # Wait for server to start up
@@ -106,7 +106,7 @@ jobs:
               $waitTime += 2
 
               try {
-                  $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 5
+                  $response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 5
                   Write-Host "Server is ready and responding to health checks"
                   $serverReady = $true
               } catch {
 
@@ -53,11 +53,11 @@ jobs:
         run: |
           try {
               # Start server and load model (all in one session)
-              .\installer\scripts\start-lemonade.ps1 -ModelName "Qwen3-4B-Instruct-2507-GGUF" -Port 8000 -CtxSize 32768 -InitWaitTime 10
+              .\installer\scripts\start-lemonade.ps1 -ModelName "Qwen3-4B-Instruct-2507-GGUF" -Port 13305 -CtxSize 32768 -InitWaitTime 10
 
               # Verify health endpoint
               Write-Host "=== Verifying Health Endpoint ==="
-              $health = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 10
+              $health = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 10
               Write-Host "Health response: $($health | ConvertTo-Json -Compress)"
 
               if ($health.status -ne "ok") {
@@ -93,7 +93,7 @@ jobs:
                   max_tokens = 10
               } | ConvertTo-Json
 
-              $completion = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/completions" `
+              $completion = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/completions" `
                   -Method POST -ContentType "application/json" -Body $testBody -TimeoutSec 30
 
               Write-Host "[OK] Completion successful"
 
@@ -110,7 +110,7 @@ jobs:
             $serverJob = Start-Job -ScriptBlock {
                 # Workaround for Issue #612: Disable Vulkan cooperative matrix optimization
                 $env:GGML_VK_DISABLE_COOPMAT = "1"
-                & lemonade-server serve --host localhost --port 8000 --ctx-size 8192 --no-tray 2>&1
+                & lemonade-server serve --host localhost --port 13305 --ctx-size 8192 --no-tray 2>&1
             }
             Write-Host "Started Lemonade server job with ID: $($serverJob.Id)"
             $env:LEMONADE_JOB_ID = $serverJob.Id
@@ -126,7 +126,7 @@ jobs:
                 $waitTime += 2
 
                 try {
-                    $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 5
+                    $response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 5
                     Write-Host "[OK] Lemonade server is ready"
                     Write-Host "Health response: $($response | ConvertTo-Json -Compress)"
                     $serverReady = $true
@@ -159,7 +159,7 @@ jobs:
             Write-Host "Pulling Qwen3-4B-Instruct-2507-GGUF..."
             try {
                 $body = @{ model_name = "Qwen3-4B-Instruct-2507-GGUF" } | ConvertTo-Json
-                $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
+                $response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/pull" `
                     -Method POST -ContentType "application/json" -Body $body -TimeoutSec 600
                 Write-Host "   [OK] Qwen3-4B-Instruct-2507-GGUF pull initiated"
             } catch {
@@ -170,7 +170,7 @@ jobs:
             Write-Host "Pulling nomic-embed-text-v2-moe-GGUF..."
             try {
                 $body = @{ model_name = "nomic-embed-text-v2-moe-GGUF" } | ConvertTo-Json
-                $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
+                $response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/pull" `
                     -Method POST -ContentType "application/json" -Body $body -TimeoutSec 600
                 Write-Host "   [OK] nomic-embed-text-v2-moe-GGUF pull initiated"
             } catch {
@@ -181,7 +181,7 @@ jobs:
             Write-Host "Pulling Qwen3-VL-4B-Instruct-GGUF..."
             try {
                 $body = @{ model_name = "Qwen3-VL-4B-Instruct-GGUF" } | ConvertTo-Json
-                $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
+                $response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/pull" `
                     -Method POST -ContentType "application/json" -Body $body -TimeoutSec 1200
                 Write-Host "   [OK] Qwen3-VL-4B-Instruct-GGUF pull initiated"
             } catch {
@@ -196,7 +196,7 @@ jobs:
                 } | ConvertTo-Json
 
                 Write-Host "Loading model: nomic-embed-text-v2-moe-GGUF"
-                $loadResponse = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/load" `
+                $loadResponse = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/load" `
                     -Method POST -Body $loadRequest -ContentType "application/json" -TimeoutSec 60
                 Write-Host "[OK] Model loaded successfully: $($loadResponse | ConvertTo-Json -Compress)"
             } catch {
@@ -213,7 +213,7 @@ jobs:
 
             # Verify models
             try {
-                $models = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/models" -Method GET
+                $models = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/models" -Method GET
                 Write-Host "`n[OK] Available models:"
                 $models.data | ForEach-Object { Write-Host "   - $($_.id)" }
             } catch {
@@ -223,7 +223,7 @@ jobs:
             # Verify server is still responding before embeddings test
             Write-Host "`n=== Verifying Server Health ==="
             try {
-                $health = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 10
+                $health = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/health" -Method GET -TimeoutSec 10
                 Write-Host "[OK] Server responding: $($health | ConvertTo-Json -Compress)"
             } catch {
                 Write-Host "[ERROR] Server health check failed: $($_.Exception.Message)"
@@ -245,7 +245,7 @@ jobs:
                 try {
                     $testBody = @{ input = @("test embedding"); model = "nomic-embed-text-v2-moe-GGUF" } | ConvertTo-Json
                     # Use localhost consistently and increased timeout for first embedding request
-                    $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/embeddings" `
+                    $response = Invoke-RestMethod -Uri "http://localhost:13305/api/v1/embeddings" `
                         -Method POST -ContentType "application/json" -Body $testBody -TimeoutSec 300
                     Write-Host "[OK] Embedding model verified successfully"
                     $modelReady = $true
 
@@ -28,23 +28,23 @@ Included demos:
 
 The agent connects to an OpenAI-compatible LLM server at `http://localhost:8000/api/v1` by default. The reference backend is [Lemonade Server](https://github.com/lemonade-sdk/lemonade), which runs models locally on AMD hardware.
 
-Download and install Lemonade Server v10.0.0, then start it:
+Download and install Lemonade Server v10.2.0, then start it:
 
 **Windows:**
 ```powershell
 # Download and run the MSI installer
-curl -L -o lemonade-server-minimal.msi https://github.com/lemonade-sdk/lemonade/releases/download/v10.0.0/lemonade-server-minimal.msi
+curl -L -o lemonade-server-minimal.msi https://github.com/lemonade-sdk/lemonade/releases/download/v10.2.0/lemonade-server-minimal.msi
 msiexec /i lemonade-server-minimal.msi
 ```
 
 **Linux:**
 ```bash
 # Download and install the .deb package
-curl -L -o lemonade-server_10.0.0_amd64.deb https://github.com/lemonade-sdk/lemonade/releases/download/v10.0.0/lemonade-server_10.0.0_amd64.deb
-sudo dpkg -i lemonade-server_10.0.0_amd64.deb
+curl -L -o lemonade-server_10.2.0_amd64.deb https://github.com/lemonade-sdk/lemonade/releases/download/v10.2.0/lemonade-server_10.2.0_amd64.deb
+sudo dpkg -i lemonade-server_10.2.0_amd64.deb
 ```
 
-Or download directly from the [Lemonade v10.0.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.0.0).
+Or download directly from the [Lemonade v10.2.0 release page](https://github.com/lemonade-sdk/lemonade/releases/tag/v10.2.0).
 
 After installation, start the server:
 ```bash