Add MinimalConsole for cleaner CLI output #1084
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT
# RAG test workflow: trigger definitions, run de-duplication, and token scope.
name: Test RAG

# Runs when called from another workflow, on pushes / pull requests / merge
# queue entries targeting main, and on manual dispatch.
on:
  workflow_call:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
    # ready_for_review is listed so that a draft PR being marked ready
    # triggers a run.
    types:
      - opened
      - synchronize
      - reopened
      - ready_for_review
  merge_group:
  workflow_dispatch:

# Cancel in-progress runs when a new run is triggered.
# The group key is the workflow name plus the PR head ref, falling back to the
# ref when head_ref is empty.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: true

# Only read access to repository contents is granted to the workflow token.
permissions:
  contents: read
jobs:
  # Unit tests with coverage. The integration job below depends on this one
  # (via `needs`).
  test-rag-unit:
    name: RAG Unit Tests
    runs-on: ubuntu-latest
    # Non-PR events always run. PR events run unless the PR is a draft, and a
    # draft still runs when it carries the 'ready_for_ci' label.
    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false || contains(github.event.pull_request.labels.*.name, 'ready_for_ci')
    steps:
      - uses: actions/checkout@v6

      - name: Free disk space
        uses: ./.github/actions/free-disk-space

      - name: Setup Python environment
        uses: ./.github/actions/setup-venv
        with:
          python-version: '3.12'
          install-package: '.[dev,rag]'
          extra-index-url: 'https://download.pytorch.org/whl/cpu'

      - name: Install additional test dependencies
        run: |
          uv pip install pytest-cov --python .venv/bin/python

      - name: Run RAG unit tests
        run: |
          echo "Running RAG unit tests..."
          pytest tests/test_rag.py -v -s --tb=short --cov=src/gaia/rag --cov-report=term-missing

      - name: Upload coverage report
        if: always()
        uses: actions/upload-artifact@v6
        with:
          name: rag-unit-test-coverage
          path: .coverage
          # .coverage is a dotfile. upload-artifact excludes hidden files by
          # default, so without this flag nothing would be uploaded.
          include-hidden-files: true

  # Integration tests against a locally started Lemonade server.
  test-rag-integration:
    name: RAG Integration Tests
    # PRs labelled 'stx-test' run on the 'stx-test' runner; everything else
    # runs on 'stx'.
    # NOTE(review): the steps use PowerShell and Windows-style paths, so these
    # runners are presumably Windows machines - confirm.
    runs-on: ${{ contains(github.event.pull_request.labels.*.name, 'stx-test') && 'stx-test' || 'stx' }}
    needs: test-rag-unit
    # Same draft / 'ready_for_ci' gating as the unit-test job.
    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false || contains(github.event.pull_request.labels.*.name, 'ready_for_ci')
    steps:
      - uses: actions/checkout@v6

      - name: Setup Python environment
        uses: ./.github/actions/setup-venv
        with:
          python-version: '3.12'
          install-package: '.[dev,rag]'
          extra-index-url: 'https://download.pytorch.org/whl/cpu'

      - name: Install additional test dependencies
        shell: powershell
        run: |
          uv pip install reportlab --python .venv\Scripts\python.exe

      - name: Install Lemonade Server
        uses: ./.github/actions/install-lemonade

      # Server start-up, model preparation, and the test run all happen in one
      # step so the background server job stays alive for the tests; the
      # finally block stops the job on every exit path.
      - name: Start Lemonade Server and Run Tests
        shell: powershell
        run: |
          # Set console to UTF-8 for Unicode support
          [Console]::OutputEncoding = [System.Text.Encoding]::UTF8
          [Console]::InputEncoding = [System.Text.Encoding]::UTF8
          $OutputEncoding = [System.Text.Encoding]::UTF8
          $env:PYTHONIOENCODING = "utf-8"
          $env:PYTHONUTF8 = "1"
          chcp 65001 | Out-Null

          try {
            Write-Host "Starting Lemonade server..."
            $serverJob = Start-Job -ScriptBlock {
              # Workaround for Issue #612: Disable Vulkan cooperative matrix optimization
              $env:GGML_VK_DISABLE_COOPMAT = "1"
              & lemonade-server serve --host localhost --port 8000 --ctx-size 8192 --no-tray 2>&1
            }
            Write-Host "Started Lemonade server job with ID: $($serverJob.Id)"
            # The job ID is kept in an environment variable; the health-check
            # failure path and the finally block read it back from there.
            $env:LEMONADE_JOB_ID = $serverJob.Id

            # Wait for server to be ready: poll the health endpoint every
            # 2 seconds for up to $maxWaitTime seconds.
            Write-Host "Waiting for Lemonade server to start..."
            $maxWaitTime = 60
            $waitTime = 0
            $serverReady = $false
            while ($waitTime -lt $maxWaitTime -and -not $serverReady) {
              Start-Sleep -Seconds 2
              $waitTime += 2
              try {
                $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 5
                Write-Host "[OK] Lemonade server is ready"
                Write-Host "Health response: $($response | ConvertTo-Json -Compress)"
                $serverReady = $true
              } catch {
                # A failed request is treated as "not up yet"; keep polling.
                Write-Host "Waiting... ($waitTime/$maxWaitTime seconds)"
              }
            }
            if (-not $serverReady) {
              Write-Host "[ERROR] Server health check failed after $maxWaitTime seconds"
              throw "Server failed to start"
            }

            # Clear any cached/corrupted model files to force fresh download
            # See: https://github.com/ggml-org/llama.cpp/issues/13534
            Write-Host "`n=== Clearing Model Cache ==="
            $lemonadeCache = "$env:LOCALAPPDATA\lemonade-server"
            if (Test-Path "$lemonadeCache\models") {
              Write-Host "Removing cached models from: $lemonadeCache\models"
              Get-ChildItem "$lemonadeCache\models" -Directory | ForEach-Object {
                Write-Host " Removing: $($_.Name)"
                # Best effort: removal errors are deliberately ignored.
                Remove-Item $_.FullName -Recurse -Force -ErrorAction SilentlyContinue
              }
            }

            # Pull required models (actual models used in RAG tests).
            # Each entry carries its own request timeout. A failed pull is only
            # logged as a warning here and does not stop the step.
            Write-Host "`n=== Pulling Required Models ==="
            $modelsToPull = @(
              @{ Name = "Qwen3-4B-Instruct-2507-GGUF"; TimeoutSec = 600 },    # LLM model
              @{ Name = "nomic-embed-text-v2-moe-GGUF"; TimeoutSec = 600 },   # embedding model (actual RAG default)
              @{ Name = "Qwen3-VL-4B-Instruct-GGUF"; TimeoutSec = 1200 }      # VLM model (for PDFs with images)
            )
            foreach ($model in $modelsToPull) {
              Write-Host "Pulling $($model.Name)..."
              try {
                $body = @{ model_name = $model.Name } | ConvertTo-Json
                $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
                  -Method POST -ContentType "application/json" -Body $body -TimeoutSec $model.TimeoutSec
                Write-Host " [OK] $($model.Name) pull initiated"
              } catch {
                Write-Host " [WARN] Pull may have failed: $($_.Exception.Message)"
              }
            }

            # Load embedding model into memory (required in Lemonade v9.x).
            # Unlike the pulls above, a failure here aborts the step.
            Write-Host "`n=== Loading Embedding Model ==="
            try {
              $loadRequest = @{
                model_name = "nomic-embed-text-v2-moe-GGUF"
              } | ConvertTo-Json
              Write-Host "Loading model: nomic-embed-text-v2-moe-GGUF"
              $loadResponse = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/load" `
                -Method POST -Body $loadRequest -ContentType "application/json" -TimeoutSec 60
              Write-Host "[OK] Model loaded successfully: $($loadResponse | ConvertTo-Json -Compress)"
            } catch {
              Write-Host "[ERROR] Model load failed: $($_.Exception.Message)"
              if ($_.ErrorDetails) {
                Write-Host "Error details: $($_.ErrorDetails.Message)"
              }
              throw "Failed to load embedding model"
            }

            # Wait for llamacpp backend to fully initialize (increased from 10s)
            Write-Host "Waiting 30 seconds for llamacpp backend initialization..."
            Start-Sleep -Seconds 30

            # Verify models (informational only; a listing failure is a warning)
            try {
              $models = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/models" -Method GET
              Write-Host "`n[OK] Available models:"
              $models.data | ForEach-Object { Write-Host " - $($_.id)" }
            } catch {
              Write-Host "[WARN] Could not list models: $($_.Exception.Message)"
            }

            # Verify server is still responding before embeddings test
            Write-Host "`n=== Verifying Server Health ==="
            try {
              $health = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 10
              Write-Host "[OK] Server responding: $($health | ConvertTo-Json -Compress)"
            } catch {
              Write-Host "[ERROR] Server health check failed: $($_.Exception.Message)"
              # Show server job output for debugging
              if ($env:LEMONADE_JOB_ID) {
                $jobOutput = Receive-Job -Id $env:LEMONADE_JOB_ID -ErrorAction SilentlyContinue
                Write-Host "Server output: $jobOutput"
              }
              throw "Server not responding after model load"
            }

            # Verify embedding model with actual API call: up to $maxRetries
            # attempts, with a 30-second pause between attempts.
            Write-Host "`n=== Verifying Embedding Model ==="
            $maxRetries = 3
            $retryCount = 0
            $modelReady = $false
            while ($retryCount -lt $maxRetries -and -not $modelReady) {
              try {
                $testBody = @{ input = @("test embedding"); model = "nomic-embed-text-v2-moe-GGUF" } | ConvertTo-Json
                # Use localhost consistently and increased timeout for first embedding request
                $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/embeddings" `
                  -Method POST -ContentType "application/json" -Body $testBody -TimeoutSec 300
                Write-Host "[OK] Embedding model verified successfully"
                $modelReady = $true
              } catch {
                $retryCount++
                Write-Host "[WARN] Embedding verification attempt $retryCount failed: $($_.Exception.Message)"
                if ($retryCount -lt $maxRetries) {
                  Write-Host "Waiting 30 seconds before retry..."
                  Start-Sleep -Seconds 30
                }
              }
            }
            if (-not $modelReady) {
              throw "Embedding model failed to load after $maxRetries attempts"
            }

            # Run tests in same session while server is running
            Write-Host "`n=== Running RAG Integration Tests ==="
            python tests/test_rag_integration.py
            # Capture the exit code immediately, before any other command can
            # overwrite $LASTEXITCODE.
            $testExitCode = $LASTEXITCODE
            Write-Host "`nTests completed with exit code: $testExitCode"
            if ($testExitCode -ne 0) {
              throw "Tests failed with exit code $testExitCode"
            }
          } finally {
            # Always cleanup server
            if ($env:LEMONADE_JOB_ID) {
              Write-Host "`n=== Stopping Lemonade Server ==="
              Stop-Job -Id $env:LEMONADE_JOB_ID -ErrorAction SilentlyContinue
              Remove-Job -Id $env:LEMONADE_JOB_ID -ErrorAction SilentlyContinue
              Write-Host "[OK] Server stopped"
            }
          }

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v6
        with:
          name: rag-integration-test-results
          path: |
            pytest-results/
            *.log