Add MinimalConsole for cleaner CLI output #1084

Workflow file for this run

.github/workflows/test_rag.yml at 8e8c25f

	# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
	# SPDX-License-Identifier: MIT

	name: Test RAG

	# Triggers: calls from other workflows, pushes and pull requests targeting
	# main, merge-queue checks, and manual dispatch.
	on:
	  workflow_call:
	  push:
	    branches: [main]
	  pull_request:
	    branches: [main]
	    # ready_for_review re-triggers the workflow when a draft PR is marked
	    # ready; the jobs' `if:` conditions skip draft PRs that lack the
	    # ready_for_ci label.
	    types: [opened, synchronize, reopened, ready_for_review]
	  merge_group:
	  workflow_dispatch:

	# Cancel in-progress runs when a new run is triggered.
	# Runs are grouped per workflow and per PR source branch (head_ref);
	# head_ref is only set for pull request events, so other events fall back
	# to the full ref.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
	  cancel-in-progress: true

	# Restrict the workflow token to read-only access to repository contents.
	permissions:
	  contents: read

	jobs:
	  # Unit tests with coverage on a GitHub-hosted Linux runner.
	  test-rag-unit:
	    name: RAG Unit Tests
	    runs-on: ubuntu-latest
	    # Run for every non-PR event; for PRs, skip drafts unless they carry the
	    # 'ready_for_ci' label.
	    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false || contains(github.event.pull_request.labels.*.name, 'ready_for_ci')

	    steps:
	      - uses: actions/checkout@v6

	      - name: Free disk space
	        uses: ./.github/actions/free-disk-space

	      - name: Setup Python environment
	        uses: ./.github/actions/setup-venv
	        with:
	          python-version: '3.12'
	          install-package: '.[dev,rag]'
	          # Extra package index serving the CPU builds of PyTorch.
	          extra-index-url: 'https://download.pytorch.org/whl/cpu'

	      - name: Install additional test dependencies
	        run: |
	          uv pip install pytest-cov --python .venv/bin/python

	      - name: Run RAG unit tests
	        run: |
	          echo "Running RAG unit tests..."
	          pytest tests/test_rag.py -v -s --tb=short --cov=src/gaia/rag --cov-report=term-missing

	      - name: Upload coverage report
	        if: always()
	        uses: actions/upload-artifact@v6
	        with:
	          name: rag-unit-test-coverage
	          path: .coverage
	          # .coverage is a dotfile; upload-artifact skips hidden files unless
	          # this input is enabled, which would leave the artifact empty.
	          include-hidden-files: true

	  # Integration tests against a live Lemonade server. The steps use
	  # PowerShell and Windows paths, so the 'stx' runners must be Windows hosts.
	  test-rag-integration:
	    name: RAG Integration Tests
	    # PRs labelled 'stx-test' run on the 'stx-test' runner; everything else
	    # runs on 'stx'.
	    runs-on: ${{ contains(github.event.pull_request.labels.*.name, 'stx-test') && 'stx-test' || 'stx' }}
	    needs: test-rag-unit
	    # Same draft-PR gate as the unit test job.
	    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false || contains(github.event.pull_request.labels.*.name, 'ready_for_ci')

	    steps:
	      - uses: actions/checkout@v6

	      - name: Setup Python environment
	        uses: ./.github/actions/setup-venv
	        with:
	          python-version: '3.12'
	          install-package: '.[dev,rag]'
	          extra-index-url: 'https://download.pytorch.org/whl/cpu'

	      - name: Install additional test dependencies
	        shell: powershell
	        run: |
	          uv pip install reportlab --python .venv\Scripts\python.exe

	      - name: Install Lemonade Server
	        uses: ./.github/actions/install-lemonade

	      # Server start-up, model preparation and the test run share one step so
	      # the background server job is still running during the tests and is
	      # always stopped by the finally block.
	      - name: Start Lemonade Server and Run Tests
	        shell: powershell
	        run: |
	          # Set console to UTF-8 for Unicode support
	          [Console]::OutputEncoding = [System.Text.Encoding]::UTF8
	          [Console]::InputEncoding = [System.Text.Encoding]::UTF8
	          $OutputEncoding = [System.Text.Encoding]::UTF8
	          $env:PYTHONIOENCODING = "utf-8"
	          $env:PYTHONUTF8 = "1"
	          chcp 65001 | Out-Null

	          try {
	            Write-Host "Starting Lemonade server..."
	            $serverJob = Start-Job -ScriptBlock {
	              # Workaround for Issue #612: Disable Vulkan cooperative matrix optimization
	              $env:GGML_VK_DISABLE_COOPMAT = "1"
	              & lemonade-server serve --host localhost --port 8000 --ctx-size 8192 --no-tray 2>&1
	            }
	            Write-Host "Started Lemonade server job with ID: $($serverJob.Id)"
	            # Stored in the environment so the finally block can find the job
	            $env:LEMONADE_JOB_ID = $serverJob.Id

	            # Wait for server to be ready (poll the health endpoint every 2 seconds)
	            Write-Host "Waiting for Lemonade server to start..."
	            $maxWaitTime = 60
	            $waitTime = 0
	            $serverReady = $false

	            while ($waitTime -lt $maxWaitTime -and -not $serverReady) {
	              Start-Sleep -Seconds 2
	              $waitTime += 2

	              try {
	                $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 5
	                Write-Host "[OK] Lemonade server is ready"
	                Write-Host "Health response: $($response | ConvertTo-Json -Compress)"
	                $serverReady = $true
	              } catch {
	                Write-Host "Waiting... ($waitTime/$maxWaitTime seconds)"
	              }
	            }

	            if (-not $serverReady) {
	              Write-Host "[ERROR] Server health check failed after $maxWaitTime seconds"
	              # Show server job output for debugging, as the later health check does
	              $jobOutput = Receive-Job -Job $serverJob -ErrorAction SilentlyContinue
	              Write-Host "Server output: $jobOutput"
	              throw "Server failed to start"
	            }

	            # Clear any cached/corrupted model files to force fresh download
	            # See: https://github.com/ggml-org/llama.cpp/issues/13534
	            Write-Host "`n=== Clearing Model Cache ==="
	            $lemonadeCache = "$env:LOCALAPPDATA\lemonade-server"
	            if (Test-Path "$lemonadeCache\models") {
	              Write-Host "Removing cached models from: $lemonadeCache\models"
	              Get-ChildItem "$lemonadeCache\models" -Directory | ForEach-Object {
	                Write-Host " Removing: $($_.Name)"
	                Remove-Item $_.FullName -Recurse -Force -ErrorAction SilentlyContinue
	              }
	            }

	            # Pull required models (actual models used in RAG tests).
	            # A failed pull only warns here; a missing model surfaces in the
	            # load/verification stages below.
	            Write-Host "`n=== Pulling Required Models ==="

	            # Pull LLM model
	            Write-Host "Pulling Qwen3-4B-Instruct-2507-GGUF..."
	            try {
	              $body = @{ model_name = "Qwen3-4B-Instruct-2507-GGUF" } | ConvertTo-Json
	              $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
	                -Method POST -ContentType "application/json" -Body $body -TimeoutSec 600
	              Write-Host " [OK] Qwen3-4B-Instruct-2507-GGUF pull initiated"
	            } catch {
	              Write-Host " [WARN] Pull may have failed: $($_.Exception.Message)"
	            }

	            # Pull embedding model (actual RAG default)
	            Write-Host "Pulling nomic-embed-text-v2-moe-GGUF..."
	            try {
	              $body = @{ model_name = "nomic-embed-text-v2-moe-GGUF" } | ConvertTo-Json
	              $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
	                -Method POST -ContentType "application/json" -Body $body -TimeoutSec 600
	              Write-Host " [OK] nomic-embed-text-v2-moe-GGUF pull initiated"
	            } catch {
	              Write-Host " [WARN] Pull may have failed: $($_.Exception.Message)"
	            }

	            # Pull VLM model (for PDFs with images)
	            Write-Host "Pulling Qwen3-VL-4B-Instruct-GGUF..."
	            try {
	              $body = @{ model_name = "Qwen3-VL-4B-Instruct-GGUF" } | ConvertTo-Json
	              $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/pull" `
	                -Method POST -ContentType "application/json" -Body $body -TimeoutSec 1200
	              Write-Host " [OK] Qwen3-VL-4B-Instruct-GGUF pull initiated"
	            } catch {
	              Write-Host " [WARN] Pull may have failed: $($_.Exception.Message)"
	            }

	            # Load embedding model into memory (required in Lemonade v9.x)
	            Write-Host "`n=== Loading Embedding Model ==="
	            try {
	              $loadRequest = @{
	                model_name = "nomic-embed-text-v2-moe-GGUF"
	              } | ConvertTo-Json

	              Write-Host "Loading model: nomic-embed-text-v2-moe-GGUF"
	              $loadResponse = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/load" `
	                -Method POST -Body $loadRequest -ContentType "application/json" -TimeoutSec 60
	              Write-Host "[OK] Model loaded successfully: $($loadResponse | ConvertTo-Json -Compress)"
	            } catch {
	              Write-Host "[ERROR] Model load failed: $($_.Exception.Message)"
	              if ($_.ErrorDetails) {
	                Write-Host "Error details: $($_.ErrorDetails.Message)"
	              }
	              throw "Failed to load embedding model"
	            }

	            # Wait for llamacpp backend to fully initialize (increased from 10s)
	            Write-Host "Waiting 30 seconds for llamacpp backend initialization..."
	            Start-Sleep -Seconds 30

	            # Verify models (informational only; failure to list is not fatal)
	            try {
	              $models = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/models" -Method GET
	              Write-Host "`n[OK] Available models:"
	              $models.data | ForEach-Object { Write-Host " - $($_.id)" }
	            } catch {
	              Write-Host "[WARN] Could not list models: $($_.Exception.Message)"
	            }

	            # Verify server is still responding before embeddings test
	            Write-Host "`n=== Verifying Server Health ==="
	            try {
	              $health = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/health" -Method GET -TimeoutSec 10
	              Write-Host "[OK] Server responding: $($health | ConvertTo-Json -Compress)"
	            } catch {
	              Write-Host "[ERROR] Server health check failed: $($_.Exception.Message)"
	              # Show server job output for debugging
	              if ($env:LEMONADE_JOB_ID) {
	                $jobOutput = Receive-Job -Id $env:LEMONADE_JOB_ID -ErrorAction SilentlyContinue
	                Write-Host "Server output: $jobOutput"
	              }
	              throw "Server not responding after model load"
	            }

	            # Verify embedding model with actual API call (up to 3 attempts)
	            Write-Host "`n=== Verifying Embedding Model ==="
	            $maxRetries = 3
	            $retryCount = 0
	            $modelReady = $false

	            while ($retryCount -lt $maxRetries -and -not $modelReady) {
	              try {
	                $testBody = @{ input = @("test embedding"); model = "nomic-embed-text-v2-moe-GGUF" } | ConvertTo-Json
	                # Use localhost consistently and increased timeout for first embedding request
	                $response = Invoke-RestMethod -Uri "http://localhost:8000/api/v1/embeddings" `
	                  -Method POST -ContentType "application/json" -Body $testBody -TimeoutSec 300
	                Write-Host "[OK] Embedding model verified successfully"
	                $modelReady = $true
	              } catch {
	                $retryCount++
	                Write-Host "[WARN] Embedding verification attempt $retryCount failed: $($_.Exception.Message)"
	                if ($retryCount -lt $maxRetries) {
	                  Write-Host "Waiting 30 seconds before retry..."
	                  Start-Sleep -Seconds 30
	                }
	              }
	            }

	            if (-not $modelReady) {
	              throw "Embedding model failed to load after $maxRetries attempts"
	            }

	            # Run tests in same session while server is running
	            Write-Host "`n=== Running RAG Integration Tests ==="
	            python tests/test_rag_integration.py
	            $testExitCode = $LASTEXITCODE

	            Write-Host "`nTests completed with exit code: $testExitCode"

	            # Turn a non-zero test exit code into a step failure
	            if ($testExitCode -ne 0) {
	              throw "Tests failed with exit code $testExitCode"
	            }
	          } finally {
	            # Always cleanup server
	            if ($env:LEMONADE_JOB_ID) {
	              Write-Host "`n=== Stopping Lemonade Server ==="
	              Stop-Job -Id $env:LEMONADE_JOB_ID -ErrorAction SilentlyContinue
	              Remove-Job -Id $env:LEMONADE_JOB_ID -ErrorAction SilentlyContinue
	              Write-Host "[OK] Server stopped"
	            }
	          }

	      - name: Upload test results
	        if: always()
	        uses: actions/upload-artifact@v6
	        with:
	          name: rag-integration-test-results
	          path: |
	            pytest-results/
	            *.log

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add MinimalConsole for cleaner CLI output #1084

Workflow file

Add MinimalConsole for cleaner CLI output #1084

Uh oh!

Workflow file for this run