@@ -240,47 +240,61 @@ jobs:
240240 ./bin/shimmy --version
241241 echo "✅ Binary version check passed"
242242
243- - name : Test 2 - Help shows vision commands
243+ - name : Test 2 - Help shows commands
244244 run : |
245245 ./bin/shimmy --help
246246 echo "✅ Help displayed"
247247
248- - name : Test 3 - Vision OCR on test image
248+ - name : Test 3 - Start server and test Vision API
249249 run : |
250- # Download a test image with text
250+ # Start server in background
251+ ./bin/shimmy serve --bind 127.0.0.1:11435 &
252+ SERVER_PID=$!
253+ echo "Server started with PID $SERVER_PID"
254+
255+ # Wait for server to be ready
256+ for i in {1..30}; do
257+ if curl -s http://127.0.0.1:11435/health > /dev/null 2>&1; then
258+ echo "Server is ready"
259+ break
260+ fi
261+ echo "Waiting for server... ($i/30)"
262+ sleep 2
263+ done
264+
265+ # Download a test image
251266 curl -L -o test-ocr.png "https://upload.wikimedia.org/wikipedia/commons/thumb/4/4f/SVG_Logo.svg/320px-SVG_Logo.svg.png"
252267
253- echo "Running OCR test..."
254- ./bin/shimmy vision --image test-ocr.png --mode ocr --output json > ocr-result.json 2>&1 || true
268+ # Convert image to base64
269+ TEST_IMAGE_B64=$(base64 -w 0 test-ocr.png)
270+
271+ echo "Running Vision API test..."
272+ curl -s -X POST http://127.0.0.1:11435/api/vision \
273+ -H "Content-Type: application/json" \
274+ -d "{\"image\": \"data:image/png;base64,$TEST_IMAGE_B64\", \"prompt\": \"What text do you see in this image?\", \"max_tokens\": 200}" \
275+ > vision-result.json 2>&1 || true
255276
256- echo "OCR Result:"
256- cat ocr-result.json
277+ echo "Vision API Result:"
278+ cat vision-result.json
258279
259- # Check if we got some output (even if JSON parsing fails, we should get something)
260- if [ -s ocr-result.json ]; then
261- echo "✅ Vision OCR produced output"
280+ # Check result
281+ if [ -s vision-result.json ] && grep -q "error\|text\|content" vision-result.json; then
282+ echo "✅ Vision API responded"
283+ VISION_SUCCESS="true"
262284 else
263- echo "⚠️ Vision OCR produced no output (may need model)"
285+ echo "⚠️ Vision API test inconclusive"
286+ VISION_SUCCESS="false"
264287 fi
265-
266- - name : Test 4 - Vision on webpage URL
267- run : |
268- echo "Running web page vision test..."
269- ./bin/shimmy vision --url "https://example.com" --mode web --output json > web-result.json 2>&1 || true
270288
271- echo "Web Vision Result:"
272- cat web-result.json
289+ # Save for test results
290+ echo "$VISION_SUCCESS" > vision-test-status.txt
273291
274- if [ -s web-result.json ]; then
275- echo "✅ Vision web mode produced output"
276- else
277- echo "⚠️ Vision web mode produced no output"
278- fi
292+ # Cleanup
293+ kill $SERVER_PID 2>/dev/null || true
279294
280295 - name : Generate test results
281296 run : |
282- OCR_SUCCESS=$([ -s ocr-result.json ] && echo "true" || echo "false")
283- WEB_SUCCESS=$([ -s web-result.json ] && echo "true" || echo "false")
297+ VISION_SUCCESS=$(cat vision-test-status.txt 2>/dev/null || echo "false")
284298
285299 cat > test-results-linux-x86_64.json << EOF
286300 {
@@ -290,8 +304,7 @@ jobs:
290304 "tests": {
291305 "binary_loads": true,
292306 "help_works": true,
293- "ocr_test": $OCR_SUCCESS,
294- "web_test": $WEB_SUCCESS
307+ "vision_api_test": $VISION_SUCCESS
295308 },
296309 "timestamp": "${{ github.run_id }}"
297310 }
@@ -350,50 +363,61 @@ jobs:
350363 ./bin/shimmy.exe --version || echo "Version check completed"
351364 echo "✅ Binary version check passed"
352365
353- - name : Test 2 - Help shows vision commands
366+ - name : Test 2 - Help shows commands
354367 shell : bash
355368 run : |
356369 ./bin/shimmy.exe --help || echo "Help completed"
357370 echo "✅ Help displayed"
358371
359- - name : Test 3 - Vision OCR on test image
372+ - name : Test 3 - Start server and test Vision API
360373 shell : bash
361374 run : |
362- # Download a test image with text
363- curl -L -o test-ocr.png "https://upload.wikimedia.org/wikipedia/commons/thumb/4/4f/SVG_Logo.svg/320px-SVG_Logo.svg.png"
375+ # Start server in background
376+ ./bin/shimmy.exe serve --bind 127.0.0.1:11435 &
377+ SERVER_PID=$!
378+ echo "Server started with PID $SERVER_PID"
364379
365- echo "Running OCR test..."
366- ./bin/shimmy.exe vision --image test-ocr.png --mode ocr --output json > ocr-result.json 2>&1 || true
380+ # Wait for server to be ready
381+ for i in {1..30}; do
382+ if curl -s http://127.0.0.1:11435/health > /dev/null 2>&1; then
383+ echo "Server is ready"
384+ break
385+ fi
386+ echo "Waiting for server... ($i/30)"
387+ sleep 2
388+ done
367389
368- echo "OCR Result:"
369- cat ocr-result.json
390+ # Download a test image
391+ curl -L -o test-ocr.png "https://upload.wikimedia.org/wikipedia/commons/thumb/4/4f/SVG_Logo.svg/320px-SVG_Logo.svg.png"
370392
371- if [ -s ocr-result.json ]; then
372- echo "✅ Vision OCR produced output"
373- else
374- echo "⚠️ Vision OCR produced no output"
375- fi
376-
377- - name : Test 4 - Vision on webpage URL
378- shell : bash
379- run : |
380- echo "Running web page vision test..."
381- ./bin/shimmy.exe vision --url "https://example.com" --mode web --output json > web-result.json 2>&1 || true
393+ # Convert image to base64 (Windows compatible)
394+ TEST_IMAGE_B64=$(base64 -w 0 test-ocr.png 2>/dev/null || base64 test-ocr.png | tr -d '\n')
382395
383- echo "Web Vision Result:"
384- cat web-result.json
396+ echo "Running Vision API test..."
397+ curl -s -X POST http://127.0.0.1:11435/api/vision \
398+ -H "Content-Type: application/json" \
399+ -d "{\"image\": \"data:image/png;base64,$TEST_IMAGE_B64\", \"prompt\": \"What text do you see in this image?\", \"max_tokens\": 200}" \
400+ > vision-result.json 2>&1 || true
385401
386- if [ -s web-result.json ]; then
387- echo "✅ Vision web mode produced output"
402+ echo "Vision API Result:"
403+ cat vision-result.json
404+
405+ # Check result
406+ if [ -s vision-result.json ] && grep -q "error\|text\|content" vision-result.json; then
407+ echo "✅ Vision API responded"
408+ echo "true" > vision-test-status.txt
388409 else
389- echo "⚠️ Vision web mode produced no output"
410+ echo "⚠️ Vision API test inconclusive"
411+ echo "false" > vision-test-status.txt
390412 fi
413+
414+ # Cleanup - taskkill for Windows
415+ taskkill //F //PID $SERVER_PID 2>/dev/null || kill $SERVER_PID 2>/dev/null || true
391416
392417 - name : Generate test results
393418 shell : bash
394419 run : |
395- OCR_SUCCESS=$([ -s ocr-result.json ] && echo "true" || echo "false")
396- WEB_SUCCESS=$([ -s web-result.json ] && echo "true" || echo "false")
420+ VISION_SUCCESS=$(cat vision-test-status.txt 2>/dev/null || echo "false")
397421
398422 cat > test-results-windows-x86_64.json << EOF
399423 {
@@ -403,8 +427,7 @@ jobs:
403427 "tests": {
404428 "binary_loads": true,
405429 "help_works": true,
406- "ocr_test": $OCR_SUCCESS,
407- "web_test": $WEB_SUCCESS
430+ "vision_api_test": $VISION_SUCCESS
408431 },
409432 "timestamp": "${{ github.run_id }}"
410433 }
@@ -437,17 +460,16 @@ jobs:
437460 run : |
438461 echo "# 👁️ Vision Cross-Platform Test Summary" >> $GITHUB_STEP_SUMMARY
439462 echo "" >> $GITHUB_STEP_SUMMARY
440- echo "| Platform | Vision Enabled | Model Cached | OCR Test | Web Test |" >> $GITHUB_STEP_SUMMARY
441- echo "|----------|----------------|--------------|----------|----------|" >> $GITHUB_STEP_SUMMARY
463+ echo "| Platform | Vision Enabled | Model Cached | Vision API Test |" >> $GITHUB_STEP_SUMMARY
464+ echo "|----------|----------------|--------------|-----------------|" >> $GITHUB_STEP_SUMMARY
442465
443466 for file in ./results/*/test-results-*.json; do
444467 if [ -f "$file" ]; then
445468 platform=$(jq -r '.platform' "$file")
446469 vision=$(jq -r '.vision_enabled' "$file")
447470 cached=$(jq -r '.model_cached // "N/A"' "$file")
448- ocr=$(jq -r '.tests.ocr_test // "N/A"' "$file")
449- web=$(jq -r '.tests.web_test // "N/A"' "$file")
450- echo "| $platform | $vision | $cached | $ocr | $web |" >> $GITHUB_STEP_SUMMARY
471+ vision_api=$(jq -r '.tests.vision_api_test // "N/A"' "$file")
472+ echo "| $platform | $vision | $cached | $vision_api |" >> $GITHUB_STEP_SUMMARY
451473 fi
452474 done
453475
0 commit comments