Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 84 additions & 8 deletions .github/workflows/tessl-eval.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,28 +127,99 @@ jobs:
run: |
# Evaluate every detected tile with `tessl eval run`, poll each run until it
# reports a terminal status (or TIMEOUT elapses), and record the outcome.
#
# State produced for the reporting code that follows:
#   PASS / FAIL   - number of tiles whose eval completed / did not complete
#   SUMMARY_FILE  - one markdown table row per tile
#   ERRORS_FILE   - one human-readable line per tile that did not pass
PASS=0
FAIL=0
SUMMARY_FILE=$(mktemp)
ERRORS_FILE=$(mktemp)
STDOUT_TMP=$(mktemp)
STDERR_TMP=$(mktemp)
# Remove all scratch files when the script exits.
trap 'rm -f "$SUMMARY_FILE" "$ERRORS_FILE" "$STDOUT_TMP" "$STDERR_TMP"' EXIT

POLL_INTERVAL=30   # seconds slept between status polls
TIMEOUT=900        # maximum seconds to wait for a single tile

# ${{ ... }} is a GitHub Actions expression, not shell syntax. It is left
# unquoted here so the resulting list is word-split into one entry per tile.
for tile_dir in ${{ steps.detect.outputs.dirs }}; do
  TILE_NAME=$(basename "$tile_dir")
  echo "::group::Evaluating $TILE_NAME ($tile_dir)"

  # Start the eval exactly once; `|| EXIT_CODE=$?` records a failure instead of
  # propagating it so the remaining tiles are still processed.
  EXIT_CODE=0
  OUTPUT=$(tessl eval run "$tile_dir" --workspace adobe 2>&1) || EXIT_CODE=$?
  echo "$OUTPUT"
  echo "::endgroup::"

  if [ "$EXIT_CODE" -ne 0 ]; then
    echo "::warning::tessl eval run failed for $TILE_NAME (exit code $EXIT_CODE)"
    FAIL=$((FAIL + 1))
    echo "| $TILE_NAME | error | ❌ |" >> "$SUMMARY_FILE"
    echo " ❌ $TILE_NAME: eval run failed (exit code $EXIT_CODE)" >> "$ERRORS_FILE"
    continue
  fi

  # Extract run ID (UUID) from tessl eval run output.
  # `|| true`: a no-match grep makes the pipeline non-zero; without the guard
  # that would abort the script under errexit+pipefail before the empty-ID
  # check below can report the problem.
  RUN_ID=$(echo "$OUTPUT" | grep -oE '[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}' | head -n 1) || true
  if [ -z "$RUN_ID" ]; then
    echo "::warning::Could not extract run ID for $TILE_NAME"
    FAIL=$((FAIL + 1))
    echo "| $TILE_NAME | no run ID | ❌ |" >> "$SUMMARY_FILE"
    echo " ❌ $TILE_NAME: could not extract run ID from output" >> "$ERRORS_FILE"
    continue
  fi

  echo "Eval run started for $TILE_NAME with run ID: $RUN_ID"

  # Poll for completion. Each iteration sleeps first, so the first status
  # check happens POLL_INTERVAL seconds after the run was started.
  ELAPSED=0
  EVAL_STATUS="unknown"
  while [ "$ELAPSED" -lt "$TIMEOUT" ]; do
    sleep "$POLL_INTERVAL"
    ELAPSED=$((ELAPSED + POLL_INTERVAL))

    # Capture stdout and stderr separately to avoid breaking JSON parsing
    VIEW_EXIT=0
    tessl eval view "$RUN_ID" --json >"$STDOUT_TMP" 2>"$STDERR_TMP" || VIEW_EXIT=$?

    if [ "$VIEW_EXIT" -ne 0 ]; then
      STDERR_CONTENT=$(cat "$STDERR_TMP")
      echo " [$TILE_NAME] tessl eval view exited $VIEW_EXIT: $STDERR_CONTENT"
      if echo "$STDERR_CONTENT" | grep -qi "not found"; then
        # Transient: run not yet visible, keep polling
        EVAL_STATUS="unknown"
      else
        # Permanent failure
        echo "::warning::tessl eval view failed for $TILE_NAME (exit $VIEW_EXIT): $STDERR_CONTENT"
        EVAL_STATUS="failed"
        break
      fi
    else
      # Read data.attributes.status from the JSON document; any parse or
      # lookup error is treated as "unknown" so polling continues.
      EVAL_STATUS=$(python3 -c "import sys,json; print(json.load(sys.stdin)['data']['attributes']['status'])" <"$STDOUT_TMP" 2>/dev/null) || {
        echo " [$TILE_NAME] Warning: could not parse JSON from stdout"
        EVAL_STATUS="unknown"
      }
    fi

    echo " [$TILE_NAME] Poll at ${ELAPSED}s: status=$EVAL_STATUS"

    # "completed" and "failed" are the only statuses that end polling early.
    if [ "$EVAL_STATUS" = "completed" ] || [ "$EVAL_STATUS" = "failed" ]; then
      break
    fi
  done

  if [ "$EVAL_STATUS" = "completed" ]; then
    PASS=$((PASS + 1))
    echo "| $TILE_NAME | passed | ✅ |" >> "$SUMMARY_FILE"
    echo "::group::Eval results for $TILE_NAME"
    # Best-effort detail dump; a failure to render must not fail the tile.
    tessl eval view "$RUN_ID" 2>&1 || true
    echo "::endgroup::"
  elif [ "$EVAL_STATUS" = "failed" ]; then
    FAIL=$((FAIL + 1))
    echo "| $TILE_NAME | failed | ❌ |" >> "$SUMMARY_FILE"
    echo " ❌ $TILE_NAME: eval failed" >> "$ERRORS_FILE"
    echo "::group::Eval results for $TILE_NAME"
    # View this tile's own run by ID (best-effort, same as above).
    tessl eval view "$RUN_ID" 2>&1 || true
    echo "::endgroup::"
  else
    # Loop ended without a terminal status: the TIMEOUT budget was exhausted.
    FAIL=$((FAIL + 1))
    echo "| $TILE_NAME | timeout | ❌ |" >> "$SUMMARY_FILE"
    echo " ❌ $TILE_NAME: eval timed out after ${TIMEOUT}s (last status: $EVAL_STATUS)" >> "$ERRORS_FILE"
    echo "::warning::Eval for $TILE_NAME timed out after ${TIMEOUT}s (last status: $EVAL_STATUS)"
  fi
done

Expand All @@ -159,7 +230,7 @@ jobs:
# Markdown results table for the job summary: header, one row per tile, then
# a totals row.
echo ""
echo "| Tile | Result | Status |"
echo "|------|--------|--------|"
# Per-tile rows are read from $SUMMARY_FILE only. Nothing else may be printed
# between the header and the rows: a blank line there would end the table.
cat "$SUMMARY_FILE"
echo "| **Total** | **$PASS/$TOTAL passed** | $([ "$FAIL" -eq 0 ] && echo '✅' || echo '❌') |"
} >> "$GITHUB_STEP_SUMMARY"

Expand All @@ -170,6 +241,11 @@ jobs:
# Print the aggregate counters, then the per-tile failure lines if any exist.
printf ' Total: %s\n' "$TOTAL"
printf ' Passed: %s\n' "$PASS"
printf ' Failed: %s\n' "$FAIL"
# -s is true only when the errors file exists and is non-empty.
if [[ -s "$ERRORS_FILE" ]]; then
  printf '\n'
  printf '%s\n' " Failed evals:"
  cat "$ERRORS_FILE"
fi
printf '%s\n' "============================="

if [ "$FAIL" -gt 0 ]; then
Expand Down