Skip to content

Commit e7bfdf8

Browse files
authored
smoke test allow pass for flaky providers (#6638)
1 parent 383ae71 commit e7bfdf8

1 file changed

Lines changed: 43 additions & 5 deletions

File tree

scripts/test_providers.sh

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,14 @@ for arg in "$@"; do
1313
esac
1414
done
1515

16+
# Flaky models that are allowed to fail without failing the entire test run.
# These are typically preview/experimental models with inconsistent
# tool-calling behavior. Failures are still reported but don't block PRs.
# Each entry has the form "provider:model" and is compared for exact equality
# by is_allowed_failure.
ALLOWED_FAILURES=(
  "google:gemini-3-pro-preview"
  "openrouter:nvidia/nemotron-3-nano-30b-a3b"
)
# The list is a constant; guard against accidental reassignment later on.
readonly ALLOWED_FAILURES
23+
1624
# Export variables from a .env file in the current directory, if one exists.
# Lines starting with '#' are dropped; the remaining text is flattened by xargs
# and the unquoted result is word-split into arguments for `export`.
# NOTE(review): because the expansion is unquoted, values containing whitespace
# or glob characters will not be exported intact — confirm that .env only holds
# simple KEY=value pairs.
if [ -f .env ]; then
1725
export $(grep -v '^#' .env | xargs)
1826
fi
@@ -71,7 +79,20 @@ else
7179
fi
7280
echo ""
7381

82+
#######################################
# Report whether a provider/model pair is on the allowed-failures list.
# Globals:   ALLOWED_FAILURES (read) - entries of the form "provider:model"
# Arguments: $1 - provider name
#            $2 - model name
# Returns:   0 if "provider:model" exactly equals an entry, 1 otherwise
#######################################
is_allowed_failure() {
  local provider="$1"
  local model="$2"
  local key="${provider}:${model}"
  # Keep the loop variable local so a caller's own `allowed` is not clobbered.
  local allowed

  # The ${arr[@]+"..."} form expands to nothing when the list is empty or
  # unset, so the loop is also safe under `set -u` on older bash versions.
  for allowed in ${ALLOWED_FAILURES[@]+"${ALLOWED_FAILURES[@]}"}; do
    if [ "$allowed" = "$key" ]; then
      return 0
    fi
  done
  return 1
}
93+
7494
# One entry per tested provider/model; printed in the final test summary.
RESULTS=()
95+
# Failures of models that are not on the allowed-failures list; if this is
# non-empty at the end of the run the script reports them and exits 1.
HARD_FAILURES=()
7596

7697
for provider_config in "${PROVIDERS[@]}"; do
7798
# Split on " -> " to get provider and models
@@ -94,8 +115,14 @@ for provider_config in "${PROVIDERS[@]}"; do
94115
echo "✓ SUCCESS: Test passed - $SUCCESS_MSG"
95116
RESULTS+=("${PROVIDER}: ${MODEL}")
96117
else
97-
echo "✗ FAILED: Test failed - $FAILURE_MSG"
98-
RESULTS+=("${PROVIDER}: ${MODEL}")
118+
if is_allowed_failure "$PROVIDER" "$MODEL"; then
119+
echo "⚠ FLAKY: Test failed but model is in allowed failures list - $FAILURE_MSG"
120+
RESULTS+=("${PROVIDER}: ${MODEL} (flaky)")
121+
else
122+
echo "✗ FAILED: Test failed - $FAILURE_MSG"
123+
RESULTS+=("${PROVIDER}: ${MODEL}")
124+
HARD_FAILURES+=("${PROVIDER}: ${MODEL}")
125+
fi
99126
fi
100127
rm "$TMPFILE"
101128
rm -rf "$TESTDIR"
@@ -107,11 +134,22 @@ echo "=== Test Summary ==="
107134
# Print every recorded result, one per line.
for result in "${RESULTS[@]}"; do
108135
echo "$result"
109136
done
110-
if echo "${RESULTS[@]}" | grep -q ""; then
137+
138+
if [ ${#HARD_FAILURES[@]} -gt 0 ]; then
139+
echo ""
140+
echo "Hard failures (${#HARD_FAILURES[@]}):"
141+
for failure in "${HARD_FAILURES[@]}"; do
142+
echo " - $failure"
143+
done
111144
echo ""
112145
echo "Some tests failed!"
113146
exit 1
114147
else
115-
echo ""
116-
echo "All tests passed!"
148+
# Allowed (flaky) failures are recorded in RESULTS with a "(flaky)" suffix, so
# search for that marker explicitly. The previous empty grep pattern matched
# every line, which made the "All tests passed!" branch unreachable.
if printf '%s\n' "${RESULTS[@]}" | grep -qF -- "(flaky)"; then
  echo ""
  echo "All required tests passed! (some flaky tests failed but are allowed)"
else
  echo ""
  echo "All tests passed!"
fi
117155
fi

0 commit comments

Comments
 (0)