@@ -13,6 +13,14 @@ for arg in "$@"; do
1313 esac
1414done
1515
# Flaky models that are allowed to fail without failing the entire test run.
# These are typically preview/experimental models with inconsistent tool-calling behavior.
# Failures are still reported but don't block PRs.
#
# Each entry is a "provider:model" key. is_allowed_failure compares the key it
# builds against these entries with an exact string match, so entries must not
# carry any leading or trailing whitespace.
ALLOWED_FAILURES=(
  "google:gemini-3-pro-preview"
  "openrouter:nvidia/nemotron-3-nano-30b-a3b"
)
23+
1624if [ -f .env ]; then
1725 export $( grep -v ' ^#' .env | xargs)
1826fi
7179fi
7280echo " "
7381
#######################################
# Report whether a provider/model pair is on the allowed-failures list.
# Globals:   ALLOWED_FAILURES (read) - array of "provider:model" keys
# Arguments: $1 - provider name
#            $2 - model name
# Returns:   0 if "$1:$2" exactly equals an entry of ALLOWED_FAILURES,
#            1 otherwise (including when the list is empty)
#######################################
is_allowed_failure() {
  local provider="$1"
  local model="$2"
  local key="${provider}:${model}"
  # Keep the loop variable local so calling this function never clobbers a
  # variable named "allowed" in the caller's or the global scope.
  local allowed
  for allowed in "${ALLOWED_FAILURES[@]}"; do
    if [ "$allowed" = "$key" ]; then
      return 0
    fi
  done
  return 1
}
93+
7494RESULTS=()
95+ HARD_FAILURES=()
7596
7697for provider_config in " ${PROVIDERS[@]} " ; do
7798 # Split on " -> " to get provider and models
@@ -94,8 +115,14 @@ for provider_config in "${PROVIDERS[@]}"; do
94115 echo " ✓ SUCCESS: Test passed - $SUCCESS_MSG "
95116 RESULTS+=(" ✓ ${PROVIDER} : ${MODEL} " )
96117 else
97- echo " ✗ FAILED: Test failed - $FAILURE_MSG "
98- RESULTS+=(" ✗ ${PROVIDER} : ${MODEL} " )
118+ if is_allowed_failure " $PROVIDER " " $MODEL " ; then
119+ echo " ⚠ FLAKY: Test failed but model is in allowed failures list - $FAILURE_MSG "
120+ RESULTS+=(" ⚠ ${PROVIDER} : ${MODEL} (flaky)" )
121+ else
122+ echo " ✗ FAILED: Test failed - $FAILURE_MSG "
123+ RESULTS+=(" ✗ ${PROVIDER} : ${MODEL} " )
124+ HARD_FAILURES+=(" ${PROVIDER} : ${MODEL} " )
125+ fi
99126 fi
100127 rm " $TMPFILE "
101128 rm -rf " $TESTDIR "
@@ -107,11 +134,22 @@ echo "=== Test Summary ==="
107134for result in " ${RESULTS[@]} " ; do
108135 echo " $result "
109136done
110- if echo " ${RESULTS[@]} " | grep -q " ✗" ; then
137+
138+ if [ ${# HARD_FAILURES[@]} -gt 0 ]; then
139+ echo " "
140+ echo " Hard failures (${# HARD_FAILURES[@]} ):"
141+ for failure in " ${HARD_FAILURES[@]} " ; do
142+ echo " - $failure "
143+ done
111144 echo " "
112145 echo " Some tests failed!"
113146 exit 1
114147else
115- echo " "
116- echo " All tests passed!"
148+ if echo " ${RESULTS[@]} " | grep -q " ⚠" ; then
149+ echo " "
150+ echo " All required tests passed! (some flaky tests failed but are allowed)"
151+ else
152+ echo " "
153+ echo " All tests passed!"
154+ fi
117155fi
0 commit comments