@@ -9,6 +9,25 @@ if [ "$DEBUG" = "true" ]; then
99fi
1010
# Location of the Google Application Default Credentials file. An explicit
# GOOGLE_APPLICATION_CREDENTIALS takes precedence over the gcloud default path.
ADC_PATH="${GOOGLE_APPLICATION_CREDENTIALS:-${HOME}/.config/gcloud/application_default_credentials.json}"

# File that receives the evaluation verdict. Normalised to an empty string when
# unset so later "-n" checks can reference it safely.
CLAUDIO_RESULT_FILE="${CLAUDIO_RESULT_FILE:-}"

# Prompt handed to the evaluation call. A caller-supplied, non-empty value is
# kept as-is; otherwise the built-in prompt below is used. The heredoc delimiter
# is quoted so the text is taken literally (no expansion of $ or backticks).
if [ -z "${CLAUDIO_EVALUATION_PROMPT:-}" ]; then
  CLAUDIO_EVALUATION_PROMPT="$(cat <<'EOF'
Read this Claude Code session log.

Determine whether the original task was FULLY completed successfully.

Return ONLY one of:
- SUCCESS
- FAILURE: <short reason>

Mark the task as FAILURE if:
- the agent abandoned the task
- commands or tool calls failed without recovery
- tests failed
- the requested work was only partially completed
- the final state is uncertain
- the task could not be verified as complete
EOF
)"
fi
1231
1332# ##################
1433# ### Functions ####
@@ -25,6 +44,46 @@ check_adc() {
2544 return 1
2645}
2746
#######################################
# Check the verdict stored in CLAUDIO_RESULT_FILE.
#
# The verdict is the first non-blank line of the file, with carriage returns
# removed and surrounding whitespace trimmed, so a leading empty line or stray
# spaces around the answer do not turn a valid verdict into a format error.
#
# Globals:   CLAUDIO_RESULT_FILE (read)
# Arguments: none
# Outputs:   progress, the verdict, and error details on stdout
# Returns:   0 if the verdict is exactly SUCCESS;
#            1 if the file is missing, the verdict starts with "FAILURE:",
#            or the verdict has any other form
#######################################
validate_result() {
  echo "=== Validating Claudio result ==="

  if [ ! -f "${CLAUDIO_RESULT_FILE:-}" ]; then
    echo "ERROR: Claudio did not produce a result file"
    echo "ERROR: Task status is unknown"
    return 1
  fi

  local result=""
  local line
  # "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [ -n "${line}" ]; do
    line="${line//$'\r'/}"
    # Trim leading, then trailing, whitespace using pattern removal only.
    line="${line#"${line%%[![:space:]]*}"}"
    line="${line%"${line##*[![:space:]]}"}"
    if [ -n "${line}" ]; then
      result="${line}"
      break
    fi
  done < "${CLAUDIO_RESULT_FILE}"

  # printf, not echo: the verdict is model output and may start with "-".
  printf 'Result: %s\n' "${result}"

  case "${result}" in
    SUCCESS)
      echo "=== Claudio task completed successfully ==="
      return 0
      ;;

    FAILURE:*)
      echo "=== Claudio task reported failure ==="
      printf '%s\n' "${result}"
      return 1
      ;;

    *)
      echo "ERROR: Invalid result format"
      echo "Expected:"
      echo "SUCCESS"
      echo "or:"
      echo "FAILURE: <reason>"
      echo
      echo "Received:"
      printf '%s\n' "${result}"
      return 1
      ;;
  esac
}
86+
2887# #############
2988# ### Main ####
3089# #############
@@ -68,10 +127,19 @@ done
68127
# --- Non-streaming mode: transparent passthrough ---
# Anything other than CLAUDIO_STREAM=1 hands control straight to claude.
if [ "${CLAUDIO_STREAM:-}" != "1" ]; then
  # Result evaluation is only performed on the streaming path, so a result
  # file cannot be honoured here. The message goes to stderr because stdout in
  # this mode is what the caller reads as claude's own output.
  if [ -n "${CLAUDIO_RESULT_FILE:-}" ]; then
    echo "ERROR: CLAUDIO_RESULT_FILE requires streaming mode (CLAUDIO_STREAM=1) to evaluate results" >&2
    exit 1
  fi
  # exec replaces this shell, so claude's stdio and exit status pass through
  # unchanged and nothing after this line runs.
  exec claude "$@"
fi
73136
# --- CI streaming mode ---
# Result evaluation reads the session log, so when a result file is requested
# without an explicit CLAUDIO_LOG_FILE a temporary log is created for it.
if [ -n "${CLAUDIO_RESULT_FILE:-}" ] && [ -z "${CLAUDIO_LOG_FILE:-}" ]; then
  # The X's are kept at the very end of the template: BSD/macOS mktemp only
  # substitutes trailing X's, so a suffix after them would yield a fixed name.
  # A failure is fatal; continuing without a log would leave nothing to evaluate.
  if ! CLAUDIO_LOG_FILE="$(mktemp /tmp/claudio-session.log.XXXXXX)"; then
    echo "ERROR: Could not create a temporary session log for result evaluation" >&2
    exit 1
  fi
  echo "CLAUDIO_LOG_FILE not set; defaulting to ${CLAUDIO_LOG_FILE} for result evaluation"
fi

# Optional arguments collected here; each is added only when its variable is
# set and non-empty.
stream_args=()
[ -n "${CLAUDIO_LOG_FILE:-}" ] && stream_args+=(--log-file "$CLAUDIO_LOG_FILE")
[ -n "${CLAUDIO_WRAP:-}" ] && stream_args+=(--wrap "$CLAUDIO_WRAP")
@@ -110,5 +178,25 @@ wait "$claude_pid" 2>/dev/null && claude_rc=0 || claude_rc=$?
110178
# 143 = SIGTERM (expected when we kill claude after stream ends)
if [ "$stream_rc" -ne 0 ]; then exit "$stream_rc"; fi
if [ "$claude_rc" -ne 0 ] && [ "$claude_rc" -ne 143 ]; then exit "$claude_rc"; fi

# No result file requested: the exit codes checked above are the whole verdict.
if [ -z "${CLAUDIO_RESULT_FILE:-}" ]; then exit 0; fi

# Result check: use a second Claude call to evaluate whether the task
# actually completed successfully based on the session log.
#
# Fail closed: a result was requested, so a missing or empty session log means
# the outcome cannot be judged and must not be reported as success.
if [ ! -s "${CLAUDIO_LOG_FILE:-}" ]; then
  echo "ERROR: Session log is missing or empty; cannot evaluate task result"
  echo "ERROR: Task status is unknown"
  exit 1
fi

echo "=== Evaluating task result ==="

# Only the tail of the log is sent (tail -c counts bytes). The evaluator's
# stdout becomes the result file, which validate_result then inspects.
if ! tail -c "${CLAUDIO_RESULT_MAX_CHARS:-50000}" "${CLAUDIO_LOG_FILE}" \
  | claude -p "${CLAUDIO_EVALUATION_PROMPT}" \
    --model "${CLAUDIO_EVALUATION_MODEL:-claude-haiku-4-5-20251001}" \
    --no-session-persistence \
    > "${CLAUDIO_RESULT_FILE}"; then
  echo "ERROR: Failed to evaluate task result"
  exit 1
fi

# The script's exit status is the validation status.
validate_result
exit $?
0 commit comments