2626 STATUS_REVIEWING_CODE ,
2727 STATUS_NEW_CODE ,
2828 STATUS_WORKING ,
29+ STATUS_WRITING_CODE ,
2930)
3031from models import resolve_model
3132
6061 "additionalProperties" : False
6162}
6263
64+ _EDIT_TOOL = {
65+ "name" : "edit_job" ,
66+ "description" : (
67+ "Apply one or more edits to the user's CURRENT job code. Call this ONLY when "
68+ "the user wants their job changed — never to show an illustrative example "
69+ "(put examples in your normal text reply instead). Pass ALL edits in a single "
70+ "call via the `code_edits` array; they are applied in order, each operating on "
71+ "the result of the previous one. Write your conversational reply as normal text "
72+ "outside this tool call."
73+ ),
74+
75+ "strict" : True , # Schema enforcement applies only to this tool's input (the code edits); the model's text reply stays free-form.
76+ "input_schema" : {
77+ "type" : "object" ,
78+ "properties" : {"code_edits" : _CODE_OUTPUT_SCHEMA ["properties" ]["code_edits" ]},
79+ "required" : ["code_edits" ],
80+ "additionalProperties" : False ,
81+ },
82+ }
83+
6384
6485# Helper function for page navigation
6586def extract_page_prefix_from_last_turn (history : List [Dict [str , str ]]) -> Optional [str ]:
@@ -202,14 +223,14 @@ def generate(
202223 refresh_rag = refresh_rag
203224 )
204225
205- # Structured outputs for suggest_code mode, effort for all modes
206- if suggest_code :
207- output_config = {
208- "format" : { "type " : "json_schema" , "schema" : _CODE_OUTPUT_SCHEMA },
209- "effort" : "medium"
210- }
211- else :
212- output_config = { "effort" : "medium" }
226+ # effort applies to all modes. For suggest_code we expose the `edit_job`
227+ # tool. tool_choice stays "auto": the model answers in text
228+ # and only calls the tool when it actually wants to change the job.
229+ output_config = { "effort " : "medium" }
230+ tool_kwargs = (
231+ { "tools" : [ _EDIT_TOOL ], "tool_choice" : { "type" : "auto" } }
232+ if suggest_code else {}
233+ )
213234
214235 with sentry_sdk .start_span (description = "anthropic_api_call" ):
215236 if stream :
@@ -229,13 +250,18 @@ def generate(
229250 model = self .config .model ,
230251 system = system_message ,
231252 thinking = {"type" : "adaptive" },
232- output_config = output_config
253+ output_config = output_config ,
254+ ** tool_kwargs
233255 )
234256
235257 with self .client .messages .stream (** stream_kwargs ) as stream_obj :
236258 for event in stream_obj :
237259 if event .type == "message_start" :
238260 stream_manager .send_thinking (STATUS_WORKING )
261+ # The edit_job tool block starts after the text ends; its
262+ # input (the code) streams silently, so show a status here.
263+ elif event .type == "content_block_start" and getattr (getattr (event , "content_block" , None ), "type" , None ) == "tool_use" :
264+ stream_manager .send_thinking (STATUS_WRITING_CODE )
239265 accumulated_response , text_started , sent_length = self .process_stream_event (
240266 event ,
241267 accumulated_response ,
@@ -261,7 +287,8 @@ def generate(
261287 create_kwargs = dict (
262288 max_tokens = self .config .max_tokens , messages = prompt , model = self .config .model , system = system_message ,
263289 thinking = {"type" : "adaptive" },
264- output_config = output_config
290+ output_config = output_config ,
291+ ** tool_kwargs
265292 )
266293 message = self .client .messages .create (** create_kwargs )
267294
@@ -271,32 +298,45 @@ def generate(
271298 if message .usage .cache_read_input_tokens :
272299 logger .info (f"Cache read: { message .usage .cache_read_input_tokens } tokens" )
273300
274- response_parts = []
301+ # The model answers in normal text; it calls the `edit_job` tool only
302+ # when it wants to change the user's job. So text = the reply, and the
303+ # tool's parsed input carries the code edits (no JSON-in-text parsing).
304+ text_parts = []
305+ tool_code_edits = None
275306 for content_block in message .content :
276- if content_block .type == "text" :
277- response_parts .append (content_block .text )
307+ if getattr (content_block , "type" , None ) == "tool_use" and getattr (content_block , "name" , None ) == "edit_job" :
308+ tool_code_edits = (content_block .input or {}).get ("code_edits" ) or []
309+ elif getattr (content_block , "type" , None ) == "text" :
310+ text_parts .append (content_block .text )
278311
279- response = "\n \n " .join (response_parts )
312+ text_response = "\n \n " .join (text_parts ).strip ()
313+ suggested_code = None
314+ diff = None
280315
281- if suggest_code is True :
316+ if suggest_code is True and tool_code_edits :
282317 job_code = context .get ("expression" ) if isinstance (context , dict ) else None
318+ if job_code :
319+ with sentry_sdk .start_span (description = "apply_code_edits" ):
320+ suggested_code , diff = self .apply_code_edits (
321+ content = content , text_answer = text_response ,
322+ original_code = job_code , code_edits = tool_code_edits ,
323+ )
324+ # If the model called the tool but emitted no prose, give the user
325+ # a short confirmation so the response isn't empty.
326+ if not text_response and suggested_code :
327+ text_response = "I'll update your job code."
283328
284- if getattr (self , "_stream_applied" , False ):
285- # Streaming already applied edits — reuse instead of redoing the work
286- try :
287- text_response = json .loads (response ).get ("text_answer" , "" ).strip ()
288- except (json .JSONDecodeError , ValueError ):
289- text_response = response
290- suggested_code = self ._stream_suggested_code
291- diff = self ._stream_diff
329+ # Visibility: did the model call edit_job, and in what block order?
330+ # (block order shows whether text came before/after the tool call.)
331+ if suggest_code is True :
332+ _blocks = [getattr (b , "type" , "?" ) for b in message .content ]
333+ if tool_code_edits is None :
334+ logger .info ("edit_job NOT called — text-only answer (blocks=%r)" , _blocks )
292335 else :
293- with sentry_sdk .start_span (description = "parse_and_apply_edits" ):
294- text_response , suggested_code , diff = self .parse_and_apply_edits (response = response , content = content , original_code = job_code )
295-
296- else :
297- text_response = response
298- suggested_code = None
299- diff = None
336+ logger .info (
337+ "edit_job CALLED: %d edit(s), patches_applied=%s (blocks=%r)" ,
338+ len (tool_code_edits ), (diff or {}).get ("patches_applied" ), _blocks ,
339+ )
300340
301341 # Add prefix to content when building history
302342 prefixed_content = add_page_prefix (content , current_page )
@@ -311,18 +351,31 @@ def generate(
311351 * [usage_data for usage_key , usage_data in retrieved_knowledge .get ("usage" , {}).items ()]
312352 )
313353
354+ stop_reason = getattr (message , "stop_reason" , None )
355+
356+ # Check truncation BEFORE the empty check. max_tokens commonly leaves
357+ # PARTIAL text behind (or, in suggest_code mode, a cut-off `edit_job` tool call);
358+ # if we only inspected stop_reason when text_response is empty, that
359+ # cut-off content would be returned as a normal success and the
360+ # truncation signal lost. Surface it regardless of whether text came back.
361+ if stop_reason == "max_tokens" :
362+ sentry_sdk .set_tag ("stop_reason" , stop_reason )
363+ sentry_sdk .set_tag ("empty_reason" , "max_tokens" )
364+ sentry_sdk .set_context ("empty_response" , {
365+ "service" : "job_chat" ,
366+ "suggest_code" : bool (suggest_code ),
367+ })
368+ stream_manager .end_stream ()
369+ raise ApolloError (502 , "Response truncated" , type = "OUTPUT_TRUNCATED" )
370+
314371 if not text_response :
315- stop_reason = getattr (message , "stop_reason" , None )
316- empty_reason = "max_tokens" if stop_reason == "max_tokens" else "no_text_blocks"
317372 sentry_sdk .set_tag ("stop_reason" , stop_reason )
318- sentry_sdk .set_tag ("empty_reason" , empty_reason )
373+ sentry_sdk .set_tag ("empty_reason" , "no_text_blocks" )
319374 sentry_sdk .set_context ("empty_response" , {
320375 "service" : "job_chat" ,
321376 "suggest_code" : bool (suggest_code ),
322377 })
323378 stream_manager .end_stream ()
324- if stop_reason == "max_tokens" :
325- raise ApolloError (502 , "Response truncated" , type = "OUTPUT_TRUNCATED" )
326379 raise ApolloError (502 , "Model returned no usable text" , type = "EMPTY_OUTPUT" )
327380
328381 stream_manager .end_stream ()
@@ -350,65 +403,14 @@ def process_stream_event(
350403 """
351404 Process a single stream event from the Anthropic API.
352405
353- With suggest_code, code_edits are generated first (buffered silently),
354- then a changes event is sent, and text_answer streams to the client.
406+ The conversational reply is plain text now. Code edits arrive via the
407+ `edit_job` tool call (as input_json_delta) and are applied from the final
408+ message — not streamed here — so we simply forward text deltas live.
355409 """
356- if event .type == "content_block_delta" :
357- if event .delta .type == "text_delta" :
358- text_chunk = event .delta .text
359- accumulated_response += text_chunk
360-
361- if suggest_code and not text_started :
362- # Code edits phase: buffer silently until text_answer starts.
363- # Tolerant of whitespace variants the model may emit.
364- match = re .search (r'"text_answer"\s*:\s*"' , accumulated_response )
365-
366- if match :
367- # Extract code_edits from the JSON before the delimiter
368- edits_part = accumulated_response [:match .start ()]
369- # Find the code_edits array value
370- try :
371- # Close the partial object and extract code_edits
372- partial = edits_part .rstrip ().rstrip ("," ) + "}"
373- code_edits = json .loads (partial ).get ("code_edits" , [])
374-
375- if original_code and code_edits :
376- suggested_code , diff = self .apply_code_edits (
377- content = content or "" ,
378- text_answer = "" ,
379- original_code = original_code ,
380- code_edits = code_edits
381- )
382- self ._stream_applied = True
383- self ._stream_suggested_code = suggested_code
384- self ._stream_diff = diff
385- if suggested_code :
386- stream_manager .send_changes ({"code" : suggested_code })
387- else :
388- stream_manager .send_changes ({"code_edits" : code_edits })
389- elif code_edits :
390- stream_manager .send_changes ({"code_edits" : code_edits })
391- except (json .JSONDecodeError , ValueError ):
392- logger .warning (f"Failed to parse code_edits during streaming" )
393-
394- # Mark where text content starts in the accumulated buffer
395- sent_length = match .end ()
396- text_started = True
397-
398- if suggest_code and text_started :
399- # Text phase: stream with buffer to handle split escape sequences
400- # Use 2-char buffer since longest escape is 2 chars (e.g. \n, \")
401- buffer_size = 2
402- safe_to_send_until = len (accumulated_response ) - buffer_size
403-
404- if safe_to_send_until > sent_length :
405- safe_text = accumulated_response [sent_length :safe_to_send_until ]
406- stream_manager .send_text (self ._unescape_json_string (safe_text ))
407- sent_length = safe_to_send_until
408-
409- elif not suggest_code :
410- # Normal streaming for non-code suggestions
411- stream_manager .send_text (text_chunk )
410+ if event .type == "content_block_delta" and event .delta .type == "text_delta" :
411+ text_chunk = event .delta .text
412+ accumulated_response += text_chunk
413+ stream_manager .send_text (text_chunk )
412414
413415 return accumulated_response , text_started , sent_length
414416
@@ -532,25 +534,15 @@ def try_error_correction(self, content: str, error_message: str, old_code: str,
532534 full_code = full_code ,
533535 text_explanation = text_explanation
534536 )
535- correction_schema = {
536- "type" : "object" ,
537- "properties" : {
538- "explanation" : {"type" : "string" },
539- "corrected_old_code" : {"type" : "string" },
540- "corrected_new_code" : {"type" : "string" }
541- },
542- "required" : ["explanation" , "corrected_old_code" , "corrected_new_code" ],
543- "additionalProperties" : False
544- }
537+ # structured outputs removed here too (see note in generate); the
538+ # correction prompt already instructs the {explanation, corrected_*}
539+ # JSON shape and json.loads below is wrapped in try/except.
545540 message = self .client .messages .create (
546541 max_tokens = 16384 ,
547542 messages = prompt ,
548543 model = self .config .model ,
549544 system = system_message ,
550- output_config = {
551- "format" : {"type" : "json_schema" , "schema" : correction_schema },
552- "effort" : "medium"
553- },
545+ output_config = {"effort" : "medium" },
554546 thinking = {"type" : "adaptive" }
555547 )
556548
0 commit comments