Skip to content

Commit adcb262

Browse files
josephjclark, dependabot[bot], hanna-paasivirta
authored
Release 1.2.1 (#511)
* Bump idna from 3.13 to 3.15 (#499) Bumps [idna](https://github.com/kjd/idna) from 3.13 to 3.15. - [Release notes](https://github.com/kjd/idna/releases) - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.md) - [Commits](kjd/idna@v3.13...v3.15) --- updated-dependencies: - dependency-name: idna dependency-version: '3.15' dependency-type: indirect ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * dependency bumps (#510) * Job_chat: Implement tool-use for code edits (#501) * remove structured outputs and try opus * adjust prompt for json format clarification * add answer tool * code edits tool * strict tool use * tidy * temp mark * typo * tense * tweak streaming * add changeset * version --------- Co-authored-by: Joe Clark <jclark@openfn.org> * version: 1.2.1 --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Hanna Paasivirta <hanna@openfn.org>
1 parent 5f1c144 commit adcb262

9 files changed

Lines changed: 650 additions & 196 deletions

File tree

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
11
# apollo
22

3+
## 1.2.1
4+
5+
### Patch Changes
6+
7+
- Security updates
8+
9+
## 1.2.0
10+
11+
### Minor Changes
12+
13+
- c3ebe8e: Update Job_chat to use a code edit tool. This fixes issues of empty
14+
responses coming back from the model. It also changes the order in which text
15+
and code are returned in streaming mode.
16+
317
## 1.1.3
418

519
### Patch Changes

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "apollo",
33
"module": "platform/index.ts",
4-
"version": "1.1.3",
4+
"version": "1.2.1",
55
"type": "module",
66
"scripts": {
77
"start": "NODE_ENV=production bun platform/src/index.ts",

poetry.lock

Lines changed: 11 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

services/job_chat/job_chat.py

Lines changed: 99 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
STATUS_REVIEWING_CODE,
2727
STATUS_NEW_CODE,
2828
STATUS_WORKING,
29+
STATUS_WRITING_CODE,
2930
)
3031
from models import resolve_model
3132

@@ -60,6 +61,26 @@
6061
"additionalProperties": False
6162
}
6263

64+
_EDIT_TOOL = {
65+
"name": "edit_job",
66+
"description": (
67+
"Apply one or more edits to the user's CURRENT job code. Call this ONLY when "
68+
"the user wants their job changed — never to show an illustrative example "
69+
"(put examples in your normal text reply instead). Pass ALL edits in a single "
70+
"call via the `code_edits` array; they are applied in order, each operating on "
71+
"the result of the previous one. Write your conversational reply as normal text "
72+
"outside this tool call."
73+
),
74+
75+
"strict": True, # Structured outputs only used for code edits, not the entire model answer.
76+
"input_schema": {
77+
"type": "object",
78+
"properties": {"code_edits": _CODE_OUTPUT_SCHEMA["properties"]["code_edits"]},
79+
"required": ["code_edits"],
80+
"additionalProperties": False,
81+
},
82+
}
83+
6384

6485
# Helper function for page navigation
6586
def extract_page_prefix_from_last_turn(history: List[Dict[str, str]]) -> Optional[str]:
@@ -202,14 +223,14 @@ def generate(
202223
refresh_rag=refresh_rag
203224
)
204225

205-
# Structured outputs for suggest_code mode, effort for all modes
206-
if suggest_code:
207-
output_config = {
208-
"format": {"type": "json_schema", "schema": _CODE_OUTPUT_SCHEMA},
209-
"effort": "medium"
210-
}
211-
else:
212-
output_config = {"effort": "medium"}
226+
# effort applies to all modes. For suggest_code we expose the `edit_job`
227+
# tool. tool_choice stays "auto": the model answers in text
228+
# and only calls the tool when it actually wants to change the job.
229+
output_config = {"effort": "medium"}
230+
tool_kwargs = (
231+
{"tools": [_EDIT_TOOL], "tool_choice": {"type": "auto"}}
232+
if suggest_code else {}
233+
)
213234

214235
with sentry_sdk.start_span(description="anthropic_api_call"):
215236
if stream:
@@ -229,13 +250,18 @@ def generate(
229250
model=self.config.model,
230251
system=system_message,
231252
thinking={"type": "adaptive"},
232-
output_config=output_config
253+
output_config=output_config,
254+
**tool_kwargs
233255
)
234256

235257
with self.client.messages.stream(**stream_kwargs) as stream_obj:
236258
for event in stream_obj:
237259
if event.type == "message_start":
238260
stream_manager.send_thinking(STATUS_WORKING)
261+
# The edit_job tool block starts after the text ends; its
262+
# input (the code) streams silently, so show a status here.
263+
elif event.type == "content_block_start" and getattr(getattr(event, "content_block", None), "type", None) == "tool_use":
264+
stream_manager.send_thinking(STATUS_WRITING_CODE)
239265
accumulated_response, text_started, sent_length = self.process_stream_event(
240266
event,
241267
accumulated_response,
@@ -261,7 +287,8 @@ def generate(
261287
create_kwargs = dict(
262288
max_tokens=self.config.max_tokens, messages=prompt, model=self.config.model, system=system_message,
263289
thinking={"type": "adaptive"},
264-
output_config=output_config
290+
output_config=output_config,
291+
**tool_kwargs
265292
)
266293
message = self.client.messages.create(**create_kwargs)
267294

@@ -271,32 +298,45 @@ def generate(
271298
if message.usage.cache_read_input_tokens:
272299
logger.info(f"Cache read: {message.usage.cache_read_input_tokens} tokens")
273300

274-
response_parts = []
301+
# The model answers in normal text; it calls the `edit_job` tool only
302+
# when it wants to change the user's job. So text = the reply, and the
303+
# tool's parsed input carries the code edits (no JSON-in-text parsing).
304+
text_parts = []
305+
tool_code_edits = None
275306
for content_block in message.content:
276-
if content_block.type == "text":
277-
response_parts.append(content_block.text)
307+
if getattr(content_block, "type", None) == "tool_use" and getattr(content_block, "name", None) == "edit_job":
308+
tool_code_edits = (content_block.input or {}).get("code_edits") or []
309+
elif getattr(content_block, "type", None) == "text":
310+
text_parts.append(content_block.text)
278311

279-
response = "\n\n".join(response_parts)
312+
text_response = "\n\n".join(text_parts).strip()
313+
suggested_code = None
314+
diff = None
280315

281-
if suggest_code is True:
316+
if suggest_code is True and tool_code_edits:
282317
job_code = context.get("expression") if isinstance(context, dict) else None
318+
if job_code:
319+
with sentry_sdk.start_span(description="apply_code_edits"):
320+
suggested_code, diff = self.apply_code_edits(
321+
content=content, text_answer=text_response,
322+
original_code=job_code, code_edits=tool_code_edits,
323+
)
324+
# If the model called the tool but emitted no prose, give the user
325+
# a short confirmation so the response isn't empty.
326+
if not text_response and suggested_code:
327+
text_response = "I'll update your job code."
283328

284-
if getattr(self, "_stream_applied", False):
285-
# Streaming already applied edits — reuse instead of redoing the work
286-
try:
287-
text_response = json.loads(response).get("text_answer", "").strip()
288-
except (json.JSONDecodeError, ValueError):
289-
text_response = response
290-
suggested_code = self._stream_suggested_code
291-
diff = self._stream_diff
329+
# Visibility: did the model call edit_job, and in what block order?
330+
# (block order shows whether text came before/after the tool call.)
331+
if suggest_code is True:
332+
_blocks = [getattr(b, "type", "?") for b in message.content]
333+
if tool_code_edits is None:
334+
logger.info("edit_job NOT called — text-only answer (blocks=%r)", _blocks)
292335
else:
293-
with sentry_sdk.start_span(description="parse_and_apply_edits"):
294-
text_response, suggested_code, diff = self.parse_and_apply_edits(response=response, content=content, original_code=job_code)
295-
296-
else:
297-
text_response = response
298-
suggested_code = None
299-
diff = None
336+
logger.info(
337+
"edit_job CALLED: %d edit(s), patches_applied=%s (blocks=%r)",
338+
len(tool_code_edits), (diff or {}).get("patches_applied"), _blocks,
339+
)
300340

301341
# Add prefix to content when building history
302342
prefixed_content = add_page_prefix(content, current_page)
@@ -311,18 +351,31 @@ def generate(
311351
*[usage_data for usage_key, usage_data in retrieved_knowledge.get("usage", {}).items()]
312352
)
313353

354+
stop_reason = getattr(message, "stop_reason", None)
355+
356+
# Check truncation BEFORE the empty check. max_tokens commonly leaves
357+
# PARTIAL text behind (or partial/broken JSON in suggest_code mode);
358+
# if we only inspected stop_reason when text_response is empty, that
359+
# cut-off content would be returned as a normal success and the
360+
# truncation signal lost. Surface it regardless of whether text came back.
361+
if stop_reason == "max_tokens":
362+
sentry_sdk.set_tag("stop_reason", stop_reason)
363+
sentry_sdk.set_tag("empty_reason", "max_tokens")
364+
sentry_sdk.set_context("empty_response", {
365+
"service": "job_chat",
366+
"suggest_code": bool(suggest_code),
367+
})
368+
stream_manager.end_stream()
369+
raise ApolloError(502, "Response truncated", type="OUTPUT_TRUNCATED")
370+
314371
if not text_response:
315-
stop_reason = getattr(message, "stop_reason", None)
316-
empty_reason = "max_tokens" if stop_reason == "max_tokens" else "no_text_blocks"
317372
sentry_sdk.set_tag("stop_reason", stop_reason)
318-
sentry_sdk.set_tag("empty_reason", empty_reason)
373+
sentry_sdk.set_tag("empty_reason", "no_text_blocks")
319374
sentry_sdk.set_context("empty_response", {
320375
"service": "job_chat",
321376
"suggest_code": bool(suggest_code),
322377
})
323378
stream_manager.end_stream()
324-
if stop_reason == "max_tokens":
325-
raise ApolloError(502, "Response truncated", type="OUTPUT_TRUNCATED")
326379
raise ApolloError(502, "Model returned no usable text", type="EMPTY_OUTPUT")
327380

328381
stream_manager.end_stream()
@@ -350,65 +403,14 @@ def process_stream_event(
350403
"""
351404
Process a single stream event from the Anthropic API.
352405
353-
With suggest_code, code_edits are generated first (buffered silently),
354-
then a changes event is sent, and text_answer streams to the client.
406+
The conversational reply is plain text now. Code edits arrive via the
407+
`edit_job` tool call (as input_json_delta) and are applied from the final
408+
message — not streamed here — so we simply forward text deltas live.
355409
"""
356-
if event.type == "content_block_delta":
357-
if event.delta.type == "text_delta":
358-
text_chunk = event.delta.text
359-
accumulated_response += text_chunk
360-
361-
if suggest_code and not text_started:
362-
# Code edits phase: buffer silently until text_answer starts.
363-
# Tolerant of whitespace variants the model may emit.
364-
match = re.search(r'"text_answer"\s*:\s*"', accumulated_response)
365-
366-
if match:
367-
# Extract code_edits from the JSON before the delimiter
368-
edits_part = accumulated_response[:match.start()]
369-
# Find the code_edits array value
370-
try:
371-
# Close the partial object and extract code_edits
372-
partial = edits_part.rstrip().rstrip(",") + "}"
373-
code_edits = json.loads(partial).get("code_edits", [])
374-
375-
if original_code and code_edits:
376-
suggested_code, diff = self.apply_code_edits(
377-
content=content or "",
378-
text_answer="",
379-
original_code=original_code,
380-
code_edits=code_edits
381-
)
382-
self._stream_applied = True
383-
self._stream_suggested_code = suggested_code
384-
self._stream_diff = diff
385-
if suggested_code:
386-
stream_manager.send_changes({"code": suggested_code})
387-
else:
388-
stream_manager.send_changes({"code_edits": code_edits})
389-
elif code_edits:
390-
stream_manager.send_changes({"code_edits": code_edits})
391-
except (json.JSONDecodeError, ValueError):
392-
logger.warning(f"Failed to parse code_edits during streaming")
393-
394-
# Mark where text content starts in the accumulated buffer
395-
sent_length = match.end()
396-
text_started = True
397-
398-
if suggest_code and text_started:
399-
# Text phase: stream with buffer to handle split escape sequences
400-
# Use 2-char buffer since longest escape is 2 chars (e.g. \n, \")
401-
buffer_size = 2
402-
safe_to_send_until = len(accumulated_response) - buffer_size
403-
404-
if safe_to_send_until > sent_length:
405-
safe_text = accumulated_response[sent_length:safe_to_send_until]
406-
stream_manager.send_text(self._unescape_json_string(safe_text))
407-
sent_length = safe_to_send_until
408-
409-
elif not suggest_code:
410-
# Normal streaming for non-code suggestions
411-
stream_manager.send_text(text_chunk)
410+
if event.type == "content_block_delta" and event.delta.type == "text_delta":
411+
text_chunk = event.delta.text
412+
accumulated_response += text_chunk
413+
stream_manager.send_text(text_chunk)
412414

413415
return accumulated_response, text_started, sent_length
414416

@@ -532,25 +534,15 @@ def try_error_correction(self, content: str, error_message: str, old_code: str,
532534
full_code=full_code,
533535
text_explanation=text_explanation
534536
)
535-
correction_schema = {
536-
"type": "object",
537-
"properties": {
538-
"explanation": {"type": "string"},
539-
"corrected_old_code": {"type": "string"},
540-
"corrected_new_code": {"type": "string"}
541-
},
542-
"required": ["explanation", "corrected_old_code", "corrected_new_code"],
543-
"additionalProperties": False
544-
}
537+
# structured outputs removed here too (see note in generate); the
538+
# correction prompt already instructs the {explanation, corrected_*}
539+
# JSON shape and json.loads below is wrapped in try/except.
545540
message = self.client.messages.create(
546541
max_tokens=16384,
547542
messages=prompt,
548543
model=self.config.model,
549544
system=system_message,
550-
output_config={
551-
"format": {"type": "json_schema", "schema": correction_schema},
552-
"effort": "medium"
553-
},
545+
output_config={"effort": "medium"},
554546
thinking={"type": "adaptive"}
555547
)
556548

0 commit comments

Comments
 (0)