Skip to content

Commit 76a4828

Browse files
author
Forge
committed
fix: parse OpenClaw native toolCall/toolResult format
OpenClaw JSONL uses camelCase 'toolCall' content blocks (not Anthropic's 'tool_use'), with fields 'name'+'arguments' (not 'toolName'+'input'). Tool results are separate messages with role='toolResult' (not role='tool'), carrying top-level 'toolCallId'+'toolName' and result in content[0].text.

Previous behavior: every tool interaction parsed as 'unknown operation / Ran unknown' because field names never matched.

Fix:
- toolCall: read block.name and block.arguments (fall back to legacy fields)
- toolResult: match role='toolResult', extract content[0].text, pair by toolCallId
- Keep legacy OpenAI tool_calls array support for compatibility

Tested on 202 real OpenClaw sessions:
- Before fix: 0 interactions extracted per session
- After fix: 3,676,255 tokens compressed from 3,790,702 (97.0% reduction)

Top session: 649K tokens → 9K (98.6%)
1 parent 76fda6e commit 76a4828

1 file changed

Lines changed: 86 additions & 30 deletions

File tree

scripts/observation_compressor.py

Lines changed: 86 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -115,56 +115,112 @@ def parse_session_jsonl(path: Path) -> List[Dict[str, Any]]:
115115
def extract_tool_interactions(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Extract tool call/result pairs from parsed messages.

    Supports OpenClaw's native JSONL format:
      - Tool calls: role="assistant", content block type="toolCall",
        fields: name (str), arguments (dict), id (str)
      - Tool results: role="toolResult" (top-level), fields: toolName,
        toolCallId, content=[{type: "text", text: "..."}]

    Also handles legacy OpenAI-style messages for compatibility:
      - Tool calls: role="assistant" with a top-level ``tool_calls`` array
        whose items carry ``id`` and ``function: {name, arguments}``
      - Tool results: role="tool" with ``tool_call_id`` and string or
        list content

    Results are paired with calls by id when one is available; otherwise
    they are attached to the most recent call that has not yet received a
    result. Results that cannot be paired with any call are dropped.

    Args:
        messages: Parsed session messages, in conversation order. Entries
            that are not dicts are ignored.

    Returns:
        One dict per tool call, in call order, with the keys ``tool_name``,
        ``input_summary`` (at most 300 chars), ``output_summary`` (at most
        500 chars), ``output_size`` (full length of the result text) and
        ``assistant_text`` (at most 200 chars of accompanying narration).
    """
    interactions: List[Dict[str, Any]] = []
    # Calls still waiting for a result, indexed by tool call id.
    pending: Dict[str, Dict[str, Any]] = {}
    # id() of every interaction that already received a result. Tracked
    # separately because an empty result leaves output_summary == "", which
    # would otherwise be indistinguishable from "no result yet".
    resolved = set()

    def record_call(tool_name: Any, args: Any, call_id: Any, assistant_text: str) -> None:
        interaction = {
            "tool_name": tool_name,
            "input_summary": _summarize_arguments(args),
            "output_summary": "",
            "output_size": 0,
            "assistant_text": assistant_text,
        }
        interactions.append(interaction)
        if call_id:
            pending[call_id] = interaction

    def attach_result(call_id: Any, result_text: str) -> None:
        # Match by id first, then fall back to the most recent open call.
        target = pending.pop(call_id, None) if call_id else None
        if target is None:
            target = next(
                (ix for ix in reversed(interactions) if id(ix) not in resolved),
                None,
            )
        if target is None or id(target) in resolved:
            return
        target["output_summary"] = result_text[:500]
        target["output_size"] = len(result_text)
        resolved.add(id(target))

    for msg in messages:
        if not isinstance(msg, dict):
            continue
        content = msg.get("content", "")
        role = msg.get("role", "")

        if role == "assistant":
            native_calls: List[Dict[str, Any]] = []
            if isinstance(content, list):
                # Narration text that accompanies the tool calls.
                assistant_text = _first_text_block(content)[:200]
                native_calls = [
                    block
                    for block in content
                    if isinstance(block, dict) and block.get("type") == "toolCall"
                ]
            elif isinstance(content, str):
                assistant_text = content[:200]
            else:
                assistant_text = ""

            # --- OpenClaw native: toolCall content blocks ---
            for block in native_calls:
                record_call(
                    block.get("name") or block.get("toolName") or "unknown",
                    block.get("arguments") or block.get("input") or {},
                    block.get("id", ""),
                    assistant_text,
                )

            # --- Legacy OpenAI-style tool_calls array ---
            # Only consulted when the message has no native blocks, so a
            # message carrying both representations is not counted twice.
            if not native_calls:
                tool_calls = msg.get("tool_calls")
                if not isinstance(tool_calls, list):
                    tool_calls = []  # key absent, null, or malformed
                for tc in tool_calls:
                    if not isinstance(tc, dict):
                        continue
                    func = tc.get("function")
                    if not isinstance(func, dict):
                        func = {}
                    args = func.get("arguments")
                    record_call(
                        func.get("name") or "unknown",
                        "" if args is None else args,
                        tc.get("id", ""),
                        assistant_text,
                    )

        # --- OpenClaw native: toolResult message ---
        elif role == "toolResult":
            if isinstance(content, list):
                result_text = _first_text_block(content)
            elif isinstance(content, str):
                result_text = content
            else:
                result_text = ""
            attach_result(msg.get("toolCallId", ""), result_text)

        # --- Legacy OpenAI-style role=tool result ---
        elif role == "tool":
            attach_result(msg.get("tool_call_id", ""), _legacy_result_text(content))

    return interactions


def _summarize_arguments(args: Any, limit: int = 300) -> str:
    """Render tool-call arguments as a string of at most *limit* characters."""
    if isinstance(args, str):
        # Legacy tool_calls usually carry arguments as a JSON-encoded string.
        return args[:limit]
    if isinstance(args, dict):
        return json.dumps(args)[:limit]
    return str(args)[:limit]


def _first_text_block(blocks: List[Any]) -> str:
    """Return the text of the first ``type == "text"`` block, or ""."""
    for block in blocks:
        if isinstance(block, dict) and block.get("type") == "text":
            text = block.get("text")
            if text is None:
                return ""
            return text if isinstance(text, str) else str(text)
    return ""


def _legacy_result_text(content: Any) -> str:
    """Return the result text of a legacy role="tool" message, or ""."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        for block in content:
            if isinstance(block, dict):
                # Only the first dict block is consulted.
                return str(block.get("result") or block.get("text") or "")
    return ""
170226

0 commit comments

Comments
 (0)