Skip to content

Commit 920fdaf

Browse files
authored
Merge pull request #236 from Hyperkid123/feat/RHCLOUD-48344
feat(bot): add transcript capture and progress instructions
2 parents c7b25e3 + 186d03c commit 920fdaf

6 files changed

Lines changed: 492 additions & 2 deletions

File tree

CLAUDE.md

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,13 @@ Active: `in_progress`, `pr_open`, `pr_changes`. Terminal: `done`, `archived`, `p
111111

112112
**Multi-repo**: One task per Jira ticket. Primary repo in `repo`, all in `metadata.repos`. PRs in `metadata.prs` as `[{"repo", "number", "url", "host"}]`.
113113

114+
### Cycle Progress Tools
115+
116+
| Tool | Purpose |
117+
|------|---------|
118+
| `progress_store` | Store structured cycle progress. Params: `task_id, instance_id, cycle_type, progress?, started_at?, finished_at?, tool_calls?, tokens_used?` |
119+
| `progress_load` | Load last N progress entries for a task. Params: `task_id, instance_id?, limit?` (default 5) |
120+
114121
### Memory Tools
115122

116123
| Tool | Purpose |
@@ -395,7 +402,23 @@ Keep task record updated throughout (not just end). `task_update` w/ `summary` +
395402
- `last_step`: `branch_created`/`implemented`/`tests_passing`/`push_failed`/`pr_opened`/`review_addressed`/`investigation_posted`/`archived`
396403
- `files_changed`, `commits`, `next_step`, `notes`, `repos`, `prs`
397404

398-
**On startup — interrupted work**: Triage output shows all `in_progress` tasks w/ `last_step`. Any w/ `last_step` set? → `memory_search` repo + problem → resume from `next_step`. Task metadata = specific work state. RAG memory = cross-ticket learnings.
405+
### Cycle Progress (progress_load / progress_store)
406+
407+
Persists structured progress across cycles. Separate from `task_update` — creates **history**, not just current state.
408+
409+
**On resume** (existing task, not new):
410+
1. `task_get(jira_key)` → note `id` field = `task_id`
411+
2. `progress_load(task_id=<id>)` → last 5 cycle summaries
412+
3. Use returned progress → understand prior decisions, files, blockers, where left off
413+
414+
**Before cycle ends** (after work on task):
415+
1. `progress_store(task_id=<id>, instance_id=<instance>, cycle_type="task_work", progress={...})`
416+
2. Progress keys: `last_step`, `next_step`, `files_changed`, `commits`, `key_decisions`, `blockers`, `notes`
417+
3. In addition to `task_update` — call both
418+
419+
Idle/error cycles: `run.py` handles automatically. No agent action.
420+
421+
**On startup — interrupted work**: `in_progress` w/ `last_step` set? → `progress_load(task_id)` for cycle history + `memory_search` repo + problem → resume from `next_step`. Cycle progress = per-cycle history. Task metadata = current state. RAG memory = cross-ticket learnings.
399422

400423
## Rules
401424

bot/agent.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Core agent cycle — invokes Claude Agent SDK."""
22

3+
import json
34
import logging
45
import os
56
from dataclasses import dataclass
@@ -12,6 +13,7 @@
1213
ResultMessage,
1314
SystemMessage,
1415
TextBlock,
16+
ToolResultBlock,
1517
query,
1618
)
1719

@@ -35,6 +37,7 @@ class CycleContext:
3537
repo: str | None = None
3638
work_type: str | None = None
3739
summary: str | None = None
40+
task_id: int | None = None
3841

3942

4043
async def _push_status(
@@ -208,10 +211,11 @@ async def run_cycle(
208211
logger.info("[agent] %s", text[:300])
209212
# Push to dashboard
210213
await _push_status(http, "working", text[:500])
214+
elif isinstance(block, ToolResultBlock):
215+
_extract_task_id_from_result(block, ctx)
211216
elif hasattr(block, "name"):
212217
desc = _describe_tool_use(block)
213218
logger.info("[tool] %s", desc)
214-
# Extract work context from MCP tool calls
215219
_extract_context(block, ctx)
216220

217221
elif isinstance(message, ResultMessage):
@@ -304,3 +308,29 @@ def _extract_context(block, ctx: CycleContext) -> None:
304308
# Memory housekeeping
305309
elif name == "mcp__bot-memory__memory_delete":
306310
ctx.work_type = ctx.work_type or "memory_housekeeping"
311+
312+
313+
def _extract_task_id_from_result(block: ToolResultBlock, ctx: CycleContext) -> None:
314+
"""Extract task_id from MCP tool result content (task_add/task_get/task_update return task objects)."""
315+
content = block.content
316+
if not content:
317+
return
318+
try:
319+
text = (
320+
content
321+
if isinstance(content, str)
322+
else content[0].get("text", "")
323+
if isinstance(content, list)
324+
else ""
325+
)
326+
if not text:
327+
return
328+
data = json.loads(text)
329+
if (
330+
isinstance(data, dict)
331+
and isinstance(data.get("id"), int)
332+
and "jira_key" in data
333+
):
334+
ctx.task_id = data["id"]
335+
except (json.JSONDecodeError, TypeError, IndexError, AttributeError):
336+
pass

bot/run.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from .config import ALLOWED_TOOLS, Config, load_config, load_mcp_servers, sanitize_env
2121
from .costs import record_cost
2222
from .merge import apply_merged_config
23+
from .transcripts import record_transcript
2324

2425
SCRIPT_DIR = Path(__file__).resolve().parent.parent
2526
DATA_DIR = SCRIPT_DIR / "data"
@@ -342,6 +343,13 @@ def shutdown(sig, frame):
342343
result=result,
343344
ctx=ctx,
344345
)
346+
record_transcript(
347+
label=args.label,
348+
result=result,
349+
ctx=ctx,
350+
cwd=str(SCRIPT_DIR),
351+
instance_id=instance_id,
352+
)
345353
else:
346354
logger.warning("Cycle produced no result")
347355

bot/transcripts.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
"""Transcript capture — compresses and stores cycle transcripts via the dashboard API."""
2+
3+
from __future__ import annotations
4+
5+
import base64
6+
import logging
7+
import os
8+
from datetime import datetime, timedelta, timezone
9+
from pathlib import Path
10+
from typing import TYPE_CHECKING
11+
12+
import httpx
13+
14+
if TYPE_CHECKING:
15+
from .agent import CycleContext
16+
17+
logger = logging.getLogger(__name__)
18+
19+
CYCLE_RUNS_API = os.environ.get(
20+
"CYCLE_RUNS_API_URL", "http://localhost:8080/api/cycle-runs"
21+
)
22+
23+
_WORK_TYPE_TO_CYCLE_TYPE = {
24+
"new_ticket": "task_work",
25+
"pr_review": "task_work",
26+
"ci_fix": "task_work",
27+
"idle": "idle",
28+
"memory_housekeeping": "idle",
29+
"error": "error",
30+
}
31+
32+
33+
def _resolve_cycle_type(work_type: str | None, is_error: bool) -> str:
34+
if is_error:
35+
return "error"
36+
if work_type:
37+
return _WORK_TYPE_TO_CYCLE_TYPE.get(work_type, "task_work")
38+
return "triage_only"
39+
40+
41+
def _find_transcript(session_id: str, cwd: str) -> Path | None:
42+
"""Locate the Claude session transcript JSONL file."""
43+
slug = cwd.replace("/", "-")
44+
if not slug.startswith("-"):
45+
slug = "-" + slug
46+
home = Path.home()
47+
path = home / ".claude" / "projects" / slug / f"{session_id}.jsonl"
48+
if path.exists():
49+
return path
50+
# Fallback: scan project dirs for the session file
51+
projects_dir = home / ".claude" / "projects"
52+
if projects_dir.is_dir():
53+
for candidate in projects_dir.iterdir():
54+
f = candidate / f"{session_id}.jsonl"
55+
if f.exists():
56+
return f
57+
return None
58+
59+
60+
def record_transcript(
61+
label: str,
62+
result,
63+
ctx: CycleContext | None = None,
64+
cwd: str = "",
65+
instance_id: str | None = None,
66+
) -> None:
67+
"""Compress and store the cycle transcript + metadata to the dashboard API."""
68+
session_id = getattr(result, "session_id", "")
69+
if not session_id:
70+
logger.debug("No session_id in result — skipping transcript capture")
71+
return
72+
73+
usage = getattr(result, "usage", None) or {}
74+
is_error = getattr(result, "subtype", "") != "success"
75+
cycle_type = _resolve_cycle_type(ctx.work_type if ctx else None, is_error)
76+
77+
duration_ms = getattr(result, "duration_ms", None) or 0
78+
now = datetime.now(timezone.utc)
79+
started_at = now
80+
if duration_ms:
81+
started_at = now - timedelta(milliseconds=duration_ms)
82+
83+
body: dict = {
84+
"task_id": ctx.task_id if ctx else None,
85+
"cycle_type": cycle_type,
86+
"instance_id": instance_id or label,
87+
"started_at": started_at.isoformat(),
88+
"finished_at": now.isoformat(),
89+
"tool_calls": getattr(result, "num_turns", 0),
90+
"tokens_used": usage.get("input_tokens", 0) + usage.get("output_tokens", 0),
91+
"progress": {
92+
"jira_key": ctx.jira_key if ctx else None,
93+
"repo": ctx.repo if ctx else None,
94+
"work_type": ctx.work_type if ctx else None,
95+
"summary": ctx.summary if ctx else None,
96+
},
97+
}
98+
99+
transcript_path = _find_transcript(session_id, cwd)
100+
if transcript_path:
101+
try:
102+
import zstandard as zstd
103+
104+
raw = transcript_path.read_bytes()
105+
compressor = zstd.ZstdCompressor(level=19)
106+
compressed = compressor.compress(raw)
107+
body["transcript_b64"] = base64.b64encode(compressed).decode()
108+
logger.info(
109+
"Transcript: %d bytes → %d compressed (%.0f%% savings)",
110+
len(raw),
111+
len(compressed),
112+
(1 - len(compressed) / len(raw)) * 100 if raw else 0,
113+
)
114+
except ImportError:
115+
logger.warning(
116+
"zstandard not installed — storing cycle run without transcript"
117+
)
118+
except Exception:
119+
logger.warning("Failed to read/compress transcript", exc_info=True)
120+
else:
121+
logger.debug("Transcript file not found for session %s", session_id)
122+
123+
try:
124+
httpx.post(CYCLE_RUNS_API, json=body, timeout=10.0)
125+
except Exception:
126+
logger.warning("Failed to push cycle run to API", exc_info=True)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies = [
88
"filelock",
99
"httpx",
1010
"python-dotenv",
11+
"zstandard>=0.23",
1112
]
1213

1314
[project.optional-dependencies]

0 commit comments

Comments
 (0)