Skip to content

Commit 1b316f2

Browse files
andreasronge, claude, github-actions[bot]
authored
feat(lisp): surface catalog_ops on Step (#920) (#937)
* feat(lisp): surface catalog_ops on Step (#920)

`EvalContext` already collected per-call records for the PTC-Lisp `catalog/` builtins (operation, args, outcome, reason, duration_ms) via `append_catalog_op/2`, but nothing propagated them out of the sandbox — `Step` had `tool_calls` and `pmap_calls` but no `catalog_ops`, so the tracing data was dropped at the execution boundary.

## Changes

* `Step` gains a `:catalog_ops` field of type `[catalog_op()]`. `@type catalog_op` mirrors the EvalContext definition. Default-constructed steps (`Step.ok/2`, `Step.error/*`) initialise it to `[]`.
* `Lisp.apply_memory_contract/3` propagates `Enum.reverse(ctx.catalog_ops)` into the success Step.
* The `error_with_ctx` branch in `Lisp.run/2` does the same for programs that fail mid-execution.

Ordering is chronological (oldest first), matching how `tool_calls` and `pmap_calls` are already surfaced.

## Tests

Four new cases in `test/catalog_builtins_test.exs`:

1. Single successful op produces one `:ok` record with args + duration
2. Multiple ops appear in chronological order
3. World fault produces `:nil_world_fault` record with reason atom
4. Programs without catalog calls expose `step.catalog_ops == []`

Full suite green: 4927 tests, 0 failures.

Closes #920

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(lisp): propagate catalog_ops through merge, closures, and HOF stash

Fixes three context-propagation gaps identified in PR #937 review:

- EvalContext.merge/2: add catalog_ops to parallel branch merge (pmap/pcalls)
- execute_closure/4: carry caller_ctx.catalog_ops into closure context
- push/stash/pop_side_effects: include catalog_ops in HOF stash mechanism

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>
1 parent 08b439f commit 1b316f2

5 files changed

Lines changed: 89 additions & 3 deletions

File tree

lib/ptc_runner/lisp.ex

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,7 @@ defmodule PtcRunner.Lisp do
497497
prints: eval_ctx.prints,
498498
tool_calls: cleaned_tool_calls,
499499
pmap_calls: cleaned_pmap_calls,
500+
catalog_ops: Enum.reverse(eval_ctx.catalog_ops),
500501
child_traces: child_traces,
501502
child_steps: child_steps,
502503
journal: eval_ctx.journal,
@@ -668,6 +669,7 @@ defmodule PtcRunner.Lisp do
668669
prints: Enum.reverse(ctx.prints),
669670
tool_calls: cleaned_tool_calls,
670671
pmap_calls: cleaned_pmap_calls,
672+
catalog_ops: Enum.reverse(ctx.catalog_ops),
671673
child_traces: child_traces,
672674
child_steps: child_steps
673675
}

lib/ptc_runner/lisp/eval/apply.ex

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -638,7 +638,7 @@ defmodule PtcRunner.Lisp.Eval.Apply do
638638

639639
defp push_side_effect_stash do
640640
stack = Process.get(:__ptc_hof_stack, [])
641-
Process.put(:__ptc_hof_stack, [%{tool_calls: [], prints: []} | stack])
641+
Process.put(:__ptc_hof_stack, [%{tool_calls: [], prints: [], catalog_ops: []} | stack])
642642
end
643643

644644
defp stash_side_effects(%EvalContext{} = ctx) do
@@ -649,7 +649,8 @@ defmodule PtcRunner.Lisp.Eval.Apply do
649649
# then previous invocations'.
650650
updated = %{
651651
tool_calls: ctx.tool_calls ++ top.tool_calls,
652-
prints: ctx.prints ++ top.prints
652+
prints: ctx.prints ++ top.prints,
653+
catalog_ops: ctx.catalog_ops ++ top.catalog_ops
653654
}
654655

655656
Process.put(:__ptc_hof_stack, [updated | rest])
@@ -670,6 +671,7 @@ defmodule PtcRunner.Lisp.Eval.Apply do
670671
eval_ctx
671672
|> Map.update!(:tool_calls, fn existing -> top.tool_calls ++ existing end)
672673
|> Map.update!(:prints, fn existing -> top.prints ++ existing end)
674+
|> Map.update!(:catalog_ops, fn existing -> top.catalog_ops ++ existing end)
673675

674676
[] ->
675677
eval_ctx
@@ -717,7 +719,8 @@ defmodule PtcRunner.Lisp.Eval.Apply do
717719
tool_cache: caller_ctx.tool_cache,
718720
summaries: caller_ctx.summaries,
719721
journal: caller_ctx.journal,
720-
catalog_exec: caller_ctx.catalog_exec
722+
catalog_exec: caller_ctx.catalog_exec,
723+
catalog_ops: caller_ctx.catalog_ops
721724
}
722725

723726
case do_eval_fn.(body, closure_ctx) do

lib/ptc_runner/lisp/eval/context.ex

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ defmodule PtcRunner.Lisp.Eval.Context do
327327
| prints: ctx2.prints ++ ctx1.prints,
328328
tool_calls: ctx2.tool_calls ++ ctx1.tool_calls,
329329
pmap_calls: ctx2.pmap_calls ++ ctx1.pmap_calls,
330+
catalog_ops: ctx2.catalog_ops ++ ctx1.catalog_ops,
330331
user_ns: Map.merge(ctx1.user_ns, ctx2.user_ns),
331332
iteration_count: ctx1.iteration_count + ctx2.iteration_count,
332333
summaries: Map.merge(ctx1.summaries, ctx2.summaries),

lib/ptc_runner/step.ex

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ defmodule PtcRunner.Step do
193193
:prints,
194194
:tool_calls,
195195
:pmap_calls,
196+
:catalog_ops,
196197
:child_traces,
197198
:child_steps,
198199
:messages,
@@ -300,6 +301,30 @@ defmodule PtcRunner.Step do
300301
error_count: non_neg_integer()
301302
}
302303

304+
@typedoc """
305+
PTC-Lisp `catalog/` builtin invocation record (aggregator mode).
306+
307+
Captured for each `catalog/summary`, `catalog/list-servers`,
308+
`catalog/list-tools`, `catalog/describe-tool`, and
309+
`catalog/search-tools` call dispatched through `catalog_exec`.
310+
311+
Fields:
312+
- `operation`: The builtin variant (`:summary`, `:list_servers`,
313+
`:list_tools`, `:describe_tool`, `:search_tools`)
314+
- `args`: Normalized argument map (shape depends on operation)
315+
- `outcome`: `:ok` on success, `:nil_world_fault` when a world fault
316+
was swallowed to `nil`, `:error` on programmer faults that raised
317+
- `reason`: World-fault reason atom when `outcome == :nil_world_fault`
318+
- `duration_ms`: How long the catalog dispatch took
319+
"""
320+
@type catalog_op :: %{
321+
operation: atom(),
322+
args: map(),
323+
outcome: :ok | :nil_world_fault | :error,
324+
reason: atom() | nil,
325+
duration_ms: non_neg_integer()
326+
}
327+
303328
@type t :: %__MODULE__{
304329
return: term() | nil,
305330
fail: fail() | nil,
@@ -314,6 +339,7 @@ defmodule PtcRunner.Step do
314339
prints: [String.t()],
315340
tool_calls: [tool_call()],
316341
pmap_calls: [pmap_call()],
342+
catalog_ops: [catalog_op()],
317343
child_traces: [String.t()],
318344
child_steps: [t()],
319345
messages: [message()] | nil,
@@ -350,6 +376,7 @@ defmodule PtcRunner.Step do
350376
prints: [],
351377
tool_calls: [],
352378
pmap_calls: [],
379+
catalog_ops: [],
353380
child_traces: [],
354381
child_steps: []
355382
}
@@ -428,6 +455,7 @@ defmodule PtcRunner.Step do
428455
prints: [],
429456
tool_calls: [],
430457
pmap_calls: [],
458+
catalog_ops: [],
431459
child_traces: [],
432460
child_steps: []
433461
}

test/catalog_builtins_test.exs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,4 +370,56 @@ defmodule PtcRunner.CatalogBuiltinsTest do
370370
assert step.return == []
371371
end
372372
end
373+
374+
# ============================================================
375+
# catalog_ops tracing is surfaced on Step (#920)
376+
# ============================================================
377+
378+
describe "step.catalog_ops" do
379+
test "successful op produces one ok record in execution order" do
380+
{:ok, step} =
381+
Lisp.run(
382+
~s|(catalog/describe-tool "github" "search")|,
383+
catalog_exec: mock_catalog_exec()
384+
)
385+
386+
assert [op] = step.catalog_ops
387+
assert op.operation == :describe_tool
388+
assert op.outcome == :ok
389+
assert op.reason == nil
390+
assert op.args == %{server: "github", tool: "search"}
391+
assert is_integer(op.duration_ms) and op.duration_ms >= 0
392+
end
393+
394+
test "multiple ops appear in chronological (not reverse) order" do
395+
{:ok, step} =
396+
Lisp.run(
397+
~s|(do (catalog/list-servers) (catalog/list-tools "github") (catalog/summary))|,
398+
catalog_exec: mock_catalog_exec()
399+
)
400+
401+
assert [op1, op2, op3] = step.catalog_ops
402+
assert op1.operation == :list_servers
403+
assert op2.operation == :list_tools
404+
assert op2.args == %{server: "github"}
405+
assert op3.operation == :summary
406+
end
407+
408+
test "world fault produces :nil_world_fault record with reason" do
409+
exec = fn _op, _args -> {:world_fault, :upstream_unavailable} end
410+
411+
{:ok, step} =
412+
Lisp.run(~s|(or (catalog/list-tools "github") [])|, catalog_exec: exec)
413+
414+
assert [op] = step.catalog_ops
415+
assert op.operation == :list_tools
416+
assert op.outcome == :nil_world_fault
417+
assert op.reason == :upstream_unavailable
418+
end
419+
420+
test "step from a program without catalog calls has empty catalog_ops" do
421+
{:ok, step} = Lisp.run("(+ 1 2)", catalog_exec: mock_catalog_exec())
422+
assert step.catalog_ops == []
423+
end
424+
end
373425
end

0 commit comments

Comments
 (0)