Skip to content

Commit a40aba2

Browse files
Align intermediates results
1 parent 7fddbc5 commit a40aba2

File tree

2 files changed

7 additions and 15 deletions (+7 -15 lines changed)

2 files changed

7 additions and 15 deletions (+7 -15 lines changed)

pyagentspec/src/pyagentspec/evaluation/intermediates/computing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ async def _compute_intermediates(
old new
 25  25    return Dataset.from_dict(
 26  26    data={
 27  27    sample_id: {
 28      - intermediate.name: _result_to_dict(results[(sample_id, intermediate.name)])
     28  + intermediate.name: _result_to_dict(results[(sample_id, intermediate.name)])["value"] # type: ignore
 29  29    for intermediate in intermediates
 30  30    }
 31  31    async for sample_id in dataset.ids()

pyagentspec/tests/evaluation/intermediates/test_intermediates.py

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,7 @@ async def test_intermediate_call_respects_mapping() -> None:
old new
 60  60    augmented_sample = await augmented.get_sample(0)
 61  61
 62  62    assert augmented_sample["external_value"] == 1
 63      - assert augmented_sample["echo"]["value"] == 1
 64      - assert augmented_sample["echo"]["details"]["idx"] == 0
 65      - assert augmented_sample["echo"]["details"]["intermediate"] is True
     63  + assert augmented_sample["echo"] == 1
 66  64
 67  65
 68  66    @pytest.mark.anyio
@@ -75,18 +73,12 @@ async def test_add_multiple_intermediates_merges_samples(dataset: Dataset) -> No
old new
 75  73    second = await augmented.get_sample(2)
 76  74
 77  75    assert first["value"] == 1
 78      - assert first["echo"]["value"] == 1
 79      - assert first["echo"]["details"]["idx"] == 0
 80      - assert first["echo"]["details"]["intermediate"] is True
 81      - assert first["stateful"]["value"] == "intermediate-0"
 82      - assert first["stateful"]["details"] == {"idx": 0}
     76  + assert first["echo"] == 1
     77  + assert first["stateful"] == "intermediate-0"
 83  78
 84  79    assert second["value"] == 3
 85      - assert second["echo"]["value"] == 3
 86      - assert second["echo"]["details"]["idx"] == 2
 87      - assert second["echo"]["details"]["intermediate"] is True
 88      - assert second["stateful"]["value"] == "intermediate-2"
 89      - assert second["stateful"]["details"] == {"idx": 2}
     80  + assert second["echo"] == 3
     81  + assert second["stateful"] == "intermediate-2"
 90  82
 91  83
 92  84    @pytest.mark.anyio
@@ -103,4 +95,4 @@ async def compute_value(self, *, value: int) -> Tuple[int, Dict[str, Any]]:
old new
103  95    augmented = await add_intermediates(dataset, [intermediate])
104  96
105  97    sample = await augmented.get_sample(0)
106      - assert sample["kw_only"]["value"] == 11
     98  + assert sample["kw_only"] == 11

0 commit comments

Comments
 (0)