Skip to content

Commit 9969ff9

Browse files
committed
[evals] Add held-out sample caps for diff/patch sources
1 parent 5279a99 commit 9969ff9

3 files changed

Lines changed: 37 additions & 0 deletions

File tree

docs/explanations/diff_patch_ppl_leakage_checks.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,7 @@ Fail the run if any eval row shares a hash with training rows after normalizatio
1919
4. Keep eval source snapshots immutable.
2020
Pin source revisions (dataset snapshot or commit hash) in the build logs.
2121
Do not regenerate eval rows from moving HEAD references.
22+
23+
5. Cap held-out samples per source.
24+
Keep each source's held-out cap small so that wiring up the evals never requires downloading a full corpus.
25+
Current caps (set per slice via `held_out_sample_cap` in `experiments/exp5095_diff_patch_ppl.py`): SWE-bench issue-to-patch 256, SWE-bench raw git diff 256, CommitPack commit-message-plus-diff 512.

experiments/exp5095_diff_patch_ppl.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class DiffPatchSlice:
4343
name: str
4444
relative_path: str
4545
metrics: tuple[DiffPatchMetric, ...]
46+
held_out_sample_cap: int
4647

4748
@property
4849
def tags(self) -> tuple[str, ...]:
@@ -65,18 +66,21 @@ def to_raw_dataset(self, raw_root: str, metric: DiffPatchMetric) -> RawTextEvalu
6566
name="issue_to_patch",
6667
relative_path="swe_bench/issue_to_patch.jsonl.gz",
6768
metrics=(DiffPatchMetric.PATCH_TEXT, DiffPatchMetric.CONTEXT_PLUS_PATCH),
69+
held_out_sample_cap=256,
6870
),
6971
DiffPatchSlice(
7072
source="swe_bench",
7173
name="raw_git_diff",
7274
relative_path="swe_bench/raw_git_diff.jsonl.gz",
7375
metrics=(DiffPatchMetric.PATCH_TEXT,),
76+
held_out_sample_cap=256,
7477
),
7578
DiffPatchSlice(
7679
source="commitpack",
7780
name="commit_message_plus_diff",
7881
relative_path="commitpack/commit_message_plus_diff.jsonl.gz",
7982
metrics=(DiffPatchMetric.PATCH_TEXT, DiffPatchMetric.CONTEXT_PLUS_PATCH),
83+
held_out_sample_cap=512,
8084
),
8185
)
8286

@@ -154,6 +158,26 @@ def build_diff_patch_raw_validation_sets(
154158
return datasets
155159

156160

161+
def diff_patch_source_sampling_plan(
    *,
    slices: tuple[DiffPatchSlice, ...] = DIFF_PATCH_SLICES,
) -> dict[str, dict[str, object]]:
    """Describe the held-out sample cap for each diff/patch slice.

    Nothing is read or downloaded here: the plan is assembled purely from
    the attributes of the given slice specs, so a source builder can consult
    it before ingesting any data.

    Args:
        slices: Slice specs to describe. Defaults to ``DIFF_PATCH_SLICES``.

    Returns:
        A mapping keyed by ``"<source>/<name>"``. Each value carries the
        slice's ``held_out_sample_cap``, the fixed split label
        ``"validation"``, and the slice's ``source``. When two specs share
        a key, the later spec's entry replaces the earlier one.
    """
    plan: dict[str, dict[str, object]] = {}
    for spec in slices:
        plan_key = f"{spec.source}/{spec.name}"
        plan[plan_key] = {
            "held_out_sample_cap": spec.held_out_sample_cap,
            "split": "validation",
            "source": spec.source,
        }
    return plan
179+
180+
157181
# Populated by calling build_diff_patch_raw_validation_sets() with its default arguments.
ACTIVE_DIFF_PATCH_DATASETS: dict[str, RawTextEvaluationDataset] = build_diff_patch_raw_validation_sets()
158182

159183

tests/evals/test_exp5095_diff_patch_ppl.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,12 @@ def test_diff_patch_raw_validation_sets_prefixes_namespace() -> None:
7171
prefixed = diff_patch.diff_patch_raw_validation_sets()
7272
assert "diff_patch/swe_bench/issue_to_patch_patch_text" in prefixed
7373
assert "diff_patch/commitpack/commit_message_plus_diff_context_plus_patch" in prefixed
74+
75+
76+
def test_diff_patch_sampling_plan_uses_small_held_out_caps() -> None:
    """Default plan pins each slice's cap and labels every entry as validation."""
    expected_caps = {
        "swe_bench/issue_to_patch": 256,
        "swe_bench/raw_git_diff": 256,
        "commitpack/commit_message_plus_diff": 512,
    }

    sampling_plan = diff_patch.diff_patch_source_sampling_plan()

    for plan_key, expected_cap in expected_caps.items():
        assert sampling_plan[plan_key]["held_out_sample_cap"] == expected_cap
    for entry in sampling_plan.values():
        assert entry["split"] == "validation"

0 commit comments

Comments
 (0)