-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathregenerate_full_snapshot.py
More file actions
119 lines (104 loc) · 4.54 KB
/
Copy pathregenerate_full_snapshot.py
File metadata and controls
119 lines (104 loc) · 4.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python3
"""Regenerate the canonical artifact_snapshot.json matching the paper's full
1,301-session scope (400 scripted + 901 LLM-driven).
Reads the canonical cache-trace CSVs:
- data/traces/full_400_sessions.csv (scripted)
- data/traces/llm_full_901.csv (LLM)
And the per-task summaries under:
- data/release-v3/scraping/<task>/summary.json
(for the Zurich scripted subset per-task statistics)
Produces paper/artifact_snapshot.json which records:
- paper-level scope (sessions, requests, models, providers, regions)
- file paths to canonical cache traces
- sanitization policy + tool path
- paper-body header numbers (so a reviewer can verify the artifact
describes the same corpus as the paper text).
Run:
python3 regenerate_full_snapshot.py
"""
from __future__ import annotations
import csv
import json
from pathlib import Path
PAPER_DIR = Path(__file__).resolve().parent
REPO_ROOT = PAPER_DIR
SCRIPTED_CSV = REPO_ROOT / "data" / "traces" / "full_400_sessions.csv"
LLM_CSV = REPO_ROOT / "data" / "traces" / "llm_full_901.csv"
RELEASES = REPO_ROOT / "data" / "release-v3"
def count_rows(path: Path) -> int:
with path.open() as f:
return sum(1 for _ in f) - 1 # header
def main() -> None:
if not SCRIPTED_CSV.exists():
raise FileNotFoundError(f"Scripted trace missing: {SCRIPTED_CSV}")
if not LLM_CSV.exists():
raise FileNotFoundError(f"LLM trace missing: {LLM_CSV}")
scripted_rows = count_rows(SCRIPTED_CSV)
llm_rows = count_rows(LLM_CSV)
snapshot = {
"release": "release-v3",
"description": (
"BrowseTrace full release: scripted baseline + LLM-driven agent traces "
"for cache-replay and workload characterization."
),
"scope": {
"total_sessions": 1301,
"scripted_sessions": 400,
"llm_sessions": 901,
"scripted_requests": scripted_rows,
"llm_replay_requests": llm_rows,
"task_families": 10,
"regions": ["zurich", "us-central1", "europe-west1", "asia-southeast1"],
"models": [
"gpt-4.1-mini",
"gemini-2.5-flash",
"gemini-2.5-pro",
"claude-haiku-4.5",
"deepseek-v3.2",
"qwen-2.5-coder-7b",
],
"providers": ["OpenAI", "Google", "Anthropic", "DeepSeek", "Alibaba"],
},
"canonical_files": {
"scripted_cache_trace": str(SCRIPTED_CSV.relative_to(REPO_ROOT)),
"llm_cache_trace": str(LLM_CSV.relative_to(REPO_ROOT)),
"scripted_per_task_summaries": "data/release-v3/scraping/<task>/summary.json",
"llm_raw_source_dirs_note": (
"Per-model LLM raw session bundles are stitched into "
"cache-sim/traces/llm_full_901.csv for public release. "
"Per-session raw JSON is not included in v3 for compactness; "
"see paper Appendix for per-model session counts."
),
},
"cache_replay": {
"reference_implementation": "libCacheSim (v0.3.3+, https://github.com/1a1a11a/libCacheSim)",
"policies_evaluated": ["LRU", "LFU", "ARC", "S3-FIFO", "W-TinyLFU", "GDSF"],
"cache_sizes_mib": [1, 5, 10, 25, 50],
},
"sanitization": {
"applied": True,
"tool": "tools/sanitize_release.py",
"policy": (
"Request/response headers stripped: Authorization, Cookie, Set-Cookie, "
"Proxy-Authorization. URL query parameter values replaced with _REDACTED_. "
"User-Agent strings with project-brand tokens replaced with "
"'BrowseTrace/1.0 (benchmark)'. Recursive scrub of nested JSON fields."
),
},
"paper_body_reference_numbers": {
"note": "These are the core numbers the paper reports; they must match the canonical traces above.",
"scripted_request_total": 168_067,
"scripted_cacheable_requests": scripted_rows,
"llm_replay_requests": llm_rows,
"abstract_5mib_gdsf_lru_scripted": "59.5% vs 37.4%",
"abstract_5mib_gdsf_lru_llm": "76.2% vs 43.5%",
},
}
out = PAPER_DIR / "artifact_snapshot.json"
out.write_text(json.dumps(snapshot, indent=2) + "\n")
print(f"Wrote canonical snapshot: {out}")
print(f" scripted requests: {scripted_rows:,}")
print(f" LLM replay requests: {llm_rows:,}")
print(f" total sessions: 1,301")
if __name__ == "__main__":
main()