Skip to content

Commit 924be7a

Browse files
nv-alicheng and claude committed
test(integration): use local char-tokenizer fixture, drop HF Hub dependency
Two integration tests in PR #306's metrics-aggregator path were flaky / slow in CI because of HuggingFace Hub:

- `TestTemplateIntegration::test_template_runs` (6 cases) called `AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")` on the aggregator subprocess's startup path. Cold-cache CI runs paid the ~1 MB download + tokenizer-init cost, sometimes pushing subprocess startup past the parent launcher's 30 s timeout. Also required network egress / HF_TOKEN for some CI environments.
- `test_signal_handling.py` (new tests) were not affected (they don't pass `--tokenizer`), but the parent-owns-output-dir contract from the earlier #9 follow-up also applied — those tests now create the output dir themselves before spawning the subprocess.

Fix: drop in a local character-level tokenizer fixture at `tests/assets/tokenizers/char/`. ~3 KB total (`tokenizer.json` + `tokenizer_config.json`). Loaded via the existing `AutoTokenizer.from_pretrained(local_dir)` codepath — no test-only hooks in production code. Each character is one token, which is enough for the aggregator's ISL/OSL/TPOT triggers to produce deterministic counts (the e2e test path doesn't care about tokenization correctness, only that *some* count appears).

Effects: no network call on the aggregator startup path for these tests, no HF_TOKEN requirement, and tokenizer load completes in single-digit ms instead of seconds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 8f14e9e commit 924be7a

4 files changed

Lines changed: 214 additions & 7 deletions

File tree

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
{
2+
"version": "1.0",
3+
"truncation": null,
4+
"padding": null,
5+
"added_tokens": [
6+
{
7+
"id": 0,
8+
"content": "<unk>",
9+
"single_word": false,
10+
"lstrip": false,
11+
"rstrip": false,
12+
"normalized": false,
13+
"special": true
14+
},
15+
{
16+
"id": 1,
17+
"content": "<pad>",
18+
"single_word": false,
19+
"lstrip": false,
20+
"rstrip": false,
21+
"normalized": false,
22+
"special": true
23+
},
24+
{
25+
"id": 2,
26+
"content": "<s>",
27+
"single_word": false,
28+
"lstrip": false,
29+
"rstrip": false,
30+
"normalized": false,
31+
"special": true
32+
},
33+
{
34+
"id": 3,
35+
"content": "</s>",
36+
"single_word": false,
37+
"lstrip": false,
38+
"rstrip": false,
39+
"normalized": false,
40+
"special": true
41+
}
42+
],
43+
"normalizer": null,
44+
"pre_tokenizer": {
45+
"type": "Split",
46+
"pattern": {
47+
"String": ""
48+
},
49+
"behavior": "Isolated",
50+
"invert": false
51+
},
52+
"post_processor": {
53+
"type": "TemplateProcessing",
54+
"single": [
55+
{
56+
"Sequence": {
57+
"id": "A",
58+
"type_id": 0
59+
}
60+
}
61+
],
62+
"pair": [
63+
{
64+
"Sequence": {
65+
"id": "A",
66+
"type_id": 0
67+
}
68+
},
69+
{
70+
"Sequence": {
71+
"id": "B",
72+
"type_id": 1
73+
}
74+
}
75+
],
76+
"special_tokens": {}
77+
},
78+
"decoder": null,
79+
"model": {
80+
"type": "WordLevel",
81+
"vocab": {
82+
"<unk>": 0,
83+
"<pad>": 1,
84+
"<s>": 2,
85+
"</s>": 3,
86+
"a": 4,
87+
"b": 5,
88+
"c": 6,
89+
"d": 7,
90+
"e": 8,
91+
"f": 9,
92+
"g": 10,
93+
"h": 11,
94+
"i": 12,
95+
"j": 13,
96+
"k": 14,
97+
"l": 15,
98+
"m": 16,
99+
"n": 17,
100+
"o": 18,
101+
"p": 19,
102+
"q": 20,
103+
"r": 21,
104+
"s": 22,
105+
"t": 23,
106+
"u": 24,
107+
"v": 25,
108+
"w": 26,
109+
"x": 27,
110+
"y": 28,
111+
"z": 29,
112+
"A": 30,
113+
"B": 31,
114+
"C": 32,
115+
"D": 33,
116+
"E": 34,
117+
"F": 35,
118+
"G": 36,
119+
"H": 37,
120+
"I": 38,
121+
"J": 39,
122+
"K": 40,
123+
"L": 41,
124+
"M": 42,
125+
"N": 43,
126+
"O": 44,
127+
"P": 45,
128+
"Q": 46,
129+
"R": 47,
130+
"S": 48,
131+
"T": 49,
132+
"U": 50,
133+
"V": 51,
134+
"W": 52,
135+
"X": 53,
136+
"Y": 54,
137+
"Z": 55,
138+
"0": 56,
139+
"1": 57,
140+
"2": 58,
141+
"3": 59,
142+
"4": 60,
143+
"5": 61,
144+
"6": 62,
145+
"7": 63,
146+
"8": 64,
147+
"9": 65,
148+
" ": 66,
149+
"\t": 67,
150+
"\n": 68,
151+
"\r": 69,
152+
"!": 70,
153+
"\"": 71,
154+
"#": 72,
155+
"$": 73,
156+
"%": 74,
157+
"&": 75,
158+
"'": 76,
159+
"(": 77,
160+
")": 78,
161+
"*": 79,
162+
"+": 80,
163+
",": 81,
164+
"-": 82,
165+
".": 83,
166+
"/": 84,
167+
":": 85,
168+
";": 86,
169+
"<": 87,
170+
"=": 88,
171+
">": 89,
172+
"?": 90,
173+
"@": 91,
174+
"[": 92,
175+
"\\": 93,
176+
"]": 94,
177+
"^": 95,
178+
"_": 96,
179+
"`": 97,
180+
"{": 98,
181+
"|": 99,
182+
"}": 100,
183+
"~": 101
184+
},
185+
"unk_token": "<unk>"
186+
}
187+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"backend": "tokenizers",
3+
"bos_token": "<s>",
4+
"eos_token": "</s>",
5+
"model_max_length": 1000000000000000019884624838656,
6+
"pad_token": "<pad>",
7+
"tokenizer_class": "TokenizersBackend",
8+
"unk_token": "<unk>"
9+
}

tests/integration/async_utils/services/metrics_aggregator/test_signal_handling.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,11 @@ def test_sigterm_writes_interrupted_final_snapshot(self, tmp_path: Path):
9595
socket_dir = tmp_path / "sockets"
9696
socket_dir.mkdir()
9797
output_dir = tmp_path / "output"
98+
# The parent owns directory setup — the aggregator subprocess
99+
# fail-fasts (SystemExit) on a missing output dir to surface
100+
# contract violations in its own stderr instead of crashing
101+
# later on the atomic-write path. Mirror that contract here.
102+
output_dir.mkdir()
98103
# Use a unique socket name per test to avoid collisions if a
99104
# previous test run left an IPC file behind.
100105
suffix = uuid.uuid4().hex[:8]
@@ -149,6 +154,7 @@ def test_sigint_does_not_finalize_aggregator(self, tmp_path: Path):
149154
socket_dir = tmp_path / "sockets"
150155
socket_dir.mkdir()
151156
output_dir = tmp_path / "output"
157+
output_dir.mkdir() # parent owns dir setup (see sibling test)
152158
suffix = uuid.uuid4().hex[:8]
153159
proc = _spawn_aggregator(
154160
socket_dir,

tests/integration/commands/test_benchmark_command.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -183,13 +183,18 @@ def test_mode_logging(self, mock_http_echo_server, ds_dataset_path, caplog):
183183
)
184184

185185

186-
# Non-gated tokenizer model used in place of the templates' default
187-
# (which references gated meta-llama/Llama-3.1-*). The echo-server e2e
188-
# path doesn't care about the model identity, only that the tokenizer
189-
# exists for the metrics aggregator's ISL/OSL/TPOT triggers. TinyLlama's
190-
# tokenizer is ~1MB and matches the Llama-family tokenizer the templates
191-
# were written against.
192-
_TEST_MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
186+
# Local character-level tokenizer fixture used in place of the templates'
187+
# default (which references gated `meta-llama/Llama-3.1-*`). The echo-server
188+
# e2e path doesn't care about the model identity, only that a tokenizer
189+
# loads for the metrics aggregator's ISL/OSL/TPOT triggers. Using a local
190+
# fixture removes the HuggingFace Hub dependency from CI: no network call,
191+
# no ~1 MB download, no HF_TOKEN requirement, and the load completes in
192+
# milliseconds rather than seconds — well inside the parent launcher's
193+
# readiness timeout. ``AutoTokenizer.from_pretrained`` supports local
194+
# directories as a first-class input, so this uses the same production
195+
# code path with no test-only hooks.
196+
_TEST_TOKENIZER_DIR = Path(__file__).resolve().parents[2] / "assets/tokenizers/char"
197+
_TEST_MODEL_NAME = str(_TEST_TOKENIZER_DIR)
193198

194199

195200
def _resolve_template(template_path: Path, server_url: str) -> dict:

0 commit comments

Comments
 (0)