Skip to content

Commit 6e61701

Browse files
committed
Merge remote-tracking branch 'origin/main' into ajc/pynvml
2 parents 9d5a0c2 + bbfd30e commit 6e61701

30 files changed

+1557
-564
lines changed

docs/api/synthesis.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ Parameters for synthetic trace generation.
348348
**Fields:**
349349
- `speedup_ratio: float = 1.0` - Timestamp scaling multiplier (ge 0.0)
350350
- `prefix_len_multiplier: float = 1.0` - Core prefix length multiplier (ge 0.0)
351-
- `prefix_root_multiplier: int = 1` - Tree replication factor (ge 1)
351+
- `prefix_root_multiplier: int = 1` - Number of independent trees to distribute traces across (ge 1)
352352
- `prompt_len_multiplier: float = 1.0` - Leaf prompt length multiplier (ge 0.0)
353353
- `max_isl: int | None = None` - Maximum input sequence length filter
354354
- `block_size: int = 512` - KV cache page size (ge 1)

docs/cli_options.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ Multiplier for core prefix branch lengths in radix tree.
485485

486486
#### `--synthesis-prefix-root-multiplier` `<int>`
487487

488-
Number of times to replicate the radix tree structure.
488+
Number of independent radix trees to distribute traces across.
489489
<br>_Default: `1`_
490490

491491
#### `--synthesis-prompt-len-multiplier` `<float>`
@@ -495,7 +495,11 @@ Multiplier for leaf path (unique prompt) lengths.
495495

496496
#### `--synthesis-max-isl` `<int>`
497497

498-
Maximum input sequence length to include in synthesis.
498+
Maximum input sequence length for filtering. Traces with input_length > max_isl are skipped.
499+
500+
#### `--synthesis-max-osl` `<int>`
501+
502+
Maximum output sequence length cap. Traces with output_length > max_osl are capped to max_osl.
499503

500504
### Conversation Input
501505

docs/tutorials/prefix-synthesis.md

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -141,12 +141,14 @@ aiperf profile \
141141
```
142142

143143
#### `--synthesis-prefix-root-multiplier` (default: 1)
144-
Replicate the prefix tree structure N times:
145-
- `1`: No replication
146-
- `2`: Double the number of unique prefix combinations
147-
- `3`: Triple the number of unique prefix combinations
144+
Distribute traces across N independent radix trees:
145+
- `1`: All traces share the same prefix tree (default)
146+
- `2`: Traces are randomly assigned to one of 2 independent trees (~50% each)
147+
- `3`: Traces are randomly assigned to one of 3 independent trees (~33% each)
148148

149-
Example: Generate more diverse prefix patterns:
149+
Each tree has identical structure but different hash IDs, so traces in different trees cannot share prefixes. This reduces the effective cache hit rate by splitting the workload.
150+
151+
Example: Simulate lower cache hit rates with more diverse prefix roots:
150152
```bash
151153
aiperf profile \
152154
--input-file traces/production.jsonl \
@@ -171,11 +173,11 @@ aiperf profile \
171173
```
172174

173175
#### `--synthesis-max-isl` (optional)
174-
Cap the maximum input sequence length:
175-
- Not set: No cap
176-
- `4096`: Maximum 4,096 tokens per request
176+
Filter traces by maximum input sequence length. Traces with input_length > max_isl are skipped:
177+
- Not set: No filtering
178+
- `4096`: Skip traces with more than 4,096 input tokens
177179

178-
Example: Test with bounded context:
180+
Example: Filter out long contexts:
179181
```bash
180182
aiperf profile \
181183
--input-file traces/production.jsonl \
@@ -184,6 +186,20 @@ aiperf profile \
184186
...
185187
```
186188

189+
#### `--synthesis-max-osl` (optional)
190+
Cap the output sequence length of each trace. Traces with output_length > max_osl have their output_length reduced to max_osl:
191+
- Not set: No capping
192+
- `2048`: Cap output_length to 2,048 tokens
193+
194+
Example: Cap output lengths to 2,048 tokens:
195+
```bash
196+
aiperf profile \
197+
--input-file traces/production.jsonl \
198+
--custom-dataset-type mooncake_trace \
199+
--synthesis-max-osl 2048 \
200+
...
201+
```
202+
187203
## Advanced Examples
188204

189205
### Scenario 1: Simulate High Cache Hit Rate

src/aiperf/common/bootstrap.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,17 @@ async def _run_service():
7979

8080
ensure_modules_loaded()
8181

82+
if service_class.__name__ in ("Worker", "TimingManager"):
83+
# Disable garbage collection in child processes to prevent unpredictable latency spikes.
84+
# Only required in timing-critical services such as Worker and TimingManager.
85+
import gc
86+
87+
for _ in range(3): # Run 3 times to ensure all objects are collected
88+
gc.collect()
89+
gc.freeze()
90+
gc.set_threshold(0)
91+
gc.disable()
92+
8293
# Load and apply custom GPU metrics in child process
8394
if user_config.gpu_telemetry_metrics_file:
8495
from aiperf.gpu_telemetry import constants

src/aiperf/common/config/input_config.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,18 @@ def validate_synthesis_requires_mooncake_trace(self) -> Self:
109109
options and defer validation to runtime when the actual type is determined.
110110
"""
111111
if (
112-
self.synthesis.should_synthesize()
112+
(
113+
self.synthesis.should_synthesize()
114+
or self.synthesis.max_isl is not None
115+
or self.synthesis.max_osl is not None
116+
)
113117
and self.custom_dataset_type is not None
114118
and self.custom_dataset_type != CustomDatasetType.MOONCAKE_TRACE
115119
):
116120
raise ValueError(
117121
"Synthesis options (--synthesis-speedup-ratio, --synthesis-prefix-len-multiplier, "
118-
"--synthesis-prefix-root-multiplier, --synthesis-prompt-len-multiplier) "
122+
"--synthesis-prefix-root-multiplier, --synthesis-prompt-len-multiplier, "
123+
"--synthesis-max-isl, --synthesis-max-osl) "
119124
"require --custom-dataset-type mooncake_trace"
120125
)
121126
return self

src/aiperf/common/config/synthesis_config.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class SynthesisConfig(BaseConfig):
4141
Field(
4242
default=1,
4343
ge=1,
44-
description="Number of times to replicate the radix tree structure",
44+
description="Number of independent radix trees to distribute traces across",
4545
),
4646
CLIParameter(name=("--synthesis-prefix-root-multiplier",), group=_CLI_GROUP),
4747
] = 1
@@ -61,16 +61,28 @@ class SynthesisConfig(BaseConfig):
6161
Field(
6262
default=None,
6363
ge=1,
64-
description="Maximum input sequence length to include in synthesis",
64+
description="Maximum input sequence length for filtering. Traces with input_length > max_isl are skipped.",
6565
),
6666
CLIParameter(name=("--synthesis-max-isl",), group=_CLI_GROUP),
6767
] = None
6868

69+
max_osl: Annotated[
70+
int | None,
71+
Field(
72+
default=None,
73+
ge=1,
74+
description="Maximum output sequence length cap. Traces with output_length > max_osl are capped to max_osl.",
75+
),
76+
CLIParameter(name=("--synthesis-max-osl",), group=_CLI_GROUP),
77+
] = None
78+
6979
def should_synthesize(self) -> bool:
7080
"""Check if synthesis should be auto-triggered based on non-default values.
7181
82+
max_isl is a filter and max_osl is a cap; neither is a synthesis transformation, so neither triggers synthesis.
83+
7284
Returns:
73-
True if any synthesis parameter differs from defaults.
85+
True if any synthesis parameter differs from defaults (excluding max_isl and max_osl).
7486
"""
7587
return (
7688
self.speedup_ratio != 1.0

src/aiperf/common/models/dataset_models.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,46 @@ def metadata(self) -> TurnMetadata:
146146
delay_ms=self.delay,
147147
)
148148

149+
def copy_with_stripped_media(self) -> "Turn":
150+
"""Create a copy of this turn with multimodal data replaced by placeholders.
151+
152+
This preserves text data (needed for tokenization) but replaces potentially
153+
large image/audio/video contents with small placeholder strings. This is
154+
more efficient than a full deep copy followed by stripping.
155+
156+
Returns:
157+
A new Turn with stripped multimodal contents.
158+
"""
159+
return Turn(
160+
model=self.model,
161+
role=self.role,
162+
timestamp=self.timestamp,
163+
delay=self.delay,
164+
max_tokens=self.max_tokens,
165+
texts=[Text(name=t.name, contents=list(t.contents)) for t in self.texts],
166+
images=[
167+
Image(
168+
name=img.name,
169+
contents=[f"image_{i}" for i in range(len(img.contents))],
170+
)
171+
for img in self.images
172+
],
173+
audios=[
174+
Audio(
175+
name=aud.name,
176+
contents=[f"audio_{i}" for i in range(len(aud.contents))],
177+
)
178+
for aud in self.audios
179+
],
180+
videos=[
181+
Video(
182+
name=vid.name,
183+
contents=[f"video_{i}" for i in range(len(vid.contents))],
184+
)
185+
for vid in self.videos
186+
],
187+
)
188+
149189

150190
class ConversationMetadata(AIPerfBaseModel):
151191
"""Metadata of a conversation."""

src/aiperf/common/models/record_models.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -447,10 +447,6 @@ class RequestRecord(AIPerfBaseModel):
447447
default=None,
448448
description="The original request info.",
449449
)
450-
turns: list[Turn] = Field(
451-
default_factory=list,
452-
description="The actual turns of the request. This will include assistant turns as well as user turns in multi-turn conversations.",
453-
)
454450
request_headers: dict[str, str] | None = Field(
455451
default=None,
456452
description="The headers of the request.",
@@ -510,6 +506,11 @@ class RequestRecord(AIPerfBaseModel):
510506
"Includes detailed timing for connection establishment, DNS resolution, request/response events, etc. "
511507
"The type of the trace data is determined by the transport and library used.",
512508
)
509+
turns: list[Turn] = Field(
510+
default_factory=list,
511+
description="Deep copy of the request turns. This is a copy of the turns from request_info, "
512+
"made to avoid mutating the original session data when stripping multimodal content.",
513+
)
513514

514515
@field_validator("trace_data", mode="before")
515516
@classmethod

0 commit comments

Comments
 (0)