ai-dynamo
diff --git a/docs/api/synthesis.md b/docs/api/synthesis.md
Lines changed: 1 addition & 1 deletion
diff --git a/docs/cli_options.md b/docs/cli_options.md
Lines changed: 6 additions & 2 deletions
diff --git a/docs/tutorials/prefix-synthesis.md b/docs/tutorials/prefix-synthesis.md
Lines changed: 25 additions & 9 deletions
diff --git a/src/aiperf/common/bootstrap.py b/src/aiperf/common/bootstrap.py
Lines changed: 11 additions & 0 deletions
diff --git a/src/aiperf/common/config/input_config.py b/src/aiperf/common/config/input_config.py
Lines changed: 7 additions & 2 deletions
diff --git a/src/aiperf/common/config/synthesis_config.py b/src/aiperf/common/config/synthesis_config.py
Lines changed: 15 additions & 3 deletions
diff --git a/src/aiperf/common/models/dataset_models.py b/src/aiperf/common/models/dataset_models.py
Lines changed: 40 additions & 0 deletions
diff --git a/src/aiperf/common/models/record_models.py b/src/aiperf/common/models/record_models.py
Lines changed: 5 additions & 4 deletions
@@ -348,7 +348,7 @@ Parameters for synthetic trace generation.
 **Fields:**
 - `speedup_ratio: float = 1.0` - Timestamp scaling multiplier (ge 0.0)
 - `prefix_len_multiplier: float = 1.0` - Core prefix length multiplier (ge 0.0)
-- `prefix_root_multiplier: int = 1` - Tree replication factor (ge 1)
+- `prefix_root_multiplier: int = 1` - Number of independent trees to distribute traces across (ge 1)
 - `prompt_len_multiplier: float = 1.0` - Leaf prompt length multiplier (ge 0.0)
 - `max_isl: int | None = None` - Maximum input sequence length filter
 - `block_size: int = 512` - KV cache page size (ge 1)
 
@@ -485,7 +485,7 @@ Multiplier for core prefix branch lengths in radix tree.
 
 #### `--synthesis-prefix-root-multiplier` `<int>`
 
-Number of times to replicate the radix tree structure.
+Number of independent radix trees to distribute traces across.
 <br>_Default: `1`_
 
 #### `--synthesis-prompt-len-multiplier` `<float>`
@@ -495,7 +495,11 @@ Multiplier for leaf path (unique prompt) lengths.
 
 #### `--synthesis-max-isl` `<int>`
 
-Maximum input sequence length to include in synthesis.
+Maximum input sequence length for filtering. Traces with input_length > max_isl are skipped.
+
+#### `--synthesis-max-osl` `<int>`
+
+Maximum output sequence length cap. Traces with output_length > max_osl are capped to max_osl.
 
 ### Conversation Input
 
 
@@ -141,12 +141,14 @@ aiperf profile \
 ```
 
 #### `--synthesis-prefix-root-multiplier` (default: 1)
-Replicate the prefix tree structure N times:
-- `1`: No replication
-- `2`: Double the number of unique prefix combinations
-- `3`: Triple the number of unique prefix combinations
+Distribute traces across N independent radix trees:
+- `1`: All traces share the same prefix tree (default)
+- `2`: Traces randomly assigned to 2 independent trees (~50% each)
+- `3`: Traces randomly assigned to 3 independent trees (~33% each)
 
-Example: Generate more diverse prefix patterns:
+Each tree has identical structure but different hash IDs, so traces in different trees cannot share prefixes. This reduces the effective cache hit rate by splitting the workload.
+
+Example: Simulate lower cache hit rates with more diverse prefix roots:
 ```bash
 aiperf profile \
     --input-file traces/production.jsonl \
@@ -171,11 +173,11 @@ aiperf profile \
 ```
 
 #### `--synthesis-max-isl` (optional)
-Cap the maximum input sequence length:
-- Not set: No cap
-- `4096`: Maximum 4,096 tokens per request
+Filter traces by maximum input sequence length. Traces with input_length > max_isl are skipped:
+- Not set: No filtering
+- `4096`: Skip traces with more than 4,096 input tokens
 
-Example: Test with bounded context:
+Example: Filter out long contexts:
 ```bash
 aiperf profile \
     --input-file traces/production.jsonl \
@@ -184,6 +186,20 @@ aiperf profile \
     ...
 ```
 
+#### `--synthesis-max-osl` (optional)
+Cap the output sequence length of each trace. Traces with output_length > max_osl are not skipped; their output_length is reduced to max_osl:
+- Not set: No capping
+- `2048`: Cap output_length to 2,048 tokens
+
+Example: Cap output lengths to 2,048 tokens:
+```bash
+aiperf profile \
+    --input-file traces/production.jsonl \
+    --custom-dataset-type mooncake_trace \
+    --synthesis-max-osl 2048 \
+    ...
+```
+
 ## Advanced Examples
 
 ### Scenario 1: Simulate High Cache Hit Rate
 
@@ -79,6 +79,17 @@ async def _run_service():
 
         ensure_modules_loaded()
 
+        if service_class.__name__ in ("Worker", "TimingManager"):
+            # Disable garbage collection in child processes to prevent unpredictable latency spikes.
+            # Only required in timing-critical services such as Worker and TimingManager.
+            import gc
+
+            for _ in range(3):  # Run 3 times to ensure all objects are collected
+                gc.collect()
+            gc.freeze()
+            gc.set_threshold(0)
+            gc.disable()
+
         # Load and apply custom GPU metrics in child process
         if user_config.gpu_telemetry_metrics_file:
             from aiperf.gpu_telemetry import constants
 
@@ -109,13 +109,18 @@ def validate_synthesis_requires_mooncake_trace(self) -> Self:
         options and defer validation to runtime when the actual type is determined.
         """
         if (
-            self.synthesis.should_synthesize()
+            (
+                self.synthesis.should_synthesize()
+                or self.synthesis.max_isl is not None
+                or self.synthesis.max_osl is not None
+            )
             and self.custom_dataset_type is not None
             and self.custom_dataset_type != CustomDatasetType.MOONCAKE_TRACE
         ):
             raise ValueError(
                 "Synthesis options (--synthesis-speedup-ratio, --synthesis-prefix-len-multiplier, "
-                "--synthesis-prefix-root-multiplier, --synthesis-prompt-len-multiplier) "
+                "--synthesis-prefix-root-multiplier, --synthesis-prompt-len-multiplier, "
+                "--synthesis-max-isl, --synthesis-max-osl) "
                 "require --custom-dataset-type mooncake_trace"
             )
         return self
 
@@ -41,7 +41,7 @@ class SynthesisConfig(BaseConfig):
         Field(
             default=1,
             ge=1,
-            description="Number of times to replicate the radix tree structure",
+            description="Number of independent radix trees to distribute traces across",
         ),
         CLIParameter(name=("--synthesis-prefix-root-multiplier",), group=_CLI_GROUP),
     ] = 1
@@ -61,16 +61,28 @@ class SynthesisConfig(BaseConfig):
         Field(
             default=None,
             ge=1,
-            description="Maximum input sequence length to include in synthesis",
+            description="Maximum input sequence length for filtering. Traces with input_length > max_isl are skipped.",
         ),
         CLIParameter(name=("--synthesis-max-isl",), group=_CLI_GROUP),
     ] = None
 
+    max_osl: Annotated[
+        int | None,
+        Field(
+            default=None,
+            ge=1,
+            description="Maximum output sequence length cap. Traces with output_length > max_osl are capped to max_osl.",
+        ),
+        CLIParameter(name=("--synthesis-max-osl",), group=_CLI_GROUP),
+    ] = None
+
     def should_synthesize(self) -> bool:
         """Check if synthesis should be auto-triggered based on non-default values.
 
+        max_isl is a filter and max_osl is a cap; neither is a synthesis transformation, so neither triggers synthesis.
+
         Returns:
-            True if any synthesis parameter differs from defaults.
+            True if any synthesis parameter differs from defaults (excluding max_isl and max_osl).
         """
         return (
             self.speedup_ratio != 1.0
 
@@ -146,6 +146,46 @@ def metadata(self) -> TurnMetadata:
             delay_ms=self.delay,
         )
 
+    def copy_with_stripped_media(self) -> "Turn":
+        """Create a copy of this turn with multimodal data replaced by placeholders.
+
+        This preserves text data (needed for tokenization) but replaces potentially
+        large image/audio/video contents with small placeholder strings. This is
+        more efficient than a full deep copy followed by stripping.
+
+        Returns:
+            A new Turn with stripped multimodal contents.
+        """
+        return Turn(
+            model=self.model,
+            role=self.role,
+            timestamp=self.timestamp,
+            delay=self.delay,
+            max_tokens=self.max_tokens,
+            texts=[Text(name=t.name, contents=list(t.contents)) for t in self.texts],
+            images=[
+                Image(
+                    name=img.name,
+                    contents=[f"image_{i}" for i in range(len(img.contents))],
+                )
+                for img in self.images
+            ],
+            audios=[
+                Audio(
+                    name=aud.name,
+                    contents=[f"audio_{i}" for i in range(len(aud.contents))],
+                )
+                for aud in self.audios
+            ],
+            videos=[
+                Video(
+                    name=vid.name,
+                    contents=[f"video_{i}" for i in range(len(vid.contents))],
+                )
+                for vid in self.videos
+            ],
+        )
+
 
 class ConversationMetadata(AIPerfBaseModel):
     """Metadata of a conversation."""
 
@@ -447,10 +447,6 @@ class RequestRecord(AIPerfBaseModel):
         default=None,
         description="The original request info.",
     )
-    turns: list[Turn] = Field(
-        default_factory=list,
-        description="The actual turns of the request. This will include assistant turns as well as user turns in multi-turn conversations.",
-    )
     request_headers: dict[str, str] | None = Field(
         default=None,
         description="The headers of the request.",
@@ -510,6 +506,11 @@ class RequestRecord(AIPerfBaseModel):
         "Includes detailed timing for connection establishment, DNS resolution, request/response events, etc. "
         "The type of the trace data is determined by the transport and library used.",
     )
+    turns: list[Turn] = Field(
+        default_factory=list,
+        description="Deep copy of the request turns. This is a copy of the turns from request_info, "
+        "made to avoid mutating the original session data when stripping multimodal content.",
+    )
 
     @field_validator("trace_data", mode="before")
     @classmethod