13 changes: 3 additions & 10 deletions docs/benchmark_modes/trace_replay.md
@@ -58,7 +58,7 @@ Example entry:

## Profile using a Custom Trace File

Create a trace file with timing information:
Create a trace file with timing information and run AIPerf:

<!-- aiperf-run-vllm-default-openai-endpoint-server -->
```bash
@@ -67,12 +67,7 @@
cat > custom_trace.jsonl << 'EOF'
{"timestamp": 10535, "input_length": 672, "output_length": 26, "hash_ids": [0, 3, 4, 5]}
{"timestamp": 27482, "input_length": 655, "output_length": 52, "hash_ids": [0, 6, 7]}
EOF
```
<!-- /aiperf-run-vllm-default-openai-endpoint-server -->
Run AIPerf with the trace file:

<!-- aiperf-run-vllm-default-openai-endpoint-server -->
```bash
aiperf profile \
--model Qwen/Qwen3-0.6B \
--endpoint-type chat \
@@ -93,11 +88,9 @@
For real-world benchmarking, use the FAST25 production trace data from the Mooncake project:
<!-- aiperf-run-vllm-default-openai-endpoint-server -->
```bash
# Download the Mooncake trace data
curl -o mooncake_trace.jsonl https://raw.githubusercontent.com/kvcache-ai/Mooncake/refs/heads/main/FAST25-release/arxiv-trace/mooncake_trace.jsonl

curl -L -o mooncake_trace.jsonl https://raw.githubusercontent.com/kvcache-ai/Mooncake/refs/heads/main/FAST25-release/arxiv-trace/mooncake_trace.jsonl &&
# Create a subset for quick testing
head -n 10 mooncake_trace.jsonl > mooncake_trace_short.jsonl

head -n 10 mooncake_trace.jsonl > mooncake_trace_short.jsonl &&
# Run the trace replay
aiperf profile \
--model Qwen/Qwen3-0.6B \
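Trace entries share a simple schema (`timestamp` in milliseconds, `input_length`, `output_length`, `hash_ids`), so a subset can be sanity-checked with standard tools before replay. A minimal sketch using portable awk, assuming every line carries an `input_length` field as in the entries above:

```shell
# Summarize a trace subset: request count and total input tokens.
# Assumes each line contains an "input_length" field, as in the entries above.
awk -F'"input_length": ' 'NF > 1 {
    split($2, a, ",")   # a[1] is the numeric value right after the key
    n++                 # one request per line
    total += a[1]       # accumulate input tokens
} END { print n " requests, " total " input tokens" }' mooncake_trace_short.jsonl
```

Run against the two example entries shown earlier, this prints `2 requests, 1327 input tokens`, a quick way to confirm the subset you cut with `head` is the one you meant to replay.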
16 changes: 15 additions & 1 deletion docs/tutorials/fixed-schedule.md
@@ -93,7 +93,21 @@
Execute only a portion of the schedule using start and end offsets:

<!-- aiperf-run-vllm-default-openai-endpoint-server -->
```bash
# Execute schedule from 2s to 6s window
# Create a fixed schedule with precise timing
cat > precise_schedule.jsonl << 'EOF'
{"timestamp": 0, "input_length": 100, "hash_ids": [3001]}
{"timestamp": 500, "input_length": 200, "hash_ids": [3002]}
{"timestamp": 750, "input_length": 150, "hash_ids": [3003]}
{"timestamp": 1000, "input_length": 300, "hash_ids": [3004]}
{"timestamp": 1250, "input_length": 180, "hash_ids": [3005]}
{"timestamp": 2000, "input_length": 400, "hash_ids": [3006]}
{"timestamp": 2500, "input_length": 250, "hash_ids": [3007]}
{"timestamp": 3000, "input_length": 350, "hash_ids": [3008]}
{"timestamp": 4000, "input_length": 500, "hash_ids": [3009]}
{"timestamp": 5000, "input_length": 600, "hash_ids": [3010, 3050]}
EOF

# Execute schedule from 2s to 4s window
aiperf profile \
--model Qwen/Qwen3-0.6B \
--endpoint-type chat \
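The windowing behavior can be previewed outside AIPerf. The sketch below uses standard awk and assumes `timestamp` is the first key on each line, as in `precise_schedule.jsonl` above; the exact boundary semantics (inclusive vs. exclusive) are up to AIPerf's own offset flags, so treat this only as an approximation:

```shell
# Preview which schedule entries fall inside a 2000-4000 ms window.
# Assumes "timestamp" is the first key on each line, as in precise_schedule.jsonl.
awk -F'[:,]' '{
    t = $2 + 0                      # second field is the timestamp value
    if (t >= 2000 && t <= 4000) print
}' precise_schedule.jsonl
```

Against the schedule above, an inclusive 2s-to-4s window keeps the four entries at 2000, 2500, 3000, and 4000 ms.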