Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/config-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,9 @@ Profiling configuration for nsys or torch profiler.
profiling:
type: "nsys" # "none", "nsys", "nsys-time", or "torch"

# Extra arguments for nsys profile (when type is nsys or nsys-time)
extra_nsys_args: ["--stats=true"] # Optional: list of strings

# Phase-specific profiling step configs
prefill:
start_step: 10 # Step to start profiling
Expand All @@ -677,6 +680,7 @@ profiling:
| Field | Type | Required | Default | Description |
| ------------- | ------ | -------- | ------- | ---------------------------------------- |
| `type` | string | No | "none" | Profiling type: "none", "nsys", "nsys-time", "torch" |
| `extra_nsys_args` | list[string] | No | null | Extra args for nsys profile (when type is `nsys` or `nsys-time`) |
| `prefill` | object | Disaggregated | null | Prefill phase config |
| `decode` | object | Disaggregated | null | Decode phase config |
| `aggregated` | object | Aggregated | null | Aggregated phase config |
Expand Down Expand Up @@ -730,6 +734,7 @@ resources:

profiling:
type: "nsys"
extra_nsys_args: ["--stats=true", "--trace=osrt"]
aggregated:
start_step: 10
stop_step: 25
Expand Down
6 changes: 6 additions & 0 deletions docs/profiling.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ profiling:
profiling:
type: "torch" # Required: "none", "torch", "nsys", or "nsys-time"

# nsys / nsys-time: extra arguments for nsys profile (e.g. ["--stats=true"])
extra_nsys_args: [] # Optional

# Disaggregated mode: must set both prefill and decode sections
prefill:
start_step: 0 # Step to start profiling for prefill workers
Expand Down Expand Up @@ -111,10 +114,13 @@ When using `nsys`, workers are wrapped with:
```bash
nsys profile -t cuda,nvtx --cuda-graph-trace=node \
-c cudaProfilerApi --capture-range-end stop \
[extra_nsys_args...] \
-o /logs/profiles/{mode}/{name} \
python3 -m sglang.launch_server ...
```

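You can pass extra arguments via `profiling.extra_nsys_args` (e.g. `["--stats=true", "--trace=osrt"]`). They are inserted after the default options and before `-o`, in the order given. Take care with options that repeat a default (for example `--trace`, which the wrapper already sets via `-t cuda,nvtx`): the repeated option may override or conflict with the default.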

## Example Configurations

### Torch Profiler (Recommended for Python analysis)
Expand Down
13 changes: 11 additions & 2 deletions src/srtctl/core/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,9 @@ class ProfilingConfig:

type: str = "none" # "none", "nsys", "nsys-time", or "torch"

# Extra arguments passed to nsys profile (appended before `-o`; see get_nsys_prefix)
extra_nsys_args: list[str] | None = None

# Phase-specific profiling step configs (not used for nsys-time)
prefill: ProfilingPhaseConfig | None = None
decode: ProfilingPhaseConfig | None = None
Expand Down Expand Up @@ -768,6 +771,9 @@ def _get_nsys_prefix_trtllm(self, output_file: str) -> list[str]:
"stop",
]

if self.extra_nsys_args:
cmd.extend(self.extra_nsys_args)

cmd += [
"--kill",
"none",
Expand Down Expand Up @@ -814,10 +820,13 @@ def get_nsys_prefix(
"stop",
"--force-overwrite",
"true",
"-o",
output_file,
]

if self.extra_nsys_args:
cmd.extend(self.extra_nsys_args)

cmd.extend(["-o", output_file])

if frontend_type == "dynamo":
cmd.insert(-2, "--trace-fork-before-exec=true")

Expand Down
32 changes: 32 additions & 0 deletions tests/test_profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,38 @@ def test_nsys_profiling(self):
prefix_router = profiling.get_nsys_prefix("/output/test", frontend_type="sglangrouter")
assert "--trace-fork-before-exec=true" not in prefix_router

def test_nsys_profiling_with_extra_args(self):
    """Check that ``extra_nsys_args`` are forwarded into the nsys prefix.

    Every configured extra argument must be present in the generated
    command, keep the order in which it was configured, and appear
    before the ``-o`` output flag.
    """
    from srtctl.core.schema import ProfilingConfig

    extra_args = ["--stats=true", "--trace=osrt"]
    profiling = ProfilingConfig(
        type="nsys",
        extra_nsys_args=extra_args,
    )

    prefix = profiling.get_nsys_prefix("/output/test")
    assert "nsys" in prefix
    assert "profile" in prefix
    assert "/output/test" in prefix
    for arg in extra_args:
        assert arg in prefix

    # Check every extra arg, not just the first one: a regression that
    # places later args after -o (or reorders them) must fail this test.
    o_idx = prefix.index("-o")
    positions = [prefix.index(arg) for arg in extra_args]
    assert positions == sorted(positions)
    assert all(pos < o_idx for pos in positions)

def test_nsys_trtllm_prefix_includes_extra_args(self):
    """The TRTLLM nsys wrapper must forward extra_nsys_args ahead of the -o flag."""
    from srtctl.core.schema import ProfilingConfig

    extra_flag = "--stats=true"
    config = ProfilingConfig(type="nsys", extra_nsys_args=[extra_flag])

    command = config.get_nsys_prefix("/out/rank", backend_type="trtllm")

    # Presence first, so a missing flag fails as an assertion rather than
    # as a ValueError from list.index().
    assert extra_flag in command
    flag_position = command.index(extra_flag)
    output_position = command.index("-o")
    assert flag_position < output_position

def test_torch_profiling(self):
"""Test torch profiling configuration."""
from srtctl.core.schema import ProfilingConfig, ProfilingPhaseConfig
Expand Down
Loading