Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/config-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,9 @@ Profiling configuration for nsys or torch profiler.
profiling:
type: "nsys" # "none", "nsys", "nsys-time", or "torch"

# Extra arguments for nsys profile (when type is nsys or nsys-time)
extra_nsys_args: ["--stats=true"] # Optional: list of strings

# Phase-specific profiling step configs
prefill:
start_step: 10 # Step to start profiling
Expand All @@ -677,6 +680,7 @@ profiling:
| Field | Type | Required | Default | Description |
| ------------- | ------ | -------- | ------- | ---------------------------------------- |
| `type` | string | No | "none" | Profiling type: "none", "nsys", "nsys-time", "torch" |
| `extra_nsys_args` | list[string] | No | null | Extra args for nsys profile (when type is `nsys` or `nsys-time`) |
| `prefill` | object | Disaggregated | null | Prefill phase config |
| `decode` | object | Disaggregated | null | Decode phase config |
| `aggregated` | object | Aggregated | null | Aggregated phase config |
Expand Down Expand Up @@ -730,6 +734,7 @@ resources:

profiling:
type: "nsys"
extra_nsys_args: ["--stats=true", "--trace=osrt"]
aggregated:
start_step: 10
stop_step: 25
Expand Down
6 changes: 6 additions & 0 deletions docs/profiling.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ profiling:
profiling:
type: "torch" # Required: "none", "torch", "nsys", or "nsys-time"

# nsys / nsys-time: extra arguments for nsys profile (e.g. ["--stats=true"])
extra_nsys_args: [] # Optional

# Disaggregated mode: must set both prefill and decode sections
prefill:
start_step: 0 # Step to start profiling for prefill workers
Expand Down Expand Up @@ -111,10 +114,13 @@ When using `nsys`, workers are wrapped with:
```bash
nsys profile -t cuda,nvtx --cuda-graph-trace=node \
-c cudaProfilerApi --capture-range-end stop \
[extra_nsys_args...] \
-o /logs/profiles/{mode}/{name} \
python3 -m sglang.launch_server ...
```

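You can pass extra arguments via `profiling.extra_nsys_args` (e.g. `["--stats=true", "--trace=osrt"]`). They are added to the `nsys profile` command before the `-o` output option and apply when `type` is `nsys` or `nsys-time`.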

## Example Configurations

### Torch Profiler (Recommended for Python analysis)
Expand Down
13 changes: 11 additions & 2 deletions src/srtctl/core/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,9 @@ class ProfilingConfig:

type: str = "none" # "none", "nsys", "nsys-time", or "torch"

# Extra arguments passed to nsys profile (appended before `-o`; see get_nsys_prefix)
extra_nsys_args: list[str] | None = None

# Phase-specific profiling step configs (not used for nsys-time)
prefill: ProfilingPhaseConfig | None = None
decode: ProfilingPhaseConfig | None = None
Expand Down Expand Up @@ -768,6 +771,9 @@ def _get_nsys_prefix_trtllm(self, output_file: str) -> list[str]:
"stop",
]

if self.extra_nsys_args:
cmd.extend(self.extra_nsys_args)

cmd += [
"--kill",
"none",
Expand Down Expand Up @@ -814,10 +820,13 @@ def get_nsys_prefix(
"stop",
"--force-overwrite",
"true",
"-o",
output_file,
]

if self.extra_nsys_args:
cmd.extend(self.extra_nsys_args)

cmd.extend(["-o", output_file])

if frontend_type == "dynamo":
cmd.insert(-2, "--trace-fork-before-exec=true")

Expand Down
32 changes: 32 additions & 0 deletions tests/test_profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,38 @@ def test_nsys_profiling(self):
prefix_router = profiling.get_nsys_prefix("/output/test", frontend_type="sglangrouter")
assert "--trace-fork-before-exec=true" not in prefix_router

def test_nsys_profiling_with_extra_args(self):
    """Custom extra_nsys_args show up in the nsys prefix, ahead of ``-o``."""
    from srtctl.core.schema import ProfilingConfig

    extra_args = ["--stats=true", "--trace=osrt"]
    config = ProfilingConfig(type="nsys", extra_nsys_args=extra_args)

    command = config.get_nsys_prefix("/output/test")

    # The base nsys invocation, the output path, and every extra arg must be present.
    for token in ("nsys", "profile", "/output/test", *extra_args):
        assert token in command

    # Extra args must precede the -o flag.
    assert command.index("--stats=true") < command.index("-o")

def test_nsys_trtllm_prefix_includes_extra_args(self):
    """With backend_type="trtllm", extra_nsys_args are included and placed before ``-o``."""
    from srtctl.core.schema import ProfilingConfig

    config = ProfilingConfig(type="nsys", extra_nsys_args=["--stats=true"])
    command = config.get_nsys_prefix("/out/rank", backend_type="trtllm")

    assert "--stats=true" in command

    # Same ordering guarantee as the default path: the extra arg precedes -o.
    stats_pos = command.index("--stats=true")
    output_flag_pos = command.index("-o")
    assert stats_pos < output_flag_pos

def test_torch_profiling(self):
"""Test torch profiling configuration."""
from srtctl.core.schema import ProfilingConfig, ProfilingPhaseConfig
Expand Down
Loading