# system_profiling/configs/example.pbtxt

# Full-system profiler configuration
# All fields are optional — defaults are applied for missing values.
# PID 0 is resolved to the current process PID at runtime.

# Output directory for all .pb files (created if it doesn't exist).
# Every component's `output_file` below is resolved relative to this
# directory. Empty or omitted = current working directory.
output_dir: "profiling_output"

# GPU metric sampling: device selection, sampling rate, buffer/sample
# limits, the list of metrics to collect, and the output file.
gpu {
    enabled: true
    # Multi-device sampling: add one `device_indices` entry per device to
    # profile. Empty = device 0. The same metrics list is applied to every
    # listed device.
    device_indices: 0
    sampling_frequency_hz: 10000    # 10 kHz (one sample every 100 us)
    hw_buffer_size: 536870912       # 512 MiB (512 * 1024 * 1024)
    # NOTE(review): at 10 kHz, 50000 samples is 5 s of data, which is
    # shorter than flush_interval_ms below (10 s). The exact meaning of
    # max_samples is not shown in this file — confirm the two limits are
    # intended to interact this way.
    max_samples: 50000
    # One `metrics` entry per metric name (repeated field).
    #
    # SM utilization as % of peak — emitted directly by CUPTI rather
    # than computed post-hoc from raw cycle counts (the new
    # descriptor-driven renderer plots metrics in their native unit).
    metrics: "sm__cycles_active.avg.pct_of_peak_sustained_elapsed"
    metrics: "sm__cycles_active.max.pct_of_peak_sustained_elapsed"
    # Active warps per active SM cycle (0-64 on H100).
    metrics: "sm__warps_active.avg.per_cycle_active"
    metrics: "sm__warps_active.max.per_cycle_active"
    # DRAM read/write bandwidth as % of peak.
    metrics: "dram__read_throughput.avg.pct_of_peak_sustained_elapsed"
    metrics: "dram__write_throughput.avg.pct_of_peak_sustained_elapsed"
    # PCIe host<->device throughput in bytes/sec, plus the raw per-window
    # byte counters (their cumulative sum gives total bytes transferred).
    metrics: "pcie__read_bytes.sum.per_second"
    metrics: "pcie__write_bytes.sum.per_second"
    metrics: "pcie__read_bytes.sum"
    metrics: "pcie__write_bytes.sum"
    # NVLink rx/tx throughput in bytes/sec.
    metrics: "nvlrx__bytes.sum.per_second"
    metrics: "nvltx__bytes.sum.per_second"
    flush_interval_ms: 10000        # 10 s
    # Relative to the top-level output_dir.
    output_file: "gpu_metrics.pb"
}

# System metric sampling, including per-process tracking for the
# processes listed below.
system {
    enabled: true
    sampling_frequency_hz: 100      # 100 Hz (one sample every 10 ms)
    # Processes to track individually. Each entry is { pid, alias }.
    # `alias` is optional: with it the visualizer legend shows
    # "<alias> (PID xxx)"; without it the label is plain "PID xxx".
    # pid: 0 is resolved to the current process at runtime.
    processes { pid: 0  alias: "self" }
    flush_interval_ms: 5000         # 5 s
    # Relative to the top-level output_dir.
    output_file: "system_metrics.pb"
}

# Disk metric sampling for the listed devices and processes.
disk {
    enabled: true
    sampling_frequency_hz: 100      # 100 Hz (one sample every 10 ms)
    # One `devices` entry per device to sample.
    # NOTE(review): "md0" and "nvme7n1" look like host-specific block-device
    # names — presumably these must be changed to match the target machine;
    # confirm how unknown devices are handled.
    devices: "md0"
    devices: "nvme7n1"
    processes { pid: 0  alias: "self" }   # see `system` block above
    flush_interval_ms: 5000         # 5 s
    # Relative to the top-level output_dir.
    output_file: "disk_metrics.pb"
}

# Event recording output settings.
# NOTE(review): what counts as an "event" is not described in this file —
# confirm against the profiler's documentation.
events {
    enabled: true
    flush_interval_ms: 1000         # 1 s
    # Relative to the top-level output_dir.
    output_file: "events.pb"
}