-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample.pbtxt
More file actions
68 lines (63 loc) · 2.56 KB
/
Copy pathexample.pbtxt
File metadata and controls
68 lines (63 loc) · 2.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Full-system profiler configuration (protobuf text format).
#
# All fields are optional — defaults are applied for missing values.
# A pid of 0 is resolved to the current process's PID at runtime.

# Output directory for all .pb files (created if it doesn't exist).
# Each component's output_file is interpreted relative to this directory.
# Empty or omitted = current working directory.
output_dir: "profiling_output"
gpu {
  enabled: true

  # Devices to sample: add one device_indices entry per device to profile.
  # With no entries, device 0 is profiled. The metrics list below is
  # applied to every listed device.
  device_indices: 0

  sampling_frequency_hz: 10000  # 10 kHz
  hw_buffer_size: 536870912     # 512 MiB (512 * 1024 * 1024)
  max_samples: 50000

  # SM utilization as a percentage of peak. CUPTI emits this value
  # directly, so it is not computed post-hoc from raw cycle counts (the
  # descriptor-driven renderer plots each metric in its native unit).
  metrics: "sm__cycles_active.avg.pct_of_peak_sustained_elapsed"
  metrics: "sm__cycles_active.max.pct_of_peak_sustained_elapsed"

  # Active warps per active SM cycle (0-64 on H100).
  metrics: "sm__warps_active.avg.per_cycle_active"
  metrics: "sm__warps_active.max.per_cycle_active"

  # DRAM read/write bandwidth as a percentage of peak.
  metrics: "dram__read_throughput.avg.pct_of_peak_sustained_elapsed"
  metrics: "dram__write_throughput.avg.pct_of_peak_sustained_elapsed"

  # PCIe host<->device throughput in bytes/sec, followed by the raw
  # per-window byte counters (their cumulative sum gives the total
  # bytes transferred).
  metrics: "pcie__read_bytes.sum.per_second"
  metrics: "pcie__write_bytes.sum.per_second"
  metrics: "pcie__read_bytes.sum"
  metrics: "pcie__write_bytes.sum"

  # NVLink receive/transmit throughput in bytes/sec.
  metrics: "nvlrx__bytes.sum.per_second"
  metrics: "nvltx__bytes.sum.per_second"

  flush_interval_ms: 10000  # 10 s
  output_file: "gpu_metrics.pb"
}
system {
  enabled: true
  sampling_frequency_hz: 100  # 100 Hz

  # Per-process tracking: one `processes` entry per process, each holding
  # a pid and an optional alias. Without an alias the visualizer labels
  # the process as plain "PID xxx"; with one, the legend shows
  # "<alias> (PID xxx)".
  # pid: 0 is resolved to the current process at runtime.
  processes {
    pid: 0
    alias: "self"
  }

  flush_interval_ms: 5000  # 5 s
  output_file: "system_metrics.pb"
}
disk {
  enabled: true
  sampling_frequency_hz: 100  # 100 Hz

  # Block devices to sample, one `devices` entry per device.
  # NOTE(review): these names look host-specific — confirm they exist on
  # the target machine.
  devices: "md0"
  devices: "nvme7n1"

  # Process to track: pid: 0 is resolved to the current process at
  # runtime; the alias is the label used for it in the visualizer legend.
  processes {
    pid: 0
    alias: "self"
  }

  flush_interval_ms: 5000  # 5 s
  output_file: "disk_metrics.pb"
}
events {
  enabled: true
  flush_interval_ms: 1000  # 1 s
  # Written under output_dir, like every component's output_file.
  output_file: "events.pb"
}