Skip to content

Commit 5c394fd

Browse files
hexagon: profiler output fix and script updates (#24042)
* hex-ops: fix profiler output (i.e., remove the redundant NONEs) * hex-prof: update profiling script to support tot.usec column
1 parent 4fb16ec commit 5c394fd

2 files changed

Lines changed: 31 additions & 17 deletions

File tree

ggml/src/ggml-hexagon/htp-opnode.h

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,20 @@ struct htp_opnode {
5656
}
5757

5858
std::vector<const ggml_tensor *> get_inputs() const {
59+
if (fused.empty()) {
60+
int last_non_null = -1;
61+
for (int i = 0; i < GGML_MAX_SRC; i++) {
62+
if (node->src[i]) {
63+
last_non_null = i;
64+
}
65+
}
66+
std::vector<const ggml_tensor *> inputs(last_non_null + 1, nullptr);
67+
for (int i = 0; i <= last_non_null; i++) {
68+
inputs[i] = node->src[i];
69+
}
70+
return inputs;
71+
}
72+
5973
std::vector<const ggml_tensor *> inputs(GGML_MAX_SRC, nullptr);
6074
std::vector<const ggml_tensor *> outputs;
6175
outputs.push_back(node);
@@ -82,12 +96,8 @@ struct htp_opnode {
8296
};
8397

8498
for (int i = 0; i < GGML_MAX_SRC; i++) {
85-
if (fused.empty()) {
86-
inputs[i] = node->src[i];
87-
} else {
88-
if (node->src[i]) {
89-
add_input(node->src[i]);
90-
}
99+
if (node->src[i]) {
100+
add_input(node->src[i]);
91101
}
92102
}
93103
for (const auto * f : fused) {
@@ -98,10 +108,7 @@ struct htp_opnode {
98108
}
99109
}
100110

101-
if (!fused.empty()) {
102-
inputs.resize(count);
103-
}
104-
111+
inputs.resize(count);
105112
return inputs;
106113
}
107114

scripts/snapdragon/ggml-hexagon-profile.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
# Mapping of cli-friendly names to (internal_data_key, Display Header, numeric_sort_key)
1313
COL_MAP = {
14+
"tot-usec": ("tot_usec", "Tot usec", "_sort_tot_usec"),
1415
"op": ("op", "Op", "op"),
1516
"dims": ("dims", "Dims", "dims"),
1617
"dtypes": ("dtypes", "DTypes", "dtypes"),
@@ -24,7 +25,7 @@
2425
}
2526

2627
op_pattern = re.compile(
27-
r"profile-op\s+(?P<op_name>[A-Z_0-9+]+):\s+.*?\s+:\s+(?P<dims>[\d:x\s\->!]+)\s+:\s+(?P<types>[a-z\d_\s\->x]+)\s+:\s+.*?\s+usec\s+(?P<usec>\d+)\s+cycles\s+(?P<cycles>\d+)(?:\s+pmu\s+\[(?P<pmu>[\d,\s]+)\])?"
28+
r"profile-op\s+(?P<op_name>[A-Z_0-9+]+):\s+.*?\s+:\s+(?P<dims>[\d:x\s\->!]+)\s+:\s+(?P<types>[a-z\d_\s\->x]+)\s+:\s+.*?\s+(?:op-)?usec\s+(?P<usec>\d+)\s+(?:op-)?cycles\s+(?P<cycles>\d+)(?:\s+pmu\s+\[(?P<pmu>[\d,\s]+)\])?"
2829
)
2930

3031
logger = logging.getLogger("ggml-hexagon-profile")
@@ -85,21 +86,27 @@ def generate_report(ops, top_n, width_overrides, sort_col, pmu_name=None):
8586
cycles = [o['cycles'] for o in group_ops]
8687
pmu_vals = [o['pmu_val'] for o in group_ops if o['pmu_val'] is not None]
8788

89+
avg_usec_val = statistics.mean(usecs)
90+
count_val = len(group_ops)
91+
tot_usec_val = avg_usec_val * count_val
92+
8893
group_stats.append({
8994
'op': name,
9095
'dims': dims,
9196
'dtypes': types,
92-
'count': str(len(group_ops)),
97+
'count': str(count_val),
9398
'max_usec': str(max(usecs)),
94-
'avg_usec': f"{statistics.mean(usecs):.2f}",
99+
'avg_usec': f"{avg_usec_val:.2f}",
100+
'tot_usec': f"{tot_usec_val:.2f}",
95101
'max_cycles': str(max(cycles)),
96102
'avg_cycles': f"{statistics.mean(cycles):.2f}",
97103
'max_pmu': str(max(pmu_vals)) if pmu_vals else "0",
98104
'avg_pmu': f"{statistics.mean(pmu_vals):.2f}" if pmu_vals else "0.00",
99105
# Numeric values for accurate sorting
100-
'_sort_count': len(group_ops),
106+
'_sort_count': count_val,
101107
'_sort_max_usec': max(usecs),
102-
'_sort_avg_usec': statistics.mean(usecs),
108+
'_sort_avg_usec': avg_usec_val,
109+
'_sort_tot_usec': tot_usec_val,
103110
'_sort_max_cycles': max(cycles),
104111
'_sort_avg_cycles': statistics.mean(cycles),
105112
'_sort_max_pmu': max(pmu_vals) if pmu_vals else 0,
@@ -116,7 +123,7 @@ def generate_report(ops, top_n, width_overrides, sort_col, pmu_name=None):
116123
active_cols = ["op", "dims", "dtypes"]
117124
if pmu_name:
118125
active_cols += ["max-pmu", "avg-pmu"]
119-
active_cols += ["max-usec", "avg-usec", "max-cycles", "avg-cycles", "count"]
126+
active_cols += ["tot-usec", "avg-usec", "avg-cycles", "max-usec", "max-cycles", "count"]
120127

121128
final_headers, final_keys, final_widths = [], [], []
122129

@@ -156,7 +163,7 @@ def main():
156163
parser = argparse.ArgumentParser(description="Post-process Op profile info.")
157164
parser.add_argument("logfile")
158165
parser.add_argument("-n", "--top", type=int, default=100)
159-
parser.add_argument("--sort", type=str, default="max-usec", choices=list(COL_MAP.keys()))
166+
parser.add_argument("--sort", type=str, default="tot-usec", choices=list(COL_MAP.keys()))
160167
parser.add_argument("--pmu-index", type=int)
161168
parser.add_argument("--pmu-name", type=str)
162169
parser.add_argument("--width", action='append', default=['dims:40'], help="Override column width, e.g. --width dims:50")

0 commit comments

Comments
 (0)