Skip to content

Commit bb2b230

Browse files
Add multilocale timing with breakdowns
Signed-off-by: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com>
1 parent 7d04d48 commit bb2b230

5 files changed

Lines changed: 256 additions & 122 deletions

File tree

apps/FastOTF2Converter/src/ConverterTimings.chpl

Lines changed: 167 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ module ConverterTimings {
2525
var totalTime: real; // wall-clock for the entire task body
2626
}
2727

28+
record LocaleTiming {
29+
var numTasks: int;
30+
var taskTimingDom = {0..<numTasks};
31+
var taskTimings: [taskTimingDom] TaskTiming;
32+
}
2833

2934
// -------------------------------------------------------------------------
3035
// TimingReport — aggregates all timing data for the final report
@@ -44,8 +49,10 @@ module ConverterTimings {
4449
var mergeTime: real; // only for strategies with a merge phase
4550

4651
// Per-task data
47-
var numTasks: int;
48-
var taskData: [0..<numTasks] TaskTiming;
52+
// var numTasks: int;
53+
// var taskData: [0..<numTasks] TaskTiming;
54+
// Replaced with locale breakdown for better clarity in locgroup strategies
55+
var localeTimingData : [0..<numLocales] LocaleTiming;
4956

5057
// Strategy name for the header
5158
var strategy: string;
@@ -56,10 +63,12 @@ module ConverterTimings {
5663
// -------------------------------------------------------------------------
5764

5865
proc ref TimingReport.setTaskTimings(const ref timings: [] TaskTiming) {
59-
numTasks = timings.size;
60-
taskData = timings;
66+
localeTimingData[here.id] = new LocaleTiming();
67+
ref locData = localeTimingData[here.id];
68+
locData.numTasks = timings.size;
69+
locData.taskTimingDom = {0..<timings.size};
70+
locData.taskTimings = timings;
6171
}
62-
6372
// -------------------------------------------------------------------------
6473
// print — output the formatted timing report
6574
// -------------------------------------------------------------------------
@@ -91,93 +100,169 @@ module ConverterTimings {
91100
writeln(" ─────────────────────────────── ────────── ───────");
92101
printPhase("Total", totalTime);
93102

94-
// Per-task breakdown
95-
if numTasks > 0 {
96-
writeln();
97-
writeln(" Per-Task Breakdown (", numTasks, " tasks):");
98-
writeln(" Task Locations Events Open (s) Setup (s) Read (s) Write Callgraphs (s) Write Metrics (s) Write Total (s) Total (s)");
99-
writeln(" ──── ───────── ────────── ──────── ───────── ──────── ──────────────────── ───────────────── ─────────────── ─────────");
100-
for t in taskData {
101-
writef(" %4u %9u %10u %8.3dr %9.3dr %8.3dr %20.3dr %17.3dr %15.3dr %9.3dr\n",
102-
t.taskId, t.locations, t.eventsRead,
103-
t.openTime, t.setupTime,
104-
t.readTime,
105-
t.callgraphWriteTime, t.metricsWriteTime, t.writeTime,
106-
t.totalTime);
107-
}
108-
writeln(" ──── ───────── ────────── ──────── ───────── ──────── ──────────────────── ───────────────── ─────────────── ─────────");
109-
110-
// Summary stats
111-
var minOpen = taskData[0].openTime, maxOpen = taskData[0].openTime, sumOpen = 0.0;
112-
var minSetup = taskData[0].setupTime, maxSetup = taskData[0].setupTime, sumSetup = 0.0;
113-
var minRead = taskData[0].readTime, maxRead = taskData[0].readTime, sumRead = 0.0;
114-
var minCallgraphWrite = taskData[0].callgraphWriteTime, maxCallgraphWrite = taskData[0].callgraphWriteTime, sumCallgraphWrite = 0.0;
115-
var minMetricsWrite = taskData[0].metricsWriteTime, maxMetricsWrite = taskData[0].metricsWriteTime, sumMetricsWrite = 0.0;
116-
var minWrite = taskData[0].writeTime, maxWrite = taskData[0].writeTime, sumWrite = 0.0;
117-
var minTotal = taskData[0].totalTime, maxTotal = taskData[0].totalTime, sumTotal = 0.0;
118-
var totalEvents: uint(64) = 0;
119-
for t in taskData {
120-
if t.openTime < minOpen then minOpen = t.openTime;
121-
if t.openTime > maxOpen then maxOpen = t.openTime;
122-
sumOpen += t.openTime;
123-
if t.setupTime < minSetup then minSetup = t.setupTime;
124-
if t.setupTime > maxSetup then maxSetup = t.setupTime;
125-
sumSetup += t.setupTime;
126-
if t.readTime < minRead then minRead = t.readTime;
127-
if t.readTime > maxRead then maxRead = t.readTime;
128-
sumRead += t.readTime;
129-
if t.callgraphWriteTime < minCallgraphWrite then minCallgraphWrite = t.callgraphWriteTime;
130-
if t.callgraphWriteTime > maxCallgraphWrite then maxCallgraphWrite = t.callgraphWriteTime;
131-
sumCallgraphWrite += t.callgraphWriteTime;
132-
if t.metricsWriteTime < minMetricsWrite then minMetricsWrite = t.metricsWriteTime;
133-
if t.metricsWriteTime > maxMetricsWrite then maxMetricsWrite = t.metricsWriteTime;
134-
sumMetricsWrite += t.metricsWriteTime;
135-
if t.writeTime < minWrite then minWrite = t.writeTime;
136-
if t.writeTime > maxWrite then maxWrite = t.writeTime;
137-
sumWrite += t.writeTime;
138-
if t.totalTime < minTotal then minTotal = t.totalTime;
139-
if t.totalTime > maxTotal then maxTotal = t.totalTime;
140-
sumTotal += t.totalTime;
141-
totalEvents += t.eventsRead;
103+
// Per-locale, per-task breakdown
104+
var globalTaskCount = 0;
105+
for loc in localeTimingData do globalTaskCount += loc.numTasks;
106+
107+
if globalTaskCount > 0 {
108+
const taskHeader = " Task Locations Events Open (s) Setup (s) Read (s) Write CG (s) Write Met (s) Write Tot (s) Total (s)";
109+
const taskSep = " ──── ───────── ────────── ──────── ───────── ──────── ──────────── ───────────── ───────────── ─────────";
110+
111+
// --- Per-locale sections with per-locale stats ---
112+
for locId in 0..<numLocales {
113+
ref loc = localeTimingData[locId];
114+
if loc.numTasks == 0 then continue;
115+
116+
writeln();
117+
writeln("┌─ Locale ", locId, " (", loc.numTasks, " tasks) ─────────────────────────────────────────");
118+
writeln(taskHeader);
119+
writeln(taskSep);
120+
for t in loc.taskTimings {
121+
writef(" %4u %9u %10u %8.3dr %9.3dr %8.3dr %12.3dr %13.3dr %13.3dr %9.3dr\n",
122+
t.taskId, t.locations, t.eventsRead,
123+
t.openTime, t.setupTime, t.readTime,
124+
t.callgraphWriteTime, t.metricsWriteTime, t.writeTime,
125+
t.totalTime);
126+
}
127+
writeln(taskSep);
128+
129+
// Per-locale summary stats
130+
printTaskStats(loc.taskTimings, loc.numTasks, " ");
131+
writeln("└──────────────────────────────────────────────────────────────────────────");
142132
}
143-
const n = numTasks: real;
144-
const meanOpen = sumOpen / n;
145-
const meanSetup = sumSetup / n;
146-
const meanRead = sumRead / n;
147-
const meanCallgraphWrite = sumCallgraphWrite / n;
148-
const meanMetricsWrite = sumMetricsWrite / n;
149-
const meanWrite = sumWrite / n;
150-
const meanTotal = sumTotal / n;
151-
152-
writef(" %<26s %8.3dr %9.3dr %8.3dr %20.3dr %17.3dr %15.3dr %9.3dr\n",
153-
"Min", minOpen, minSetup, minRead, minCallgraphWrite, minMetricsWrite, minWrite, minTotal);
154-
writef(" %<26s %8.3dr %9.3dr %8.3dr %20.3dr %17.3dr %15.3dr %9.3dr\n",
155-
"Max", maxOpen, maxSetup, maxRead, maxCallgraphWrite, maxMetricsWrite, maxWrite, maxTotal);
156-
writef(" %<26s %8.3dr %9.3dr %8.3dr %20.3dr %17.3dr %15.3dr %9.3dr\n",
157-
"Mean", meanOpen, meanSetup, meanRead, meanCallgraphWrite, meanMetricsWrite, meanWrite, meanTotal);
158-
159-
// Imbalance ratios (max/mean), only if mean > 0
160-
if meanTotal > 0.0 {
161-
const imbOpen = if meanOpen > 0.0 then maxOpen / meanOpen else 0.0;
162-
const imbSetup = if meanSetup > 0.0 then maxSetup / meanSetup else 0.0;
163-
const imbRead = if meanRead > 0.0 then maxRead / meanRead else 0.0;
164-
const imbCallgraphWrite = if meanCallgraphWrite > 0.0 then maxCallgraphWrite / meanCallgraphWrite else 0.0;
165-
const imbMetricsWrite = if meanMetricsWrite > 0.0 then maxMetricsWrite / meanMetricsWrite else 0.0;
166-
const imbWrite = if meanWrite > 0.0 then maxWrite / meanWrite else 0.0;
167-
const imbTotal = maxTotal / meanTotal;
168-
writef(" %<26s %7.3drx %8.3drx %7.3drx %19.3drx %16.3drx %14.3drx %8.3drx\n",
169-
"Imbalance (max/mean)", imbOpen, imbSetup, imbRead, imbCallgraphWrite, imbMetricsWrite, imbWrite, imbTotal);
133+
134+
// --- Global (all-locale) aggregate stats ---
135+
if numLocales > 1 {
136+
writeln();
137+
writeln("╔═ Global Task Summary (all ", globalTaskCount, " tasks across ", numLocales, " locales) ═══════════");
138+
139+
// Collect all tasks into flat view for stats
140+
var allTasks: [0..<globalTaskCount] TaskTiming;
141+
var idx = 0;
142+
for loc in localeTimingData {
143+
for t in loc.taskTimings {
144+
allTasks[idx] = t;
145+
idx += 1;
146+
}
147+
}
148+
printTaskStats(allTasks, globalTaskCount, "");
149+
150+
// Per-locale wall times for inter-locale balance
151+
writeln("");
152+
writeln("║ Per-Locale Wall Times (max task total = locale wall time):");
153+
writeln("║ Locale Tasks Events Wall (s)");
154+
writeln("║ ────── ───── ────────── ────────");
155+
var minLocWall = max(real), maxLocWall = 0.0, sumLocWall = 0.0;
156+
var activeLocales = 0;
157+
for locId in 0..<numLocales {
158+
ref loc = localeTimingData[locId];
159+
if loc.numTasks == 0 then continue;
160+
activeLocales += 1;
161+
var locEvents: uint(64) = 0;
162+
var locWall = 0.0;
163+
for t in loc.taskTimings {
164+
locEvents += t.eventsRead;
165+
if t.totalTime > locWall then locWall = t.totalTime;
166+
}
167+
if locWall < minLocWall then minLocWall = locWall;
168+
if locWall > maxLocWall then maxLocWall = locWall;
169+
sumLocWall += locWall;
170+
writef("%6i %5i %10u %8.3dr",
171+
locId, loc.numTasks, locEvents, locWall);
172+
writeln();
173+
}
174+
if activeLocales > 1 {
175+
const meanLocWall = sumLocWall / activeLocales: real;
176+
const imbLoc = if meanLocWall > 0.0 then maxLocWall / meanLocWall else 0.0;
177+
writeln("║ ────── ───── ────────── ────────");
178+
writef("║ Min: %.3dr Max: %.3dr Mean: %.3dr Imbalance: %.3drx",
179+
minLocWall, maxLocWall, meanLocWall, imbLoc);
180+
writeln();
181+
}
182+
writeln("╚══════════════════════════════════════════════════════════════════════════");
170183
}
171184

172185
if totalTime > 0.0 {
173-
const eventsPerSec = totalEvents: real / totalTime;
174-
writeln(" Total events/sec: ", eventsPerSec);
186+
var totalEvents: uint(64) = 0;
187+
for loc in localeTimingData do for t in loc.taskTimings do totalEvents += t.eventsRead;
188+
writeln();
189+
writef(" Throughput: %.0dr events/sec\n", totalEvents: real / totalTime);
175190
}
176191
}
177192

178193
writeln("=== End Timing Report ===");
179194
}
180195

196+
// -------------------------------------------------------------------------
197+
// printTaskStats — print min/max/mean/imbalance for a set of tasks
198+
// -------------------------------------------------------------------------
199+
200+
proc printTaskStats(const ref tasks, n: int, prefix: string) {
201+
if n == 0 then return;
202+
203+
var minOpen = tasks[0].openTime, maxOpen = tasks[0].openTime, sumOpen = 0.0;
204+
var minSetup = tasks[0].setupTime, maxSetup = tasks[0].setupTime, sumSetup = 0.0;
205+
var minRead = tasks[0].readTime, maxRead = tasks[0].readTime, sumRead = 0.0;
206+
var minCGWrite = tasks[0].callgraphWriteTime, maxCGWrite = tasks[0].callgraphWriteTime, sumCGWrite = 0.0;
207+
var minMetWrite = tasks[0].metricsWriteTime, maxMetWrite = tasks[0].metricsWriteTime, sumMetWrite = 0.0;
208+
var minWrite = tasks[0].writeTime, maxWrite = tasks[0].writeTime, sumWrite = 0.0;
209+
var minTotal = tasks[0].totalTime, maxTotal = tasks[0].totalTime, sumTotal = 0.0;
210+
var totalEvents: uint(64) = 0;
211+
212+
for t in tasks {
213+
if t.openTime < minOpen then minOpen = t.openTime;
214+
if t.openTime > maxOpen then maxOpen = t.openTime;
215+
sumOpen += t.openTime;
216+
if t.setupTime < minSetup then minSetup = t.setupTime;
217+
if t.setupTime > maxSetup then maxSetup = t.setupTime;
218+
sumSetup += t.setupTime;
219+
if t.readTime < minRead then minRead = t.readTime;
220+
if t.readTime > maxRead then maxRead = t.readTime;
221+
sumRead += t.readTime;
222+
if t.callgraphWriteTime < minCGWrite then minCGWrite = t.callgraphWriteTime;
223+
if t.callgraphWriteTime > maxCGWrite then maxCGWrite = t.callgraphWriteTime;
224+
sumCGWrite += t.callgraphWriteTime;
225+
if t.metricsWriteTime < minMetWrite then minMetWrite = t.metricsWriteTime;
226+
if t.metricsWriteTime > maxMetWrite then maxMetWrite = t.metricsWriteTime;
227+
sumMetWrite += t.metricsWriteTime;
228+
if t.writeTime < minWrite then minWrite = t.writeTime;
229+
if t.writeTime > maxWrite then maxWrite = t.writeTime;
230+
sumWrite += t.writeTime;
231+
if t.totalTime < minTotal then minTotal = t.totalTime;
232+
if t.totalTime > maxTotal then maxTotal = t.totalTime;
233+
sumTotal += t.totalTime;
234+
totalEvents += t.eventsRead;
235+
}
236+
237+
const cnt = n: real;
238+
const meanOpen = sumOpen / cnt;
239+
const meanSetup = sumSetup / cnt;
240+
const meanRead = sumRead / cnt;
241+
const meanCGWrite = sumCGWrite / cnt;
242+
const meanMetWrite = sumMetWrite / cnt;
243+
const meanWrite = sumWrite / cnt;
244+
const meanTotal = sumTotal / cnt;
245+
246+
writef("%s%<27s %8.3dr %9.3dr %8.3dr %12.3dr %13.3dr %13.3dr %9.3dr\n",
247+
prefix, "Min", minOpen, minSetup, minRead, minCGWrite, minMetWrite, minWrite, minTotal);
248+
writef("%s%<27s %8.3dr %9.3dr %8.3dr %12.3dr %13.3dr %13.3dr %9.3dr\n",
249+
prefix, "Max", maxOpen, maxSetup, maxRead, maxCGWrite, maxMetWrite, maxWrite, maxTotal);
250+
writef("%s%<27s %8.3dr %9.3dr %8.3dr %12.3dr %13.3dr %13.3dr %9.3dr\n",
251+
prefix, "Mean", meanOpen, meanSetup, meanRead, meanCGWrite, meanMetWrite, meanWrite, meanTotal);
252+
253+
if meanTotal > 0.0 {
254+
const imbOpen = if meanOpen > 0.0 then maxOpen / meanOpen else 0.0;
255+
const imbSetup = if meanSetup > 0.0 then maxSetup / meanSetup else 0.0;
256+
const imbRead = if meanRead > 0.0 then maxRead / meanRead else 0.0;
257+
const imbCGWrite = if meanCGWrite > 0.0 then maxCGWrite / meanCGWrite else 0.0;
258+
const imbMetWrite = if meanMetWrite > 0.0 then maxMetWrite / meanMetWrite else 0.0;
259+
const imbWrite = if meanWrite > 0.0 then maxWrite / meanWrite else 0.0;
260+
const imbTotal = maxTotal / meanTotal;
261+
writef("%s%<27s %7.3drx %8.3drx %7.3drx %11.3drx %12.3drx %12.3drx %8.3drx\n",
262+
prefix, "Imbalance (max/mean)", imbOpen, imbSetup, imbRead, imbCGWrite, imbMetWrite, imbWrite, imbTotal);
263+
}
264+
}
265+
181266
// -------------------------------------------------------------------------
182267
// printPhase — helper to print one phase line with percentage
183268
// -------------------------------------------------------------------------

apps/FastOTF2Converter/src/Strategy_LocBlock.chpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ module Strategy_LocBlock {
8787
logInfo("Finished converting trace in ", global_sw.elapsed(), " seconds");
8888

8989
if conf.timings {
90-
var report = new TimingReport(numTasks=numberOfReaders);
90+
var report = new TimingReport();
9191
report.strategy = conf.strategy;
9292
report.totalTime = global_sw.elapsed();
9393
report.defOpenTime = defResult.openTime;

apps/FastOTF2Converter/src/Strategy_LocGroupBlock.chpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ module Strategy_LocGroupBlock {
114114
logInfo("Finished converting trace in ", totalConversionTime, " seconds");
115115

116116
if conf.timings {
117-
var report = new TimingReport(numTasks=numberOfReaders);
117+
var report = new TimingReport();
118118
report.strategy = conf.strategy;
119119
report.totalTime = totalConversionTime;
120120
report.defOpenTime = defResult.openTime;

0 commit comments

Comments
 (0)