-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathmain.js
More file actions
1804 lines (1650 loc) · 77.6 KB
/
Copy pathmain.js
File metadata and controls
1804 lines (1650 loc) · 77.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* samples/bench/main.js
* =====================
*
* Browser-side bench orchestrator. Three tabs:
*
* 1. Full comparison - all 4 directions x every jsColorEngine mode
* listed in JSCE_MODES + lcms-wasm (default,
* HIGHRES and NOOPT, each at 8- and 16-bit I/O).
* Per cell measures LUT build + cold + hot.
* 2. JIT warmup curve - one direction + mode, plot per-iter ms over
* N iterations. Visualises Ignition -> Sparkplug
* -> TurboFan tier-up.
* 3. Pixel-count sweep - one direction + mode, sweep pixel count from
* 4 K to 4 M. Sanity-checks that headline MPx/s
* is L2-cache-flattering, not peak truth.
*
* Loads jsColorEngine via the UMD bundle (`window.jsColorEngine`) and
* lcms-wasm via dynamic ESM import (see lcms-runner.js).
*
* UI yielding: between every config we `await yieldUi()` so the progress
* bar / status text actually paints. Timing-critical loops use
* `performance.now()` not Date.now (us precision in modern browsers).
*/
import { loadLcms, buildProfiles, freeProfiles, makeLcmsRunner, probeLcmsBuild } from './lcms-runner.js';
// CMYK profile (relative URL, fetched by init()): same GRACoL print standard
// as the tests, but the on-disk name in samples/profiles/ is
// CoatedGRACoL2006.icc (see other sample pages).
const PROFILE_URL = '../profiles/CoatedGRACoL2006.icc';
// AdobeRGB profile (relative URL, fetched by init() alongside PROFILE_URL).
// RGB->RGB MUST be sRGB -> AdobeRGB (NOT sRGB -> sRGB). If both endpoints
// are sRGB, lcms's pipeline optimiser collapses the transform to an identity
// at cmsCreateTransform() time, and the resulting throughput is ~30% higher
// than any legitimate RGB->RGB conversion (measured: 78 vs 60 MPx/s in a
// node smoke-test). Every other benchmark in this suite exercises a real
// non-identity transform, so RGB->RGB has to as well.
const ADOBE_RGB_URL = '../profiles/AdobeRGB1998.icc';
// ============================================================ STATE
/**
 * Page-wide mutable state shared by init() and the bench runners.
 * Every field that is written anywhere in this file is declared here so the
 * object shape is fixed up front (init() assigns `adobeProfileBytes`, which
 * previously appeared only as an ad-hoc property).
 */
const state = {
    jsce: null,              // window.jsColorEngine reference
    jsGracol: null,          // jsColorEngine Profile instance
    profileBytes: null,      // raw GRACoL ICC bytes (for lcms)
    adobeProfileBytes: null, // raw AdobeRGB ICC bytes (for lcms)
    lcms: null,              // instantiated lcms-wasm runtime
    lcmsConsts: null,        // named exports (TYPE_*, INTENT_*, cmsFLAGS_*)
    lcmsProfiles: null,      // { srgb, adobe, cmyk, lab, cmykName, adobeName }
    lcmsAvailable: false,
    simdSupported: null,     // tri-state until detected
    initOnce: null,          // memoised init() promise
    activeRunner: null,      // {abort: () => void} for the in-flight bench
};
// ============================================================ TINY UTILS
/** Shorthand for `document.querySelector(sel)`: first matching element, or null. */
function $(sel) {
    const firstMatch = document.querySelector(sel);
    return firstMatch;
}
/** Shorthand for `document.querySelectorAll(sel)`: every element matching `sel`. */
function $$(sel) {
    const allMatches = document.querySelectorAll(sel);
    return allMatches;
}
/** Current high-resolution timestamp in milliseconds, straight from `performance.now()`. */
function nowMs() {
    const stamp = performance.now();
    return stamp;
}
/**
 * Hand control back to the browser so pending UI updates can paint.
 *
 * Waits for two consecutive animation frames: the first callback runs at the
 * start of the next frame, and only the second one - scheduled from inside the
 * first - settles the promise, so a full frame has been processed in between.
 *
 * @returns {Promise<number>} settles with the timestamp the second
 *     requestAnimationFrame callback receives.
 */
function yieldUi() {
    return new Promise(function waitTwoFrames(done) {
        const onFirstFrame = () => {
            requestAnimationFrame(done);
        };
        requestAnimationFrame(onFirstFrame);
    });
}
/**
 * Median of a list of numbers. The input is copied before sorting, so the
 * caller's array is never reordered.
 *
 * - odd length  : the middle element
 * - even length : the mean of the two middle elements (the previous version
 *                 returned the upper-middle element, which biased the result
 *                 upwards for even-sized sample sets)
 * - empty       : NaN (previously `undefined`; both are non-finite, so the
 *                 fmt* helpers still render '-')
 *
 * @param {ArrayLike<number> & Iterable<number>} arr samples
 * @returns {number} the median, or NaN when `arr` is empty
 */
function median(arr) {
    const sorted = [...arr].sort((a, b) => a - b);
    const count = sorted.length;
    if (count === 0) {
        return NaN;
    }
    const upper = count >> 1;
    if (count % 2 === 1) {
        return sorted[upper];
    }
    return (sorted[upper - 1] + sorted[upper]) / 2;
}
/**
 * Convert a per-iteration time into throughput in megapixels per second.
 *
 * @param {number} msPerIter  milliseconds spent on one iteration
 * @param {number} pixelCount pixels processed per iteration
 * @returns {number} MPx/s, or 0 when `msPerIter` is zero or negative
 */
function mpxPerSec(msPerIter, pixelCount) {
    if (msPerIter <= 0) {
        return 0;
    }
    const megapixels = pixelCount / 1e6;
    const seconds = msPerIter / 1000;
    return megapixels / seconds;
}
/**
 * Format a millisecond value for a table cell, using more decimals the
 * smaller the value is:
 *   < 0.1 -> 3 decimals, < 10 -> 2, < 100 -> 1, otherwise none.
 *
 * @param {number} ms duration in milliseconds
 * @returns {string} the formatted number, or '-' when `ms` is not finite
 */
function fmtMs(ms) {
    if (!isFinite(ms)) {
        return '-';
    }
    let decimals = 0;
    if (ms < 0.1) {
        decimals = 3;
    } else if (ms < 10) {
        decimals = 2;
    } else if (ms < 100) {
        decimals = 1;
    }
    return ms.toFixed(decimals);
}
/**
 * Format a MPx/s throughput figure: 2 decimals below 1, 1 decimal below 10,
 * whole numbers from 10 upwards.
 *
 * @param {number} m throughput in megapixels per second
 * @returns {string} the formatted number, or '-' when `m` is non-finite,
 *     zero or negative
 */
function fmtMpx(m) {
    const displayable = isFinite(m) && m > 0;
    if (!displayable) {
        return '-';
    }
    let decimals = 0;
    if (m < 1) {
        decimals = 2;
    } else if (m < 10) {
        decimals = 1;
    }
    return m.toFixed(decimals);
}
/**
 * Format a MB/s bandwidth figure: 1 decimal below 10, whole numbers from 10
 * upwards.
 *
 * @param {number} m bandwidth in megabytes per second
 * @returns {string} the formatted number, or '-' when `m` is non-finite,
 *     zero or negative
 */
function fmtMBs(m) {
    const displayable = isFinite(m) && m > 0;
    if (!displayable) {
        return '-';
    }
    return m.toFixed(m < 10 ? 1 : 0);
}
/**
 * Bytes moved per pixel, counting input and output channels together.
 *
 * @param {number} inCh     input channel count
 * @param {number} outCh    output channel count
 * @param {boolean} is16bit truthy for 2-byte samples, falsy for 1-byte samples
 * @returns {number} (inCh + outCh) multiplied by the sample size in bytes
 */
function totalBpp(inCh, outCh, is16bit) {
    const bytesPerSample = is16bit ? 2 : 1;
    const channels = inCh + outCh;
    return channels * bytesPerSample;
}
/**
 * Re-sort the rows of the full-comparison table body in place.
 *
 * Rows are grouped by `data-bench-dir`, groups follow the order of
 * `directions`, and inside each group rows go fastest-first by the MPx/s
 * stored on their `td.num[data-mpx]` cell. Rows without such a cell (error
 * rows) rank as -Infinity and therefore land at the end of their group.
 * Only the first row of every group keeps the `dir-sep` class.
 *
 * Rows whose direction id is not listed in `directions` are not moved; since
 * every listed row is re-appended, they end up ahead of the sorted groups.
 *
 * @param {HTMLElement} tbody the table body whose <tr> children are reordered
 * @param {Array<{id: string}>} directions direction descriptors, in display order
 */
function reorderFullComparisonTbody(tbody, directions) {
    const rowsByDirection = new Map(directions.map((dir) => [dir.id, []]));

    // Bucket every row under its direction id; unknown or missing ids are skipped.
    Array.from(tbody.querySelectorAll('tr')).forEach((row) => {
        const dirId = row.dataset.benchDir;
        const bucket = dirId ? rowsByDirection.get(dirId) : undefined;
        if (bucket) {
            bucket.push(row);
        }
    });

    const mpxOf = (row) => {
        const cell = row.querySelector('td.num[data-mpx]');
        return cell ? parseFloat(cell.dataset.mpx) : -Infinity;
    };

    // Pass 1: order each bucket, highest MPx/s first.
    for (const dir of directions) {
        rowsByDirection.get(dir.id).sort((rowA, rowB) => {
            const speedA = mpxOf(rowA);
            const speedB = mpxOf(rowB);
            return speedB - speedA;
        });
    }

    // Pass 2: re-append in final order (appendChild moves an attached node)
    // and mark only the leading row of each bucket as the separator.
    for (const dir of directions) {
        rowsByDirection.get(dir.id).forEach((row, position) => {
            row.classList.toggle('dir-sep', position === 0);
            tbody.appendChild(row);
        });
    }
}
/**
 * Return the results in the same order the full-comparison table shows them
 * (used for copy-to-markdown): grouped by direction in the order of
 * `directions`, fastest MPx/s first inside each group.
 *
 * Results whose `mpxs` is missing or NaN (e.g. failed cells) rank as
 * -Infinity so they sink to the bottom of their group, mirroring how
 * reorderFullComparisonTbody() treats rows without a `data-mpx` cell.
 * Previously such entries made the comparator return NaN, which left their
 * position up to the sort implementation.
 *
 * Results whose `dirId` is not listed in `directions` are dropped. The input
 * array is not modified.
 *
 * @param {Array<{dirId: string, mpxs?: number}>} results measured cells
 * @param {Array<{id: string}>} directions direction descriptors, in display order
 * @returns {Array<object>} a new array holding the same result objects, reordered
 */
function sortFullResultsByDirAndMpx(results, directions) {
    const byDir = new Map();
    for (const d of directions) {
        byDir.set(d.id, []);
    }
    for (const r of results) {
        const bucket = byDir.get(r.dirId);
        if (bucket) {
            bucket.push(r);
        }
    }
    // Sort key: a usable number sorts by value, anything else goes last.
    const speedOf = (r) => (
        (typeof r.mpxs === 'number' && !Number.isNaN(r.mpxs)) ? r.mpxs : -Infinity
    );
    const out = [];
    for (const d of directions) {
        const list = byDir.get(d.id);
        list.sort((a, b) => {
            const aSpeed = speedOf(a);
            const bSpeed = speedOf(b);
            // Equal keys (including two -Infinity) must compare as 0, not NaN.
            return (aSpeed === bSpeed) ? 0 : bSpeed - aSpeed;
        });
        for (const x of list) {
            out.push(x);
        }
    }
    return out;
}
/**
 * Build a deterministic pseudo-random 8-bit test image.
 *
 * The generator is a fixed-seed linear congruential recurrence; according to
 * the file's notes it has to stay in step with the generators used by
 * bench/mpx_summary.js and bench/lcms-comparison/bench.js (not visible here)
 * so every engine is fed identical bytes and sees like-for-like cache
 * behaviour.
 *
 * NOTE: the multiplication is done in double precision and can exceed 2^53
 * before the mask is applied. That rounding is part of the established byte
 * stream - do not "fix" it (e.g. with Math.imul) or the inputs will change.
 *
 * @param {number} channels   samples per pixel
 * @param {number} pixelCount number of pixels
 * @returns {Uint8ClampedArray} `pixelCount * channels` samples
 */
function buildInput(channels, pixelCount) {
    const bytes = new Uint8ClampedArray(pixelCount * channels);
    let lcgState = 0x13579bdf;
    let idx = 0;
    while (idx < bytes.length) {
        lcgState = (lcgState * 1103515245 + 12345) & 0x7fffffff;
        bytes[idx] = lcgState & 0xff; // low 8 bits of the state
        idx += 1;
    }
    return bytes;
}
/**
 * Build a deterministic pseudo-random 16-bit test image covering the full
 * u16 range (not u8 values widened to 16 bits).
 *
 * Uses the same seed and recurrence as buildInput() and keeps the low 16 bits
 * of each state instead of the low 8, so the two streams are correlated and
 * every engine fed from this function sees the same pixels.
 *
 * NOTE: as in buildInput(), the double-precision rounding of the
 * multiplication is part of the established stream and must not be changed.
 *
 * @param {number} channels   samples per pixel
 * @param {number} pixelCount number of pixels
 * @returns {Uint16Array} `pixelCount * channels` samples
 */
function buildInputU16(channels, pixelCount) {
    const words = new Uint16Array(pixelCount * channels);
    let lcgState = 0x13579bdf;
    let idx = 0;
    while (idx < words.length) {
        lcgState = (lcgState * 1103515245 + 12345) & 0x7fffffff;
        words[idx] = lcgState & 0xffff; // low 16 bits of the state
        idx += 1;
    }
    return words;
}
/**
 * Report whether the host accepts WebAssembly SIMD by validating a minimal
 * module equivalent to:
 *   (module (func (result v128) v128.const i32x4 0 0 0 0))
 *
 * Module layout (43 bytes):
 *   header       8 bytes   \0asm + version 1
 *   type sect.   7 bytes   id=1, size=5: one type, func () -> [v128 (0x7b)]
 *   func sect.   4 bytes   id=3, size=2: one function using type index 0
 *   code sect.  24 bytes   id=10, size=22: one body of size 20 =
 *                          0 locals, v128.const (0xfd 0x0c) + 16 immediate
 *                          bytes, end (0x0b)
 *
 * The size prefixes matter: the file's history notes that an earlier version
 * used 0x08 / 0x0a instead of 0x14 / 0x16, which made validate() reject the
 * module everywhere and report SIMD as missing on every browser.
 *
 * @returns {Promise<boolean>} true when validate() accepts the module; false
 *     when WebAssembly is absent, validate() rejects it, or validate() throws
 */
async function detectWasmSimd() {
    if (typeof WebAssembly !== 'object') {
        return false;
    }
    const header = [0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00];
    const typeSection = [0x01, 0x05, 0x01, 0x60, 0x00, 0x01, 0x7b];
    const funcSection = [0x03, 0x02, 0x01, 0x00];
    const v128Immediate = new Array(16).fill(0);
    const codeSection = [
        0x0a, 0x16, 0x01,   // section size=22, 1 function body
        0x14, 0x00,         // body size=20, 0 locals
        0xfd, 0x0c,         // v128.const
        ...v128Immediate,
        0x0b,               // end
    ];
    const probe = Uint8Array.from([
        ...header,
        ...typeSection,
        ...funcSection,
        ...codeSection,
    ]);
    let accepted;
    try {
        accepted = WebAssembly.validate(probe);
    } catch (_) {
        accepted = false;
    }
    return accepted;
}
// ============================================================ DIRECTION CONFIG
/**
 * The four standard conversion directions exercised by the bench.
 *
 * Each entry carries:
 *   - id / shortLabel / longLabel : identifiers and display strings
 *   - js   : { src, dst, inCh, outCh } - src/dst are the arguments handed to
 *            jsColorEngine Transform.create()
 *   - lcms : { pIn, fIn, fIn16, pOut, fOut, fOut16, inCh, outCh } - profile
 *            keys plus 8-bit and 16-bit format constant NAMES, resolved
 *            against the lcms profile / constant maps once lcms is loaded.
 *            The *16 names are the ones to use for TYPE_*_16 bench variants;
 *            the profile keys stay the same.
 *
 * RGB -> RGB deliberately targets AdobeRGB ('*adobergb' / 'adobe'), not sRGB:
 * per the notes in this file, an sRGB -> sRGB transform is collapsed by lcms
 * into an identity copy and reports a bogus speedup.
 *
 * @param {object} jsGracol decoded jsColorEngine CMYK (GRACoL) profile
 * @returns {Array<object>} one descriptor per direction, in display order
 */
function directionConfigs(jsGracol) {
    // Per-colourspace constants shared by every direction that uses the space.
    const rgb = { ch: 3, f8: 'TYPE_RGB_8', f16: 'TYPE_RGB_16' };
    const cmyk = { ch: 4, f8: 'TYPE_CMYK_8', f16: 'TYPE_CMYK_16' };

    const describe = (id, shortLabel, longLabel, jsSrc, jsDst, pIn, from, pOut, to) => ({
        id,
        shortLabel,
        longLabel,
        js: { src: jsSrc, dst: jsDst, inCh: from.ch, outCh: to.ch },
        lcms: {
            pIn,
            fIn: from.f8,
            fIn16: from.f16,
            pOut,
            fOut: to.f8,
            fOut16: to.f16,
            inCh: from.ch,
            outCh: to.ch,
        },
    });

    return [
        describe('rgb-rgb', 'RGB → RGB', 'RGB to RGB (sRGB to AdobeRGB)',
            '*srgb', '*adobergb', 'srgb', rgb, 'adobe', rgb),
        describe('rgb-cmyk', 'RGB → CMYK', 'RGB to CMYK (sRGB to GRACoL)',
            '*srgb', jsGracol, 'srgb', rgb, 'cmyk', cmyk),
        describe('cmyk-rgb', 'CMYK → RGB', 'CMYK to RGB (GRACoL to sRGB)',
            jsGracol, '*srgb', 'cmyk', cmyk, 'srgb', rgb),
        describe('cmyk-cmyk', 'CMYK → CMYK', 'CMYK to CMYK (GRACoL to GRACoL)',
            jsGracol, jsGracol, 'cmyk', cmyk, 'cmyk', cmyk),
    ];
}
/**
 * JS mirror of LittleCMS's `_cmsReasonableGridpointsByColorspace`
 * (lcms2 src/cmspcs.c): the precalc-LUT grid size lcms chooses at
 * cmsCreateTransform() time from the input channel count and the flags.
 *
 *                     default   HIGHRESPRECALC   LOWRESPRECALC
 *   1 channel            33           49              33
 *   3 channels (RGB)     33           49              17
 *   4 channels (CMYK)    17           23              17
 *   > 4 channels          7            7               6
 *
 * Used only to label the LUT column; nothing here calls into lcms. The
 * explicit grid override (`dwFlags & 0x00FF0000`) is not modelled. A flag
 * constant that is absent from `consts` is treated as not set.
 *
 * @param {number} inCh   input channel count
 * @param {number} flags  lcms dwFlags bitmask
 * @param {object} consts lcms constants (cmsFLAGS_NOOPTIMIZE,
 *     cmsFLAGS_HIGHRESPRECALC, optionally cmsFLAGS_LOWRESPRECALC)
 * @returns {number} grid points per axis; 0 when `consts` is missing or
 *     NOOPTIMIZE is set (no LUT is built)
 */
function lcmsReasonableGrid(inCh, flags, consts) {
    if (!consts) {
        return 0;
    }
    // `flags & undefined` is 0, so a missing constant simply never matches.
    const isSet = (bit) => Boolean(flags & bit);
    const manyChannels = inCh > 4;

    if (isSet(consts.cmsFLAGS_NOOPTIMIZE)) {
        return 0;
    }
    if (isSet(consts.cmsFLAGS_HIGHRESPRECALC)) {
        // Maximum resolution.
        if (manyChannels) return 7;
        return (inCh === 4) ? 23 : 49;
    }
    if (isSet(consts.cmsFLAGS_LOWRESPRECALC)) {
        // Reduced resolution; monochrome keeps the larger grid.
        if (manyChannels) return 6;
        return (inCh === 1) ? 33 : 17;
    }
    // No resolution flag.
    if (manyChannels) return 7;
    return (inCh === 4) ? 17 : 33;
}
// jsColorEngine modes that always run in the full-comparison view, in row
// order. Each entry: { id, label, badge, isLut, dataFormat }.
//
// Order matters: `int` is listed ahead of every WASM row so that vs-int
// speedups for those rows have a baseline already in scope.
//
//  - `no-lut` is the "full pipeline per pixel" accuracy path
//    (buildLut: false) - the same kernel family as lcms NOOPTIMIZE, i.e. how
//    fast jsce is when told to prioritise math fidelity over throughput.
//  - `int16` is the v1.3 16-bit-I/O path: u16 LUT cells like `int`, but the
//    kernel reads and writes Uint16Array. Placed right after `int` so the
//    speed comparison is adjacent. See bench/int16_poc/RESULTS.md and
//    Transform.js `tetrahedralInterp*Array_*Ch_intLut16_loop`.
//  - `int16-wasm-scalar` / `int16-wasm-simd` (v1.3) are the WASM siblings of
//    `int16` (wasm32 scalar and v128 SIMD builds of the same u16 kernel).
//    They sit next to `int16` so the JS / scalar / SIMD step is visible in
//    adjacent rows.
//
// Rows are written as [id, label, badge, isLut, dataFormat] tuples and
// expanded into objects below.
const JSCE_MODES = [
    ['no-lut',            'jsce no-LUT (f64)',      'b-nolut',    false, 'int8'],
    ['float',             'jsce float',             'b-float',    true,  'int8'],
    ['int',               'jsce int',               'b-int',      true,  'int8'],
    ['int16',             'jsce int16 (u16 I/O)',   'b-int16',    true,  'int16'],
    ['int16-wasm-scalar', 'jsce int16-wasm-scalar', 'b-int16ws',  true,  'int16'],
    ['int16-wasm-simd',   'jsce int16-wasm-simd',   'b-int16wsi', true,  'int16'],
    ['int-wasm-scalar',   'jsce int-wasm-scalar',   'b-wasm',     true,  'int8'],
    ['int-wasm-simd',     'jsce int-wasm-simd',     'b-simd',     true,  'int8'],
].map(([id, label, badge, isLut, dataFormat]) => ({ id, label, badge, isLut, dataFormat }));
// ============================================================ INIT / BOOT
/**
 * One-time page bootstrap, memoised in `state.initOnce`.
 *
 * Steps: (1) pick up the jsColorEngine UMD global, (2) probe and display host
 * capabilities, (3) fetch both ICC profiles and decode GRACoL for jsce,
 * (4) best-effort load of lcms-wasm (failure only disables the lcms rows).
 *
 * On failure an "Init failed" banner is inserted at the top of the page,
 * `state.initOnce` is cleared so a later call retries, and the error is
 * re-thrown to the caller. The banner text is set through `textContent`:
 * the previous `innerHTML` interpolation let the browser parse stack frames
 * such as `<anonymous>` as markup (dropping them from the display) and
 * inserted the error text unescaped.
 *
 * @returns {Promise<void>} settles when initialisation has finished
 * @throws {Error} when the UMD bundle is missing, a profile fetch fails, or
 *     the GRACoL profile cannot be decoded
 */
async function init() {
    if (state.initOnce) return state.initOnce;
    state.initOnce = (async () => {
        // ---- 1. jsColorEngine UMD global ----
        if (typeof window.jsColorEngine === 'undefined') {
            throw new Error(
                'window.jsColorEngine is undefined. Did you build the UMD bundle?\n' +
                'Run: npm run browser (writes to browser/jsColorEngineWeb.js)'
            );
        }
        state.jsce = window.jsColorEngine;
        $('#info-jsce').textContent = 'loaded (window.jsColorEngine)';
        $('#info-jsce').classList.add('is-ok');
        // The engine doesn't expose its version as a runtime constant (yet),
        // so a fixed target label is shown; cross-check against package.json.
        $('#info-version').textContent = '1.3 (target)';

        // ---- 2. host capabilities ----
        $('#info-wasm').textContent = (typeof WebAssembly !== 'undefined') ? 'available' : 'NOT AVAILABLE';
        $('#info-wasm').classList.add(typeof WebAssembly !== 'undefined' ? 'is-ok' : 'is-error');
        state.simdSupported = await detectWasmSimd();
        $('#info-simd').textContent = state.simdSupported ? 'available' : 'NOT AVAILABLE (will demote)';
        $('#info-simd').classList.add(state.simdSupported ? 'is-ok' : 'is-warn');
        $('#info-ua').textContent = navigator.userAgent;
        $('#info-cores').textContent = String(navigator.hardwareConcurrency || '?');
        $('#info-secure').textContent = window.isSecureContext
            ? 'secure context'
            : 'NOT secure (some perf APIs throttled)';
        $('#info-secure').classList.add(window.isSecureContext ? 'is-ok' : 'is-warn');

        // ---- 3. Load profile bytes in parallel (used by both engines) ----
        // GRACoL   = our CMYK target (ICC on disk, large).
        // AdobeRGB = our non-sRGB RGB target so the RGB->RGB row is a
        //            real matrix+curves conversion, not an identity passthrough.
        //            lcms-wasm doesn't expose cmsCreateRGBProfile() so we load
        //            a 560-byte reference profile from disk instead.
        const t0 = nowMs();
        const [respGracol, respAdobe] = await Promise.all([
            fetch(PROFILE_URL),
            fetch(ADOBE_RGB_URL),
        ]);
        if (!respGracol.ok) {
            throw new Error('Failed to fetch ' + PROFILE_URL + ' (' + respGracol.status + ')');
        }
        if (!respAdobe.ok) {
            throw new Error('Failed to fetch ' + ADOBE_RGB_URL + ' (' + respAdobe.status + ')');
        }
        const [bufGracolAb, bufAdobeAb] = await Promise.all([
            respGracol.arrayBuffer(),
            respAdobe.arrayBuffer(),
        ]);
        const buf = new Uint8Array(bufGracolAb);
        const bufAdobe = new Uint8Array(bufAdobeAb);
        state.profileBytes = buf;
        state.adobeProfileBytes = bufAdobe;

        // jsce GRACoL profile - decode in-memory (sync after we hand it the bytes)
        state.jsGracol = new state.jsce.Profile();
        state.jsGracol.loadBinary(buf);
        if (!state.jsGracol.loaded) {
            throw new Error('jsColorEngine: failed to decode CoatedGRACoL2006.icc');
        }
        // jsce side of RGB->RGB uses the built-in '*adobergb' profile
        // (same matrix primaries + gamma 2.2 as the AdobeRGB ICC bytes we
        // feed to lcms). We're not loading the ICC into jsce because the
        // built-in resolves faster and is byte-identical in colour output
        // for our purposes. Smoke-tested: 65K pixel RGB->RGB matches to
        // within +/-1 LSB vs the ICC.
        const profileMs = nowMs() - t0;
        $('#info-profile').textContent =
            'CoatedGRACoL2006.icc (' + (buf.byteLength / 1024).toFixed(0) + ' KB) + ' +
            'AdobeRGB1998.icc (' + (bufAdobe.byteLength / 1024).toFixed(1) + ' KB), ' +
            'fetched + decoded in ' + profileMs.toFixed(0) + ' ms';
        $('#info-profile').classList.add('is-ok');

        // ---- 4. lcms-wasm (best-effort - missing => disable lcms rows) ----
        try {
            const { lcms, consts } = await loadLcms();
            state.lcms = lcms;
            state.lcmsConsts = consts;
            state.lcmsProfiles = buildProfiles(lcms, buf, bufAdobe);
            state.lcmsAvailable = true;
            // Inspect lcms.wasm to surface its build characteristics. This
            // is what tells the user why jsce can legitimately beat lcms
            // on SIMD-capable hosts: lcms-wasm is a straight emcc -O3
            // build of LittleCMS 2.16 *without* -msimd128, so every
            // lcms kernel runs scalar regardless of host SIMD support.
            // Meanwhile jsce ships hand-tuned v128 kernels that ARE SIMD.
            // Not a bench bias - a genuine capability gap.
            const build = await probeLcmsBuild();
            let tag = '';
            if (build && !build.error) {
                // Informational only: whether the shipped binary uses SIMD
                // is a property of that binary, not a host-capability
                // problem, so the cell stays green when lcms loads.
                tag = build.hasSimd
                    ? ' - SIMD build'
                    : ' - stock scalar build';
            }
            $('#info-lcms').textContent =
                'lcms ' + (consts.LCMS_VERSION || '?') + ' (wasm32) loaded' + tag;
            $('#info-lcms').classList.add('is-ok');
        } catch (err) {
            state.lcmsAvailable = false;
            $('#info-lcms').textContent =
                'NOT AVAILABLE - add lcms.js + lcms.wasm under samples/lcms-wasm-dist/';
            $('#info-lcms').classList.add('is-warn');
            console.warn('lcms-wasm load failed:', err);
            // Disable the lcms checkbox since we can't run those rows
            const cb = $('#incl-lcms-full');
            if (cb) { cb.checked = false; cb.disabled = true; }
        }
    })().catch((err) => {
        // Surface init errors prominently; clearing the memo allows a retry.
        state.initOnce = null;
        console.error(err);

        const banner = document.createElement('div');
        banner.className = 'card';

        const heading = document.createElement('h2');
        heading.setAttribute('style', 'color:var(--error)');
        heading.textContent = 'Init failed';

        const details = document.createElement('pre');
        details.setAttribute('style', 'white-space:pre-wrap; color:var(--error)');
        // textContent keeps "<anonymous>"-style frames visible and never
        // interprets the error text as markup.
        details.textContent = String(err && err.stack || err);

        banner.appendChild(heading);
        banner.appendChild(details);

        // Fall back to <body> so a page without <main> still shows the
        // banner instead of replacing `err` with a TypeError.
        const host = document.querySelector('main') || document.body;
        if (host) {
            host.prepend(banner);
        }
        throw err;
    });
    return state.initOnce;
}
// ============================================================ JSCE RUNNER
/**
 * Create a jsColorEngine runner for one (direction, mode, pixelCount) cell.
 * The returned object has the same shape as the lcms runner so the timing
 * harness can treat both alike.
 *
 * What `lutBuildMs` covers: constructing the Transform and calling create()
 * - i.e. pipeline build, LUT bake and, for WASM modes, module setup. Profile
 * decoding happened earlier in init(), and the input buffer is generated
 * before the clock starts, so neither is included.
 *
 * @param {object} dir        direction descriptor; only `dir.js` is read
 * @param {string} modeId     requested mode id (see JSCE_MODES)
 * @param {number} pixelCount pixels in the generated input buffer
 * @param {object} [sharedWasmCache] passed through as `wasmCache` for every
 *     LUT mode except plain `int16` (and not for `no-lut`)
 * @returns {{run: Function, free: Function, lutBuildMs: number,
 *            actualMode: string, lutDesc: string, input: object}}
 *     `actualMode` is the Transform's own `lutMode` after create(), which may
 *     differ from `modeId` if the engine demoted the request.
 */
function makeJsceRunner(dir, modeId, pixelCount, sharedWasmCache) {
    const engine = state.jsce;
    const workflow = dir.js;
    const noLut = modeId === 'no-lut';
    // `int16` plus its `int16-*` WASM siblings all take and produce u16 data.
    const u16Io = modeId === 'int16' || modeId.startsWith('int16-');

    // Buffer width follows the I/O format, not the LUT mode.
    const input = u16Io
        ? buildInputU16(workflow.inCh, pixelCount)
        : buildInput(workflow.inCh, pixelCount);

    const buildStart = nowMs();
    let options;
    switch (modeId) {
        case 'no-lut':
            options = { dataFormat: 'int8', buildLut: false };
            break;
        case 'int16':
            // Pin lutMode explicitly: without it the engine would auto-pick
            // an int16-family kernel, and this row is meant to measure the
            // JS u16 kernel specifically.
            options = { dataFormat: 'int16', buildLut: true, lutMode: 'int16' };
            break;
        case 'int16-wasm-scalar':
        case 'int16-wasm-simd':
            // u16 I/O with an explicit WASM kernel; the shared cache lets
            // both rows reuse work within one run.
            options = { dataFormat: 'int16', buildLut: true, lutMode: modeId, wasmCache: sharedWasmCache };
            break;
        default:
            options = { dataFormat: 'int8', buildLut: true, lutMode: modeId, wasmCache: sharedWasmCache };
            break;
    }
    const xform = new engine.Transform(options);
    xform.create(workflow.src, workflow.dst, engine.eIntent.relative);
    const lutBuildMs = nowMs() - buildStart;

    // The engine may have demoted the request (e.g. no SIMD on this host).
    const actualMode = noLut ? 'no-lut' : xform.lutMode;

    // Human-readable storage summary for the results column:
    //   no-lut -> no CLUT at all, full f64 pipeline per pixel
    //   float  -> f64 CLUT cells
    //   int*   -> u16 CLUT cells
    let lutDesc = '-';
    if (noLut) {
        lutDesc = 'no LUT (f64 pipeline)';
    } else if (xform.lut && xform.lut.CLUT) {
        const gridPoints = xform.lut.g1 || 0;
        const axisCount = xform.lut.inputChannels || workflow.inCh;
        // e.g. "33x33x33" (with a multiplication sign between axes)
        const grid = new Array(axisCount).fill(gridPoints).join('\u00d7');
        const cellType = (modeId === 'float') ? 'f64' : 'u16';
        // Flag u16 I/O so "same LUT, different surface" is visible.
        const ioSuffix = u16Io ? ' (u16 I/O)' : '';
        lutDesc = grid + ' ' + cellType + ioSuffix;
    }

    const run = () => {
        xform.transformArray(input);
    };
    // Intentionally a no-op: the Transform has no explicit release call, so
    // cleanup is left to the garbage collector.
    const free = () => {};

    return { run, free, lutBuildMs, actualMode, lutDesc, input };
}
// ============================================================ MEASUREMENT CORE
/**
 * Time a runner and return { lutBuildMs, coldMs, hotMs, mpxs, samples }.
 *
 *   lutBuildMs  copied from `runner.lutBuildMs` (that cost is paid while the
 *               runner is created, before this function sees it)
 *   coldMs      duration of the very first run() call
 *   samples     per-iteration ms for each of 5 independently timed batches
 *   hotMs       median(samples) - separate batches keep a single GC pause
 *               from poisoning the steady-state figure
 *   mpxs        throughput derived from hotMs and pixelCount
 *
 * The UI is yielded to after every warmup chunk of 50 iterations and after
 * every hot batch, never inside a timed region.
 *
 * @param {{run: Function, lutBuildMs: number}} runner
 * @param {number} pixelCount        pixels processed per run() call
 * @param {number} warmupIters       untimed iterations before measuring
 * @param {number} hotItersPerBatch  iterations in each timed batch
 * @returns {Promise<object>} the timing struct described above
 */
async function measureRunner(runner, pixelCount, warmupIters, hotItersPerBatch) {
    const BATCHES = 5;
    const WARMUP_CHUNK = 50;

    // 1) Cold: first call, whatever one-off costs it carries.
    const coldStart = nowMs();
    runner.run();
    const coldMs = nowMs() - coldStart;

    // 2) Warmup in chunks. Chunks are kept large on purpose: yielding too
    //    often would get in the way of the JIT tiering the kernel up.
    let done = 0;
    while (done < warmupIters) {
        const chunkEnd = Math.min(warmupIters, done + WARMUP_CHUNK);
        for (; done < chunkEnd; done++) runner.run();
        await yieldUi();
    }

    // 3) Hot: independent timed batches, reduced with the median below.
    const samples = [];
    for (let batch = 0; batch < BATCHES; batch += 1) {
        const batchStart = nowMs();
        for (let iter = 0; iter < hotItersPerBatch; iter++) runner.run();
        const batchEnd = nowMs();
        samples.push((batchEnd - batchStart) / hotItersPerBatch);
        await yieldUi();
    }

    const hotMs = median(samples);
    const mpxs = mpxPerSec(hotMs, pixelCount);
    return { lutBuildMs: runner.lutBuildMs, coldMs, hotMs, mpxs, samples };
}
// ============================================================ FULL COMPARISON
/**
 * Short "what is the hot path made of" tag for a config row, so mixed rows
 * can be compared without re-reading the Mode label:
 *
 *   jsce  no-lut / float       -> 'f64'
 *   jsce  int16 and int16-*    -> 'u16' (16-bit I/O)
 *   jsce  anything else        -> 'u8'  (8-bit I/O over a u16 integer LUT)
 *   lcms  NOOPTIMIZE flag set  -> 'u16' (per the file's notes, this wasm
 *                                 build runs NOOPT as an integer u16 pipeline)
 *   lcms  otherwise            -> 'u16' for bitDepth 16, else 'u8'
 *
 * @param {object} cfg config row: `kind`, plus `mode.id` for jsce rows or
 *     `lcmsFlags` / `bitDepth` for lcms rows
 * @returns {'f64'|'u16'|'u8'}
 */
function benchTypeLabel(cfg) {
    if (cfg.kind !== 'jsce') {
        // The NOOPTIMIZE constant is only known once lcms has been loaded.
        const nooptFlag = state.lcmsConsts && state.lcmsConsts.cmsFLAGS_NOOPTIMIZE;
        const isNoopt = Boolean(nooptFlag && (cfg.lcmsFlags & nooptFlag));
        return (isNoopt || cfg.bitDepth === 16) ? 'u16' : 'u8';
    }
    const modeId = cfg.mode.id;
    if (modeId === 'no-lut' || modeId === 'float') {
        return 'f64';
    }
    const isInt16Family = modeId === 'int16' || modeId.startsWith('int16-');
    return isInt16Family ? 'u16' : 'u8';
}
/**
 * CSS badge class for a mode id. The known jsce mode ids map to their own
 * badge; every other id gets the lcms badge.
 *
 * @param {string} modeId mode id (see JSCE_MODES)
 * @returns {string} badge class name
 */
function badgeForMode(modeId) {
    switch (modeId) {
        case 'no-lut':
            return 'b-nolut';
        case 'float':
            return 'b-float';
        case 'int':
            return 'b-int';
        case 'int16':
            return 'b-int16';
        case 'int16-wasm-scalar':
            return 'b-int16ws';
        case 'int16-wasm-simd':
            return 'b-int16wsi';
        case 'int-wasm-scalar':
            return 'b-wasm';
        case 'int-wasm-simd':
            return 'b-simd';
        default:
            return 'b-lcms';
    }
}
/**
 * Update the progress widget of one panel (`#progress-<panelId>`).
 * Does nothing when that element is not on the page.
 *
 * @param {string} panelId suffix of the widget's element id
 * @param {number} pct     completion as a 0..1 fraction (rendered as a
 *     percentage width with one decimal)
 * @param {string} text    status text to display
 * @param {string} [kind]  'busy', 'done' or 'error'; previous state classes
 *     are always cleared and `is-<kind>` is applied only when `kind` is truthy
 */
function setProgress(panelId, pct, text, kind) {
    const widget = $(`#progress-${panelId}`);
    if (!widget) {
        return;
    }
    const bar = widget.querySelector('.progress-bar');
    bar.style.width = `${(pct * 100).toFixed(1)}%`;

    const label = widget.querySelector('.progress-text');
    label.textContent = text;
    label.classList.remove('is-busy', 'is-done', 'is-error');
    if (kind) {
        label.classList.add(`is-${kind}`);
    }
}
/**
 * Run the "Full comparison" benchmark and render its results table.
 *
 * Steps:
 *   1. Read pixel count / warm-up / hot-batch settings from the panel inputs.
 *   2. Build the config matrix: every entry of JSCE_MODES for every
 *      direction, plus (when lcms is ticked and available) six lcms-wasm
 *      variants per direction.
 *   3. Measure each config in turn, appending one row per config to
 *      `#results-full tbody`. A config that throws gets an error row and the
 *      run carries on with the next config.
 *   4. Post-process: reorder rows, scale the bars against the fastest row of
 *      each direction, highlight that row, and fill the "vs jsce int" column.
 *   5. Render the summary cards and stash the run in `state.lastFullResults`
 *      for the markdown copy button.
 *
 * The Run button is disabled for the duration of the run and is always
 * re-enabled, even if the run aborts with an exception. The Copy button is
 * re-enabled only after a run has completed and its results are stashed.
 *
 * @returns {Promise<void>} Resolves when the table and summary are rendered.
 */
async function runFullComparison() {
  await init();
  const pixelCount = parseInt($('#pixels-full').value, 10);
  const warmupIters = parseInt($('#warmup-full').value, 10);
  const hotPerBatch = parseInt($('#hot-full').value, 10);
  const inclLcms = $('#incl-lcms-full').checked && state.lcmsAvailable;
  const directions = directionConfigs(state.jsGracol);

  // `lutDesc` for lcms is inferred from lcms's own grid-selection rule
  // (mirrored in lcmsReasonableGrid()). lcms-wasm doesn't expose the
  // precalc-LUT grid back to JS through its public API, but the rule is
  // deterministic in the C source
  // (cmspcs.c :: _cmsReasonableGridpointsByColorspace), so we can
  // reconstruct the grid size exactly.
  const mkLcmsLutDesc = (flags, inCh, bit) => {
    const g = lcmsReasonableGrid(inCh, flags, state.lcmsConsts);
    if (g === 0) return 'no LUT (pipeline, ' + bit + '-bit I/O)';
    // lcms's precalc LUT cells are always Uint16 internally, regardless of
    // the I/O bit depth - what changes between 8-bit and 16-bit modes is the
    // format converter at the pipeline edges, not the LUT shape. Tag the I/O
    // width so the comparison vs jsce 'int' / 'int16' is unambiguous.
    return new Array(inCh).fill(g).join('\u00d7') + ' u16 (' + bit + '-bit I/O)';
  };

  // ---- Build the full config matrix ----
  const configs = [];
  for (const dir of directions) {
    for (const mode of JSCE_MODES) {
      configs.push({ kind: 'jsce', dir, mode, isLut: mode.isLut });
    }
    if (!inclLcms) continue;

    // Three lcms variants so there is no question we've given lcms every
    // chance to shine:
    //   - default (0)    : what every real lcms app uses; lcms picks the
    //                      grid size that *it* considers optimal for this
    //                      profile (usually 33 for CMYK, matches jsce).
    //   - HIGHRESPRECALC : force the biggest LUT, matches jsce's design
    //                      philosophy of "always LUT, always precomputed".
    //   - NOOPTIMIZE     : no LUT at all, full per-pixel pipeline. The lcms
    //                      equivalent of jsce `buildLut: false`.
    // All three use pinned wasm-heap buffers and call _cmsDoTransform
    // directly (no per-call _malloc/_free/.slice) - the fastest path
    // callable from JS.
    const consts = state.lcmsConsts;
    const variants = [
      { flags: 0, name: 'default', isLut: true },
      { flags: consts.cmsFLAGS_HIGHRESPRECALC, name: 'HIGHRES', isLut: true },
      { flags: consts.cmsFLAGS_NOOPTIMIZE, name: 'NOOPT', isLut: false },
    ];
    // 8-bit (TYPE_*_8) rows pair with jsce int / float / int-wasm-*; 16-bit
    // (TYPE_*_16) rows pair with jsce int16. The precalc-LUT GRID is the same
    // for both (lcms picks grid by colourspace + flags, not by I/O bit
    // depth), but the format converters at the pipeline edges flip from u8
    // to u16 - which is why headline MPx/s typically drops ~10-15% vs the
    // 8-bit row.
    for (const bitDepth of [8, 16]) {
      for (const variant of variants) {
        configs.push({
          kind: 'lcms',
          dir,
          lcmsFlags: variant.flags,
          label: 'lcms-wasm ' + variant.name + (bitDepth === 16 ? ' 16' : ''),
          badge: 'b-lcms',
          isLut: variant.isLut,
          bitDepth,
          lutDesc: mkLcmsLutDesc(variant.flags, dir.lcms.inCh, bitDepth),
        });
      }
    }
  }

  // ---- Reset table and lock the buttons for the duration of the run ----
  const tbody = $('#results-full tbody');
  tbody.innerHTML = '';
  $('#run-full').disabled = true;
  $('#copy-full').disabled = true;

  try {
    // Shared WASM module cache - means int-wasm-scalar + int-wasm-simd
    // configs in the same run share compile work. (Each Transform still
    // gets its own linear-memory instance.)
    const sharedWasmCache = {};
    let results = []; // collected for markdown / vs-int normalisation
    let prevDirId = null;

    for (let i = 0; i < configs.length; i++) {
      const cfg = configs[i];
      const isJsce = cfg.kind === 'jsce';
      setProgress('full',
        i / configs.length,
        'Running ' + cfg.dir.shortLabel.replace('→', '->') + ' / ' +
          (isJsce ? cfg.mode.label : cfg.label) +
          ' (' + (i + 1) + '/' + configs.length + ')',
        'busy');
      await yieldUi();

      const tr = document.createElement('tr');
      tr.dataset.benchDir = cfg.dir.id;
      // First row of each direction gets a visual separator.
      if (cfg.dir.id !== prevDirId) tr.classList.add('dir-sep');
      prevDirId = cfg.dir.id;

      try {
        const is16 = isJsce
          ? (cfg.mode.dataFormat === 'int16')
          : (cfg.bitDepth === 16);
        let result;
        let kernelLabel;
        let badgeCls;
        let lutDesc;
        let demoted = false;
        let runner = null;
        try {
          if (isJsce) {
            runner = makeJsceRunner(cfg.dir, cfg.mode.id, pixelCount, sharedWasmCache);
            result = await measureRunner(runner, pixelCount, warmupIters, hotPerBatch);
            kernelLabel = cfg.mode.label;
            badgeCls = cfg.mode.badge;
            lutDesc = runner.lutDesc;
            // The runner reports the mode it actually used; show it when it
            // differs from the one requested.
            if (runner.actualMode !== cfg.mode.id) {
              kernelLabel = cfg.mode.label + ' [' + runner.actualMode + ']';
              demoted = true;
            }
          } else {
            // lcms - flip format tag + input width by bitDepth (8 or 16).
            const lw = cfg.dir.lcms;
            const input = is16
              ? buildInputU16(lw.inCh, pixelCount)
              : buildInput(lw.inCh, pixelCount);
            runner = makeLcmsRunner(
              state.lcms, state.lcmsConsts,
              {
                pIn: state.lcmsProfiles[lw.pIn],
                fIn: state.lcmsConsts[is16 ? lw.fIn16 : lw.fIn],
                pOut: state.lcmsProfiles[lw.pOut],
                fOut: state.lcmsConsts[is16 ? lw.fOut16 : lw.fOut],
                inCh: lw.inCh, outCh: lw.outCh,
              },
              cfg.lcmsFlags, input, pixelCount
            );
            result = await measureRunner(runner, pixelCount, warmupIters, hotPerBatch);
            kernelLabel = cfg.label;
            badgeCls = cfg.badge;
            lutDesc = cfg.lutDesc;
          }
        } finally {
          // Release the runner even when measurement throws, so a failing
          // config does not leave its runner allocated for the rest of the run.
          if (runner) runner.free();
        }

        const typeCode = benchTypeLabel(cfg);
        const wf = isJsce ? cfg.dir.js : cfg.dir.lcms;
        const bpp = totalBpp(wf.inCh, wf.outCh, is16);
        const mbps = result.mpxs * bpp;
        tr.innerHTML =
          '<td>' + cfg.dir.shortLabel + '</td>' +
          '<td class="mode-cell ' + (demoted ? 'demoted' : '') + '">' +
            '<span class="mode-badge ' + badgeCls + '">' + kernelLabel + '</span>' +
          '</td>' +
          '<td class="type-cell" title="f64=jsce no-LUT/float; u16 (NOOPT)=lcms-wasm integer pipeline in this build; u8/u16=I/O + integer LUT as labelled.">' +
            typeCode + '</td>' +
          '<td class="lut-cell">' + lutDesc + '</td>' +
          '<td class="num">' + fmtMs(result.lutBuildMs) + '</td>' +
          '<td class="num">' + fmtMs(result.coldMs) + '</td>' +
          '<td class="num">' + fmtMs(result.hotMs) + '</td>' +
          '<td class="num">' + fmtMpx(result.mpxs) + '</td>' +
          '<td class="num">' + fmtMBs(mbps) + '</td>' +
          '<td class="bar-col"><div class="bar"><div class="bar-fill ' + badgeCls + '" data-mpx="' + result.mpxs + '"></div></div></td>' +
          '<td class="num" data-direction="' + cfg.dir.id + '" data-mpx="' + result.mpxs + '">-</td>';
        tbody.appendChild(tr);
        results.push({
          dirId: cfg.dir.id,
          dirLabel: cfg.dir.shortLabel,
          mode: kernelLabel,
          typeCode,
          kind: cfg.kind,
          isLut: cfg.isLut,
          lutDesc,
          mbps,
          ...result,
        });
      } catch (err) {
        tr.innerHTML =
          '<td>' + cfg.dir.shortLabel + '</td>' +
          '<td class="mode-cell"><span class="mode-badge ' + badgeForMode(isJsce ? cfg.mode.id : 'lcms') + '">' +
            (isJsce ? cfg.mode.label : cfg.label) + '</span></td>' +
          '<td class="type-cell type-cell-na">—</td>';
        // The message is arbitrary text from whatever threw; insert it as
        // text (not markup) so '<' or '&' in it cannot break the row.
        const errCell = document.createElement('td');
        errCell.className = 'error-cell';
        errCell.colSpan = 8;
        errCell.textContent = String((err && err.message) || err);
        tr.appendChild(errCell);
        tbody.appendChild(tr);
        console.error('Bench cell failed:', cfg, err);
      }
      await yieldUi();
    }

    reorderFullComparisonTbody(tbody, directions);
    results = sortFullResultsByDirAndMpx(results, directions);

    // ---- Post-process: normalise bars per direction + compute vs-int ----
    const fastestPerDir = {};
    const intMpxPerDir = {};
    for (const r of results) {
      if (!fastestPerDir[r.dirId] || r.mpxs > fastestPerDir[r.dirId]) {
        fastestPerDir[r.dirId] = r.mpxs;
      }
      // 'jsce int' is the vs-ref baseline (v1.1's default LUT kernel).
      if (r.mode === 'jsce int') intMpxPerDir[r.dirId] = r.mpxs;
    }
    $$('#results-full .bar-col .bar-fill').forEach((el) => {
      const mpx = parseFloat(el.dataset.mpx);
      const row = el.closest('tr');
      const dirId = row.querySelector('td.num[data-direction]').dataset.direction;
      const fastest = fastestPerDir[dirId];
      const pct = fastest > 0 ? (mpx / fastest) * 100 : 0;
      el.style.width = pct.toFixed(1) + '%';
    });
    // Highlight the fastest per direction and fill the vs-int column.
    // Error rows have no td.num[data-mpx] cell and are skipped.
    $$('#results-full tbody tr').forEach((row) => {
      const cell = row.querySelector('td.num[data-mpx]');
      if (!cell) return;
      const mpx = parseFloat(cell.dataset.mpx);
      const dirId = cell.dataset.direction;
      const fastest = fastestPerDir[dirId];
      if (Math.abs(mpx - fastest) < 0.001) {
        row.querySelectorAll('td.num').forEach((td) => td.classList.add('is-best'));
      }
      const intMpx = intMpxPerDir[dirId];
      if (intMpx > 0) {
        cell.textContent = (mpx / intMpx).toFixed(2) + 'x';
      }
    });

    // ---- Summary cards: best-of per use case ----
    // Use case grid:
    //   accuracy => isLut=false (jsce no-LUT / lcms NOOPTIMIZE)
    //   image    => isLut=true  (any LUT-backed mode)
    // For each (engine, case) tuple we pick the fastest row in EACH
    // direction, so the reader sees direction-specific numbers rather than a
    // single headline that's always going to be RGB->RGB (the cheapest
    // direction).
    renderSummaryCards(results, directions, state.lcmsAvailable);
    setProgress('full', 1, 'Done. ' + configs.length + ' configs measured. Best per direction is highlighted.', 'done');

    // Stash for the markdown copy button, then allow copying.
    state.lastFullResults = { results, fastestPerDir, intMpxPerDir, pixelCount, warmupIters, hotPerBatch };
    $('#copy-full').disabled = false;
  } finally {
    // Always give the Run button back, even if the run aborted part-way.
    $('#run-full').disabled = false;
  }
}
/**
* Render the four "best of" summary cards at the top of the Full comparison
* panel. Each card is one (engine, useCase) tuple; rows within a card are
* the best-in-class MPx/s for each of the four directions.
*
* Winners are picked strictly from the current run's `results` array - no
* persistence across runs. If a category has zero results (e.g. lcms was
* excluded) the card shows em-dashes.
*/
function renderSummaryCards(results, directions, lcmsAvailable) {
const wrap = $('#summary-full');
if (!wrap) return;
wrap.hidden = false;
// Category predicates: (result) => boolean
const cats = [
{ id: 'jsce-accuracy', key: (r) => r.kind === 'jsce' && !r.isLut },
{ id: 'jsce-lut', key: (r) => r.kind === 'jsce' && r.isLut },
{ id: 'lcms-accuracy', key: (r) => r.kind === 'lcms' && !r.isLut },
{ id: 'lcms-lut', key: (r) => r.kind === 'lcms' && r.isLut },
];
for (const cat of cats) {
// winner per direction in this category
const byDir = {};
for (const r of results) {
if (!cat.key(r)) continue;
const cur = byDir[r.dirId];
if (!cur || r.mpxs > cur.mpxs) byDir[r.dirId] = r;
}
const rowsHtml = directions.map((dir) => {
const r = byDir[dir.id];
if (!r) {
return '<tr><td>' + dir.shortLabel + '</td>' +
'<td class="scr-mode">—</td>' +
'<td class="num scr-mpx">—</td></tr>';
}
// strip the "jsce " / "lcms-wasm " prefix for readability in the card
const shortMode = r.mode.replace(/^jsce\s+/i, '').replace(/^lcms-wasm\s+/i, '');
return '<tr>' +
'<td>' + dir.shortLabel + '</td>' +
'<td class="scr-mode">' + shortMode + '</td>' +
'<td class="num scr-mpx">' + fmtMpx(r.mpxs) + '</td>' +
'</tr>';
}).join('');
// min/max across directions: shown BIG at top of each card as a
// headline range ("5.4 - 12 MPx/s"), so the reader can glance at one
// number and know what they're trading between worst/best direction.
let loR = null, hiR = null;
for (const dirId in byDir) {
const r = byDir[dirId];
if (!loR || r.mpxs < loR.mpxs) loR = r;
if (!hiR || r.mpxs > hiR.mpxs) hiR = r;
}
const card = $('#sum-card-' + cat.id);
if (!card) continue;
const rangeEl = card.querySelector('.summary-range');
if (loR && hiR) {