Skip to content

Commit 63e2708

Browse files
leifericf and claude committed
bc: add per-opcode dispatch counter behind a build flag
VM perf work needs to know which opcodes dominate the dispatch loop. Sample-based profiling is too coarse; the existing MINO_BC_PROFILE_COUNTS flag only tracks int-allocation rates. This adds MINO_BC_OP_COUNTS=1: a per-opcode counter array incremented once per dispatch, dumped to stderr at process exit via atexit. Output is sorted by frequency descending and includes percentage + cumulative percentage. Production builds (without the flag) are byte-identical to before. Captured findings from running the full microbench suite under this build at v0.157.0: 18 of 63 opcodes account for ~99% of dispatches. Top five are OP_RETURN, OP_LOAD_K, OP_MOVE, OP_JMPIFNOT, and OP_GETGLOBAL_CACHED. fib-30's hot loop touches only 8 distinct opcodes; tail-call-100k touches 6. loop-recur-1M and pipeline-sum are no longer dispatch-bound (OP_LOOP_INT_DEC_INC and v0.157.0's seq-fusion handle their inner loops respectively). These findings shape the post-v0.157.0 VM perf plan (in .local/, gitignored). Verification: 1 659 tests / 7 690 assertions green on release. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 77a3612 commit 63e2708

3 files changed

Lines changed: 156 additions & 1 deletion

File tree

CHANGELOG.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,37 @@
11
# Changelog
22

3+
## v0.157.1 — Per-opcode Dispatch Counter Build Flag
4+
5+
Adds `MINO_BC_OP_COUNTS=1` build flag that wires a per-opcode
6+
dispatch counter into `vm.c` and dumps the totals to stderr at
7+
process exit. Useful for VM perf work — answers "which opcodes
8+
actually dominate the dispatch loop?" without resorting to
9+
sample-based profiling. The flag adds two branches + one increment
10+
per dispatch when set; production builds (no flag) are
11+
byte-identical.
12+
13+
Build with the flag:
14+
15+
```
16+
cc ... -DMINO_BC_OP_COUNTS=1 ... -o mino_opcounts ...
17+
./mino_opcounts your_script.clj 2> opcounts.txt
18+
```
19+
20+
Output is sorted by descending frequency and includes percentage and cumulative
21+
percentage. Captured findings from this build informed the
22+
post-v0.157.0 VM perf plan (in `.local/`, gitignored): 18 of 63
23+
opcodes account for ~99% of dispatches across the microbench
24+
suite, which validates the hot/cold partition direction.
25+
26+
### Added
27+
28+
- `MINO_BC_OP_COUNTS=1` build flag and the supporting per-opcode
29+
counter array in `src/eval/bc/vm.c`.
30+
- atexit-registered dump function that sorts by dispatch count
31+
descending and prints per-op + cumulative percentages.
32+
33+
Verification: 1 659 tests / 7 690 assertions green on release.
34+
335
## v0.157.0 — Transducer Fusion For Reduce Pipelines
436

537
`(reduce f init (->> src (map ...) (filter ...) (take ...)))` no

src/eval/bc/vm.c

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <stddef.h>
1717
#include <stdint.h>
1818
#include <stdio.h>
19+
#include <stdlib.h>
1920
#include <string.h>
2021

2122
#include "mino.h"
@@ -28,6 +29,121 @@
2829

2930
extern mino_val_t *mino_nil(mino_state_t *S);
3031

32+
#ifdef MINO_BC_OP_COUNTS
33+
/* Per-opcode dispatch counter, populated when the binary is built with
34+
* -DMINO_BC_OP_COUNTS=1. Dumped to stderr at process exit. Used during
35+
* VM design experiments to identify which opcodes dominate the
36+
* dispatch loop (hot/cold partition decisions). Not for production. */
37+
/* Dispatch tally with one slot per opcode, indexed by opcode number. */
static size_t g_op_counts[OP__COUNT];
38+
/* Nonzero once op_counts_dump has been handed to atexit, so the
 * dispatch loop registers the dump handler only a single time. */
static int g_op_counts_atexit_registered;
39+
40+
/* Declared ahead of op_counts_dump, which calls it before the
 * definition appears further down. */
static const char *op_count_name(unsigned op);
41+
42+
static void op_counts_dump(void)
43+
{
44+
size_t total = 0;
45+
int i;
46+
/* Pack (idx, count) pairs so the sort preserves opcode identity. */
47+
typedef struct { unsigned op; size_t count; } row_t;
48+
row_t rows[OP__COUNT];
49+
for (i = 0; i < OP__COUNT; i++) {
50+
rows[i].op = (unsigned)i;
51+
rows[i].count = g_op_counts[i];
52+
total += rows[i].count;
53+
}
54+
/* Sort by count descending. N is small (~63); bubble-sort fine. */
55+
for (i = 0; i < OP__COUNT; i++) {
56+
int j;
57+
for (j = i + 1; j < OP__COUNT; j++) {
58+
if (rows[j].count > rows[i].count) {
59+
row_t t = rows[i]; rows[i] = rows[j]; rows[j] = t;
60+
}
61+
}
62+
}
63+
fprintf(stderr, "bc-op-counts: total dispatches = %zu\n", total);
64+
if (total == 0) return;
65+
size_t cumulative = 0;
66+
for (i = 0; i < OP__COUNT; i++) {
67+
if (rows[i].count == 0) break;
68+
cumulative += rows[i].count;
69+
fprintf(stderr, " %-25s %12zu %6.2f%% cum=%6.2f%%\n",
70+
op_count_name(rows[i].op), rows[i].count,
71+
100.0 * (double)rows[i].count / (double)total,
72+
100.0 * (double)cumulative / (double)total);
73+
}
74+
}
75+
76+
static const char *op_count_name(unsigned op)
77+
{
78+
switch (op) {
79+
case OP_NOP: return "OP_NOP";
80+
case OP_MOVE: return "OP_MOVE";
81+
case OP_LOAD_K: return "OP_LOAD_K";
82+
case OP_GETGLOBAL: return "OP_GETGLOBAL";
83+
case OP_SETGLOBAL: return "OP_SETGLOBAL";
84+
case OP_JMP: return "OP_JMP";
85+
case OP_JMPIFNOT: return "OP_JMPIFNOT";
86+
case OP_CALL: return "OP_CALL";
87+
case OP_TAILCALL: return "OP_TAILCALL";
88+
case OP_RETURN: return "OP_RETURN";
89+
case OP_CLOSURE: return "OP_CLOSURE";
90+
case OP_BINOP_INT: return "OP_BINOP_INT";
91+
case OP_PUSHCATCH: return "OP_PUSHCATCH";
92+
case OP_POPCATCH: return "OP_POPCATCH";
93+
case OP_THROW: return "OP_THROW";
94+
case OP_PUSHDYN: return "OP_PUSHDYN";
95+
case OP_POPDYN: return "OP_POPDYN";
96+
case OP_MAKE_LAZY: return "OP_MAKE_LAZY";
97+
case OP_GETGLOBAL_CACHED: return "OP_GETGLOBAL_CACHED";
98+
case OP_CALL_CACHED: return "OP_CALL_CACHED";
99+
case OP_ADD_II: return "OP_ADD_II";
100+
case OP_SUB_II: return "OP_SUB_II";
101+
case OP_MUL_II: return "OP_MUL_II";
102+
case OP_LT_II: return "OP_LT_II";
103+
case OP_LE_II: return "OP_LE_II";
104+
case OP_GT_II: return "OP_GT_II";
105+
case OP_GE_II: return "OP_GE_II";
106+
case OP_EQ_II: return "OP_EQ_II";
107+
case OP_INC_I: return "OP_INC_I";
108+
case OP_DEC_I: return "OP_DEC_I";
109+
case OP_ZERO_INT_P: return "OP_ZERO_INT_P";
110+
case OP_MOD_II: return "OP_MOD_II";
111+
case OP_QUOT_II: return "OP_QUOT_II";
112+
case OP_REM_II: return "OP_REM_II";
113+
case OP_BAND_II: return "OP_BAND_II";
114+
case OP_BOR_II: return "OP_BOR_II";
115+
case OP_BXOR_II: return "OP_BXOR_II";
116+
case OP_SHL_II: return "OP_SHL_II";
117+
case OP_SHR_II: return "OP_SHR_II";
118+
case OP_USHR_II: return "OP_USHR_II";
119+
case OP_POS_P_I: return "OP_POS_P_I";
120+
case OP_NEG_P_I: return "OP_NEG_P_I";
121+
case OP_EVEN_P_I: return "OP_EVEN_P_I";
122+
case OP_ODD_P_I: return "OP_ODD_P_I";
123+
case OP_BNOT_I: return "OP_BNOT_I";
124+
case OP_ADD_IK: return "OP_ADD_IK";
125+
case OP_SUB_IK: return "OP_SUB_IK";
126+
case OP_LT_IK: return "OP_LT_IK";
127+
case OP_LE_IK: return "OP_LE_IK";
128+
case OP_EQ_IK: return "OP_EQ_IK";
129+
case OP_GET_KW_MAP: return "OP_GET_KW_MAP";
130+
case OP_NTH_VEC: return "OP_NTH_VEC";
131+
case OP_CONJ_VEC: return "OP_CONJ_VEC";
132+
case OP_ASSOC: return "OP_ASSOC";
133+
case OP_DISSOC: return "OP_DISSOC";
134+
case OP_FIRST_VEC: return "OP_FIRST_VEC";
135+
case OP_COUNT_VEC: return "OP_COUNT_VEC";
136+
case OP_EMPTY_VEC: return "OP_EMPTY_VEC";
137+
case OP_LOOP_INT_DEC: return "OP_LOOP_INT_DEC";
138+
case OP_LOOP_INT_DEC_INC: return "OP_LOOP_INT_DEC_INC";
139+
case OP_PUSH_ENV: return "OP_PUSH_ENV";
140+
case OP_POP_ENV: return "OP_POP_ENV";
141+
case OP_ENV_BIND: return "OP_ENV_BIND";
142+
default: return "OP_UNKNOWN";
143+
}
144+
}
145+
#endif
146+
31147
/* Grow S->bc_regs to hold an additional `n` slots and return the base
32148
* index of the new window. Returns (size_t)-1 on allocation failure. */
33149
static size_t bc_push_window(mino_state_t *S, int n)
@@ -366,6 +482,13 @@ mino_val_t *mino_bc_run(mino_state_t *S, mino_val_t *fn_val,
366482
regs = S->bc_regs + base;
367483
mino_bc_insn_t ins = code[pc++];
368484
unsigned op = OP_OF(ins);
485+
#ifdef MINO_BC_OP_COUNTS
486+
if (!g_op_counts_atexit_registered) {
487+
atexit(op_counts_dump);
488+
g_op_counts_atexit_registered = 1;
489+
}
490+
if (op < OP__COUNT) g_op_counts[op]++;
491+
#endif
369492
switch (op) {
370493
case OP_NOP:
371494
break;

src/mino.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
*/
2929
#define MINO_VERSION_MAJOR 0
3030
#define MINO_VERSION_MINOR 157
31-
#define MINO_VERSION_PATCH 0
31+
#define MINO_VERSION_PATCH 1
3232

3333
/*
3434
* Human-readable version string of the *linked* runtime, e.g. "0.48.0".

0 commit comments

Comments
 (0)