|
29 | 29 |
|
30 | 30 | extern mino_val_t *mino_nil(mino_state_t *S); |
31 | 31 |
|
| 32 | +#ifdef MINO_CALL_SITE_SHAPES |
| 33 | +/* Per-site tally of OP_CALL_CACHED hits: one row per call site, keyed by the |
| 34 | + * IC slot pointer, splitting hits into int/int argument pairs and all other |
| 35 | + * shapes. Compiled in only when MINO_CALL_SITE_SHAPES is defined (e.g. with |
| 36 | + * -DMINO_CALL_SITE_SHAPES=1). Dumped to stderr at exit. Used to gate the |
| 37 | + * type-feedback IC item: counts sites that hit a canonical arith callee with stable monomorphic-int operand types. Not for production. */ |
| 38 | +#define CALL_SHAPE_SITES_MAX 8192 |
| 39 | +typedef struct { |
| 40 | + const void *slot; /* mino_bc_ic_slot_t * -- unique site key */ |
| 41 | + mino_prim_fn callee_fn; /* resolved callee prim (NULL = not a prim) */ |
| 42 | + size_t total; /* every hit recorded for this site */ |
| 43 | + size_t monomorphic_int_pair; /* both args tagged-int */ |
| 44 | + size_t other_shapes; /* hits that were not a two-int-argument call */ |
| 45 | +} call_shape_row_t; |
| 46 | +static call_shape_row_t g_call_shapes[CALL_SHAPE_SITES_MAX]; /* rows are appended in first-seen order */ |
| 47 | +static int g_call_shapes_used; /* number of rows in use; never exceeds CALL_SHAPE_SITES_MAX */ |
| 48 | +static int g_call_shapes_atexit_done; /* nonzero once the exit-time dump has been registered */ |
| 49 | +static const char *call_shape_prim_name(mino_prim_fn fn); /* forward declaration; defined further down */ |
| 50 | +static void call_shapes_dump(void) |
| 51 | +{ |
| 52 | + int i, hot = 0, mono_hot = 0; |
| 53 | + size_t hot_hits = 0, mono_hits = 0; |
| 54 | + fprintf(stderr, "call-site-shapes: sites tracked = %d (cap=%d)\n", |
| 55 | + g_call_shapes_used, CALL_SHAPE_SITES_MAX); |
| 56 | + for (i = 0; i < g_call_shapes_used; i++) { |
| 57 | + if (g_call_shapes[i].total >= 10000) { |
| 58 | + hot++; |
| 59 | + hot_hits += g_call_shapes[i].total; |
| 60 | + if (g_call_shapes[i].callee_fn != NULL |
| 61 | + && g_call_shapes[i].monomorphic_int_pair * 10 |
| 62 | + >= g_call_shapes[i].total * 9) { |
| 63 | + mono_hot++; |
| 64 | + mono_hits += g_call_shapes[i].monomorphic_int_pair; |
| 65 | + } |
| 66 | + } |
| 67 | + } |
| 68 | + fprintf(stderr, " hot sites (>=10k calls): %d, hits=%zu\n", |
| 69 | + hot, hot_hits); |
| 70 | + fprintf(stderr, " hot+monomorphic-int-prim (>=90%% int pair) sites:" |
| 71 | + " %d, hits=%zu\n", |
| 72 | + mono_hot, mono_hits); |
| 73 | + /* Detail of top monomorphic-int sites */ |
| 74 | + if (mono_hot > 0) { |
| 75 | + int printed = 0; |
| 76 | + fprintf(stderr, " top monomorphic-int sites:\n"); |
| 77 | + for (i = 0; i < g_call_shapes_used && printed < 20; i++) { |
| 78 | + if (g_call_shapes[i].total >= 10000 |
| 79 | + && g_call_shapes[i].callee_fn != NULL |
| 80 | + && g_call_shapes[i].monomorphic_int_pair * 10 |
| 81 | + >= g_call_shapes[i].total * 9) { |
| 82 | + fprintf(stderr, |
| 83 | + " %p %-16s total=%zu mono=%zu (%.1f%%)\n", |
| 84 | + g_call_shapes[i].slot, |
| 85 | + call_shape_prim_name(g_call_shapes[i].callee_fn), |
| 86 | + g_call_shapes[i].total, |
| 87 | + g_call_shapes[i].monomorphic_int_pair, |
| 88 | + 100.0 |
| 89 | + * (double)g_call_shapes[i].monomorphic_int_pair |
| 90 | + / (double)g_call_shapes[i].total); |
| 91 | + printed++; |
| 92 | + } |
| 93 | + } |
| 94 | + } |
| 95 | +} |
| 96 | +static void call_shape_record(const void *slot_ptr, mino_val_t *callee, |
| 97 | + mino_val_t **argv, int argc) |
| 98 | +{ |
| 99 | + int i; |
| 100 | + int idx = -1; |
| 101 | + if (!g_call_shapes_atexit_done) { |
| 102 | + atexit(call_shapes_dump); |
| 103 | + g_call_shapes_atexit_done = 1; |
| 104 | + } |
| 105 | + for (i = 0; i < g_call_shapes_used; i++) { |
| 106 | + if (g_call_shapes[i].slot == slot_ptr) { idx = i; break; } |
| 107 | + } |
| 108 | + if (idx < 0) { |
| 109 | + if (g_call_shapes_used >= CALL_SHAPE_SITES_MAX) return; |
| 110 | + idx = g_call_shapes_used++; |
| 111 | + g_call_shapes[idx].slot = slot_ptr; |
| 112 | + g_call_shapes[idx].callee_fn = |
| 113 | + (callee != NULL && mino_type_of(callee) == MINO_PRIM) |
| 114 | + ? callee->as.prim.fn |
| 115 | + : NULL; |
| 116 | + g_call_shapes[idx].total = 0; |
| 117 | + g_call_shapes[idx].monomorphic_int_pair = 0; |
| 118 | + g_call_shapes[idx].other_shapes = 0; |
| 119 | + } |
| 120 | + g_call_shapes[idx].total++; |
| 121 | + if (argc == 2 && argv[0] != NULL && argv[1] != NULL |
| 122 | + && mino_val_int_p(argv[0]) && mino_val_int_p(argv[1])) { |
| 123 | + g_call_shapes[idx].monomorphic_int_pair++; |
| 124 | + } else { |
| 125 | + g_call_shapes[idx].other_shapes++; |
| 126 | + } |
| 127 | +} |
| 128 | +static const char *call_shape_prim_name(mino_prim_fn fn) |
| 129 | +{ |
| 130 | + if (fn == NULL) return "(non-prim)"; |
| 131 | + if (fn == prim_add) return "prim_add"; |
| 132 | + if (fn == prim_sub) return "prim_sub"; |
| 133 | + if (fn == prim_mul) return "prim_mul"; |
| 134 | + if (fn == prim_addp) return "prim_addp"; |
| 135 | + if (fn == prim_subp) return "prim_subp"; |
| 136 | + if (fn == prim_mulp) return "prim_mulp"; |
| 137 | + if (fn == prim_bit_and) return "prim_bit_and"; |
| 138 | + if (fn == prim_bit_or) return "prim_bit_or"; |
| 139 | + if (fn == prim_bit_xor) return "prim_bit_xor"; |
| 140 | + return "(other-prim)"; |
| 141 | +} |
| 142 | +#endif |
| 143 | + |
32 | 144 | #ifdef MINO_BC_OP_COUNTS |
33 | 145 | /* Per-opcode dispatch counter, populated when the binary is built with |
34 | 146 | * -DMINO_BC_OP_COUNTS=1. Dumped to stderr at process exit. Used during |
@@ -1122,6 +1234,9 @@ mino_val_t *mino_bc_run(mino_state_t *S, mino_val_t *fn_val, |
1122 | 1234 | mino_val_t *callee = ic_resolve_global(S, bc, slot, env, |
1123 | 1235 | dyn_active); |
1124 | 1236 | if (callee == NULL) { ok = 0; goto bc_done; } |
| 1237 | +#ifdef MINO_CALL_SITE_SHAPES |
| 1238 | + call_shape_record(slot, callee, regs + a, (int)argn); |
| 1239 | +#endif |
1125 | 1240 | mino_val_t *r = apply_callable_argv(S, callee, regs + a, |
1126 | 1241 | (int)argn, env); |
1127 | 1242 | if (r == NULL) { ok = 0; goto bc_done; } |
|
0 commit comments