diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h
index ee8885cda7b60d..f52348e42b1330 100644
--- a/Include/cpython/pystats.h
+++ b/Include/cpython/pystats.h
@@ -141,6 +141,14 @@ typedef struct _optimization_stats {
     uint64_t remove_globals_builtins_changed;
     uint64_t remove_globals_incorrect_keys;
     uint64_t error_in_opcode[PYSTATS_MAX_UOP_ID + 1];
+    // JIT memory stats
+    uint64_t jit_total_memory_size;
+    uint64_t jit_code_size;
+    uint64_t jit_trampoline_size;
+    uint64_t jit_data_size;
+    uint64_t jit_padding_size;
+    uint64_t jit_freed_memory_size;
+    uint64_t trace_total_memory_hist[_Py_UOP_HIST_SIZE];
 } OptimizationStats;
 
 typedef struct _rare_event_stats {
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index f5eddab6e90e41..799400762542de 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -375,6 +375,7 @@ extern void _Py_Specialize_ContainsOp(_PyStackRef value, _Py_CODEUNIT *instr);
     do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
 #define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
 #define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
+#define OPT_STAT_ADD(name, n) do { if (_Py_stats) _Py_stats->optimization_stats.name += (n); } while (0)
 #define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0)
 #define UOP_PAIR_INC(uopcode, lastuop) \
     do { \
@@ -410,6 +411,7 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
 #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
 #define GC_STAT_ADD(gen, name, n) ((void)0)
 #define OPT_STAT_INC(name) ((void)0)
+#define OPT_STAT_ADD(name, n) ((void)0)
 #define UOP_STAT_INC(opname, name) ((void)0)
 #define UOP_PAIR_INC(uopcode, lastuop) ((void)0)
 #define OPT_UNSUPPORTED_OPCODE(opname) ((void)0)
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-13-16-14.gh-issue-128842.OMs5X6.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-13-16-14.gh-issue-128842.OMs5X6.rst
new file mode 100644
index 00000000000000..9898060076db79
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-13-16-14.gh-issue-128842.OMs5X6.rst
@@ -0,0 +1 @@
+Collect JIT memory stats using pystats. Patch by Diego Russo.
diff --git a/Python/jit.c b/Python/jit.c
index 7dd0da7a45055a..4d1bf506e5a50a 100644
--- a/Python/jit.c
+++ b/Python/jit.c
@@ -87,6 +87,7 @@ jit_free(unsigned char *memory, size_t size)
         jit_error("unable to free memory");
         return -1;
     }
+    OPT_STAT_ADD(jit_freed_memory_size, size);
     return 0;
 }
 
@@ -510,6 +511,13 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
 #ifdef MAP_JIT
     pthread_jit_write_protect_np(0);
 #endif
+    // Collect memory stats
+    OPT_STAT_ADD(jit_total_memory_size, total_size);
+    OPT_STAT_ADD(jit_code_size, code_size);
+    OPT_STAT_ADD(jit_trampoline_size, state.trampolines.size);
+    OPT_STAT_ADD(jit_data_size, data_size);
+    OPT_STAT_ADD(jit_padding_size, padding);
+    OPT_HIST(total_size, trace_total_memory_hist);
     // Update the offsets of each instruction:
     for (size_t i = 0; i < length; i++) {
         state.instruction_starts[i] += (uintptr_t)memory;
diff --git a/Python/specialize.c b/Python/specialize.c
index eb599028cefafa..ef4ca17cd5fb73 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -309,6 +309,14 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
             );
         }
     }
+    fprintf(out, "JIT total memory size: %" PRIu64 "\n", stats->jit_total_memory_size);
+    fprintf(out, "JIT code size: %" PRIu64 "\n", stats->jit_code_size);
+    fprintf(out, "JIT trampoline size: %" PRIu64 "\n", stats->jit_trampoline_size);
+    fprintf(out, "JIT data size: %" PRIu64 "\n", stats->jit_data_size);
+    fprintf(out, "JIT padding size: %" PRIu64 "\n", stats->jit_padding_size);
+    fprintf(out, "JIT freed memory size: %" PRIu64 "\n", stats->jit_freed_memory_size);
+
+    print_histogram(out, "Trace total memory size", stats->trace_total_memory_hist);
 }
 
 #endif
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index 17478933eb68e5..2a3ed18371d81e 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -545,6 +545,42 @@ def get_optimizer_stats(self) -> dict[str, tuple[int, int | None]]:
             ): (incorrect_keys, attempts),
         }
 
+    def get_jit_memory_stats(self) -> dict[Doc, tuple[int, int | None]]:
+        """Return each JIT memory counter paired with its ratio denominator."""
+        jit_total_memory_size = self._data["JIT total memory size"]
+        jit_code_size = self._data["JIT code size"]
+        jit_trampoline_size = self._data["JIT trampoline size"]
+        jit_data_size = self._data["JIT data size"]
+        jit_padding_size = self._data["JIT padding size"]
+        jit_freed_memory_size = self._data["JIT freed memory size"]
+
+        return {
+            Doc(
+                "Total memory size",
+                "The total size of the memory allocated for the JIT traces",
+            ): (jit_total_memory_size, None),
+            Doc(
+                "Code size",
+                "The size of the memory allocated for the code of the JIT traces",
+            ): (jit_code_size, jit_total_memory_size),
+            Doc(
+                "Trampoline size",
+                "The size of the memory allocated for the trampolines of the JIT traces",
+            ): (jit_trampoline_size, jit_total_memory_size),
+            Doc(
+                "Data size",
+                "The size of the memory allocated for the data of the JIT traces",
+            ): (jit_data_size, jit_total_memory_size),
+            Doc(
+                "Padding size",
+                "The size of the memory allocated for the padding of the JIT traces",
+            ): (jit_padding_size, jit_total_memory_size),
+            Doc(
+                "Freed memory size",
+                "The size of the memory freed from the JIT traces",
+            ): (jit_freed_memory_size, jit_total_memory_size),
+        }
+
     def get_histogram(self, prefix: str) -> list[tuple[int, int]]:
         rows = []
         for k, v in self._data.items():
@@ -1161,16 +1197,31 @@ def calc_optimizer_table(stats: Stats) -> Rows:
             for label, (value, den) in optimizer_stats.items()
         ]
 
-    def calc_histogram_table(key: str, den: str) -> RowCalculator:
+    def calc_jit_memory_table(stats: Stats) -> Rows:
+        """Build one (label, size, ratio) row per JIT memory counter."""
+        jit_memory_stats = stats.get_jit_memory_stats()
+
+        return [
+            (
+                label,
+                Count(value),
+                # label is a Doc, not a str: key the format off the denominator.
+                Ratio(value, den, percentage=den is not None),
+            )
+            for label, (value, den) in jit_memory_stats.items()
+        ]
+
+    def calc_histogram_table(key: str, den: str | None = None) -> RowCalculator:
         def calc(stats: Stats) -> Rows:
             histogram = stats.get_histogram(key)
-            denominator = stats.get(den)
+
+            if den:
+                denominator = stats.get(den)
+            else:
+                denominator = sum(v for _, v in histogram)
 
             rows: Rows = []
-            last_non_zero = 0
             for k, v in histogram:
-                if v != 0:
-                    last_non_zero = len(rows)
                 rows.append(
                     (
                         f"<= {k:,d}",
@@ -1178,9 +1229,19 @@ def calc(stats: Stats) -> Rows:
                         Ratio(v, denominator),
                     )
                 )
-            # Don't include any zero entries at the end
-            rows = rows[: last_non_zero + 1]
-            return rows
+            # Don't include any leading and trailing zero entries
+            start = 0
+            end = len(rows) - 1
+
+            while start <= end:
+                if rows[start][1] == 0:
+                    start += 1
+                elif rows[end][1] == 0:
+                    end -= 1
+                else:
+                    break
+
+            return rows[start:end+1]
 
         return calc
 
@@ -1214,6 +1275,28 @@ def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None)
 
         yield Table(("", "Count:", "Ratio:"), calc_optimization_table, JoinMode.CHANGE)
         yield Table(("", "Count:", "Ratio:"), calc_optimizer_table, JoinMode.CHANGE)
+        yield Section(
+            "JIT memory stats",
+            "JIT memory stats",
+            [
+                Table(
+                    ("", "Size (bytes):", "Ratio:"),
+                    calc_jit_memory_table,
+                    JoinMode.CHANGE
+                )
+            ],
+        )
+        yield Section(
+            "JIT trace total memory histogram",
+            "JIT trace total memory histogram",
+            [
+                Table(
+                    ("Size (bytes)", "Count", "Ratio:"),
+                    calc_histogram_table("Trace total memory size"),
+                    JoinMode.CHANGE_NO_SORT,
+                )
+            ],
+        )
         for name, den in [
             ("Trace length", "Optimization traces created"),
             ("Optimized trace length", "Optimization traces created"),