From e659d4c6ac30151ba46a992d3ff80c392891b4ef Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Fri, 17 Jan 2025 12:34:57 +0000 Subject: [PATCH 1/4] gh-128842: collect JIT memory stats via pystats Collect via pystats the following metrics: * total memory size * code size * trampoline size * data size * padding size * freed memory size --- Include/cpython/pystats.h | 7 ++++ Include/internal/pycore_code.h | 2 ++ Python/jit.c | 7 ++++ Python/specialize.c | 6 ++++ Tools/scripts/summarize_stats.py | 58 ++++++++++++++++++++++++++++++++ 5 files changed, 80 insertions(+) diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index ee8885cda7b60d..ccecd627ff0bd2 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -141,6 +141,13 @@ typedef struct _optimization_stats { uint64_t remove_globals_builtins_changed; uint64_t remove_globals_incorrect_keys; uint64_t error_in_opcode[PYSTATS_MAX_UOP_ID + 1]; + // JIT memory stats + uint64_t jit_total_memory_size; + uint64_t jit_code_size; + uint64_t jit_trampoline_size; + uint64_t jit_data_size; + uint64_t jit_padding_size; + uint64_t jit_freed_memory_size; } OptimizationStats; typedef struct _rare_event_stats { diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index f5eddab6e90e41..799400762542de 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -375,6 +375,7 @@ extern void _Py_Specialize_ContainsOp(_PyStackRef value, _Py_CODEUNIT *instr); do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0) #define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0) #define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0) +#define OPT_STAT_ADD(name, n) do { if (_Py_stats) _Py_stats->optimization_stats.name += (n); } while (0) #define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); 
_Py_stats->optimization_stats.opcode[opname].name++; } } while (0) #define UOP_PAIR_INC(uopcode, lastuop) \ do { \ @@ -410,6 +411,7 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0) #define GC_STAT_ADD(gen, name, n) ((void)0) #define OPT_STAT_INC(name) ((void)0) +#define OPT_STAT_ADD(name, n) ((void)0) #define UOP_STAT_INC(opname, name) ((void)0) #define UOP_PAIR_INC(uopcode, lastuop) ((void)0) #define OPT_UNSUPPORTED_OPCODE(opname) ((void)0) diff --git a/Python/jit.c b/Python/jit.c index 7dd0da7a45055a..4146d6bdc23c2f 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -87,6 +87,7 @@ jit_free(unsigned char *memory, size_t size) jit_error("unable to free memory"); return -1; } + OPT_STAT_ADD(jit_freed_memory_size, size); return 0; } @@ -510,6 +511,12 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz #ifdef MAP_JIT pthread_jit_write_protect_np(0); #endif + // Collect memory stats + OPT_STAT_ADD(jit_total_memory_size, total_size); + OPT_STAT_ADD(jit_code_size, code_size); + OPT_STAT_ADD(jit_trampoline_size, state.trampolines.size); + OPT_STAT_ADD(jit_data_size, data_size); + OPT_STAT_ADD(jit_padding_size, padding); // Update the offsets of each instruction: for (size_t i = 0; i < length; i++) { state.instruction_starts[i] += (uintptr_t)memory; diff --git a/Python/specialize.c b/Python/specialize.c index eb599028cefafa..e30dec5779e115 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -309,6 +309,12 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) ); } } + fprintf(out, "JIT total memory size: %" PRIu64 "\n", stats->jit_total_memory_size); + fprintf(out, "JIT code size: %" PRIu64 "\n", stats->jit_code_size); + fprintf(out, "JIT trampoline size: %" PRIu64 "\n", stats->jit_trampoline_size); + fprintf(out, "JIT data size: %" PRIu64 "\n", stats->jit_data_size); + fprintf(out, "JIT padding size: %" PRIu64 "\n", stats->jit_padding_size); + 
fprintf(out, "JIT freed memory size: %" PRIu64 "\n", stats->jit_freed_memory_size); } #endif diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 17478933eb68e5..68d6beebc6de9c 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -545,6 +545,41 @@ def get_optimizer_stats(self) -> dict[str, tuple[int, int | None]]: ): (incorrect_keys, attempts), } + def get_jit_memory_stats(self) -> dict[Doc, tuple[int, int | None]]: + jit_total_memory_size = self._data["JIT total memory size"] + jit_code_size = self._data["JIT code size"] + jit_trampoline_size = self._data["JIT trampoline size"] + jit_data_size = self._data["JIT data size"] + jit_padding_size = self._data["JIT padding size"] + jit_freed_memory_size = self._data["JIT freed memory size"] + + return { + Doc( + "Total memory size", + "The total size of the memory allocated for the JIT traces", + ): (jit_total_memory_size, None), + Doc( + "Code size", + "The size of the memory allocated for the code of the JIT traces", + ): (jit_code_size, jit_total_memory_size), + Doc( + "Trampoline size", + "The size of the memory allocated for the trampolines of the JIT traces", + ): (jit_trampoline_size, jit_total_memory_size), + Doc( + "Data size", + "The size of the memory allocated for the data of the JIT traces", + ): (jit_data_size, jit_total_memory_size), + Doc( + "Padding size", + "The size of the memory allocated for the padding of the JIT traces", + ): (jit_padding_size, jit_total_memory_size), + Doc( + "Freed memory size", + "The size of the memory freed from the JIT traces", + ): (jit_freed_memory_size, jit_total_memory_size), + } + def get_histogram(self, prefix: str) -> list[tuple[int, int]]: rows = [] for k, v in self._data.items(): @@ -1161,6 +1196,18 @@ def calc_optimizer_table(stats: Stats) -> Rows: for label, (value, den) in optimizer_stats.items() ] + def calc_jit_memory_table(stats: Stats) -> Rows: + jit_memory_stats = 
stats.get_jit_memory_stats() + + return [ + ( + label, + Count(value), + Ratio(value, den, percentage=label != "Total memory size"), + ) + for label, (value, den) in jit_memory_stats.items() + ] + def calc_histogram_table(key: str, den: str) -> RowCalculator: def calc(stats: Stats) -> Rows: histogram = stats.get_histogram(key) @@ -1214,6 +1261,17 @@ def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None) yield Table(("", "Count:", "Ratio:"), calc_optimization_table, JoinMode.CHANGE) yield Table(("", "Count:", "Ratio:"), calc_optimizer_table, JoinMode.CHANGE) + yield Section( + "JIT memory stats", + "", + [ + Table( + ("", "Size (bytes):", "Ratio:"), + calc_jit_memory_table, + JoinMode.CHANGE + ) + ], + ) for name, den in [ ("Trace length", "Optimization traces created"), ("Optimized trace length", "Optimization traces created"), From 3122c1d338df2782a38dafb303f7da93140b666d Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 17 Jan 2025 13:16:17 +0000 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2025-01-17-13-16-14.gh-issue-128842.OMs5X6.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-13-16-14.gh-issue-128842.OMs5X6.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-13-16-14.gh-issue-128842.OMs5X6.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-13-16-14.gh-issue-128842.OMs5X6.rst new file mode 100644 index 00000000000000..9898060076db79 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-17-13-16-14.gh-issue-128842.OMs5X6.rst @@ -0,0 +1 @@ +Collect JIT memory stats using pystats. Patch by Diego Russo. 
From d5618d7c7c99b165ef6ede02bd0826d557a7e23b Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Thu, 23 Jan 2025 08:43:23 +0000 Subject: [PATCH 3/4] Add histogram of the trace sizes --- Include/cpython/pystats.h | 1 + Python/jit.c | 1 + Python/specialize.c | 2 ++ Tools/scripts/summarize_stats.py | 21 +++++++++++++++++++-- 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index ccecd627ff0bd2..f52348e42b1330 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -148,6 +148,7 @@ typedef struct _optimization_stats { uint64_t jit_data_size; uint64_t jit_padding_size; uint64_t jit_freed_memory_size; + uint64_t trace_total_memory_hist[_Py_UOP_HIST_SIZE]; } OptimizationStats; typedef struct _rare_event_stats { diff --git a/Python/jit.c b/Python/jit.c index 4146d6bdc23c2f..4d1bf506e5a50a 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -517,6 +517,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz OPT_STAT_ADD(jit_trampoline_size, state.trampolines.size); OPT_STAT_ADD(jit_data_size, data_size); OPT_STAT_ADD(jit_padding_size, padding); + OPT_HIST(total_size, trace_total_memory_hist); // Update the offsets of each instruction: for (size_t i = 0; i < length; i++) { state.instruction_starts[i] += (uintptr_t)memory; diff --git a/Python/specialize.c b/Python/specialize.c index e30dec5779e115..ef4ca17cd5fb73 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -315,6 +315,8 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) fprintf(out, "JIT data size: %" PRIu64 "\n", stats->jit_data_size); fprintf(out, "JIT padding size: %" PRIu64 "\n", stats->jit_padding_size); fprintf(out, "JIT freed memory size: %" PRIu64 "\n", stats->jit_freed_memory_size); + + print_histogram(out, "Trace total memory size", stats->trace_total_memory_hist); } #endif diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 
68d6beebc6de9c..7f60476bde86df 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -1208,10 +1208,16 @@ def calc_jit_memory_table(stats: Stats) -> Rows: for label, (value, den) in jit_memory_stats.items() ] - def calc_histogram_table(key: str, den: str) -> RowCalculator: + def calc_histogram_table(key: str, den: str | None = None) -> RowCalculator: def calc(stats: Stats) -> Rows: histogram = stats.get_histogram(key) - denominator = stats.get(den) + + if den: + denominator = stats.get(den) + else: + denominator = 0 + for _, v in histogram: + denominator += v rows: Rows = [] last_non_zero = 0 @@ -1272,6 +1278,17 @@ def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None) ) ], ) + yield Section( + "JIT trace total memory histogram", + "", + [ + Table( + ("Size (bytes)", "Count", "Ratio:"), + calc_histogram_table("Trace total memory size"), + JoinMode.CHANGE_NO_SORT, + ) + ], + ) for name, den in [ ("Trace length", "Optimization traces created"), ("Optimized trace length", "Optimization traces created"), From 75e147f67cac2d350ef5dbe454b88bf90e2606ae Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Fri, 31 Jan 2025 14:19:50 +0000 Subject: [PATCH 4/4] Address Brandt's feedback. 
--- Tools/scripts/summarize_stats.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 7f60476bde86df..2a3ed18371d81e 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -1220,10 +1220,7 @@ def calc(stats: Stats) -> Rows: denominator += v rows: Rows = [] - last_non_zero = 0 for k, v in histogram: - if v != 0: - last_non_zero = len(rows) rows.append( ( f"<= {k:,d}", @@ -1231,9 +1228,19 @@ def calc(stats: Stats) -> Rows: Ratio(v, denominator), ) ) - # Don't include any zero entries at the end - rows = rows[: last_non_zero + 1] - return rows + # Don't include any leading and trailing zero entries + start = 0 + end = len(rows) - 1 + + while start <= end: + if rows[start][1] == 0: + start += 1 + elif rows[end][1] == 0: + end -= 1 + else: + break + + return rows[start:end+1] return calc @@ -1269,7 +1276,7 @@ def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None) yield Table(("", "Count:", "Ratio:"), calc_optimizer_table, JoinMode.CHANGE) yield Section( "JIT memory stats", - "", + "JIT memory stats", [ Table( ("", "Size (bytes):", "Ratio:"), @@ -1280,7 +1287,7 @@ def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None) ) yield Section( "JIT trace total memory histogram", - "", + "JIT trace total memory histogram", [ Table( ("Size (bytes)", "Count", "Ratio:"),