[mypyc] Reduce impact of immortality on reference counting performance (#18459)

JukkaL · web-flow · commit 58de753b6b76 · 2025-01-21T18:08:23.000Z
Fixes mypyc/mypyc#1044. The addition of object immortality in Python 3.12 (PEP 683) added an extra immortality check to incref and decref operations. Objects with a specific reference count are treated as immortal, and their reference counts are never updated. It turns out that this slowed down certain workloads a lot (up to 70% increase in runtime, compared to 3.11). This PR reduces the impact of immortality via a few optimizations: 1. Assume instances of native classes and list objects are not immortal (skip immortality checks). 2. Skip incref of certain objects in some contexts when we know that they are immortal (e.g. avoid incref of `None`). The second change should be clear. We generally depend on CPython implementation details to improve performance, and this seems safe to do here as well. The first change could turn immortal objects into non-immortal ones. For native classes this is a decision we can arguably make -- native classes don't properly support immortality, and they can't be shared between subinterpreters. As discussed in PEP 683, skipping immortality checks here is acceptable even in cases where somebody tries to make a native instance immortal, but this could have some performance or memory use impact. The performance gains make this a good tradeoff. Since lists are mutable, they can't be safely shared between subinterpreters, so again not dealing with immortality is acceptable. This change could reduce performance in some use cases by deimmortalizing lists, but this potential impact seems marginal compared to faster incref and decref operations on lists, which are some of the more common objects in Python programs. This speeds up self check by about 1.5% on Python 3.13. This speeds up the richards benchmark by 30-35% (!) on 3.13, and some other benchmarks also see smaller improvements.
diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py
@@ -12,6 +12,7 @@
     ATTR_PREFIX,
     BITMAP_BITS,
     FAST_ISINSTANCE_MAX_SUBCLASSES,
+    HAVE_IMMORTAL,
     NATIVE_PREFIX,
     REG_PREFIX,
     STATIC_PREFIX,
@@ -511,8 +512,11 @@ def emit_inc_ref(self, dest: str, rtype: RType, *, rare: bool = False) -> None:
             for i, item_type in enumerate(rtype.types):
                 self.emit_inc_ref(f"{dest}.f{i}", item_type)
         elif not rtype.is_unboxed:
-            # Always inline, since this is a simple op
-            self.emit_line("CPy_INCREF(%s);" % dest)
+            # Always inline, since this is a simple but very hot op
+            if rtype.may_be_immortal or not HAVE_IMMORTAL:
+                self.emit_line("CPy_INCREF(%s);" % dest)
+            else:
+                self.emit_line("CPy_INCREF_NO_IMM(%s);" % dest)
         # Otherwise assume it's an unboxed, pointerless value and do nothing.
 
     def emit_dec_ref(
@@ -540,7 +544,10 @@ def emit_dec_ref(
                 self.emit_line(f"CPy_{x}DecRef({dest});")
             else:
                 # Inlined
-                self.emit_line(f"CPy_{x}DECREF({dest});")
+                if rtype.may_be_immortal or not HAVE_IMMORTAL:
+                    self.emit_line(f"CPy_{x}DECREF({dest});")
+                else:
+                    self.emit_line(f"CPy_{x}DECREF_NO_IMM({dest});")
         # Otherwise assume it's an unboxed, pointerless value and do nothing.
 
     def pretty_name(self, typ: RType) -> str:
diff --git a/mypyc/codegen/emitfunc.py b/mypyc/codegen/emitfunc.py
@@ -7,6 +7,7 @@
 from mypyc.analysis.blockfreq import frequently_executed_blocks
 from mypyc.codegen.emit import DEBUG_ERRORS, Emitter, TracebackAndGotoHandler, c_array_initializer
 from mypyc.common import (
+    HAVE_IMMORTAL,
     MODULE_PREFIX,
     NATIVE_PREFIX,
     REG_PREFIX,
@@ -76,9 +77,11 @@
     RStruct,
     RTuple,
     RType,
+    is_bool_rprimitive,
     is_int32_rprimitive,
     is_int64_rprimitive,
     is_int_rprimitive,
+    is_none_rprimitive,
     is_pointer_rprimitive,
     is_tagged,
 )
@@ -578,6 +581,21 @@ def emit_method_call(self, dest: str, op_obj: Value, name: str, op_args: list[Va
             )
 
     def visit_inc_ref(self, op: IncRef) -> None:
+        if (
+            isinstance(op.src, Box)
+            and (is_none_rprimitive(op.src.src.type) or is_bool_rprimitive(op.src.src.type))
+            and HAVE_IMMORTAL
+        ):
+            # On Python 3.12+, None/True/False are immortal, and we can skip inc ref
+            return
+
+        if isinstance(op.src, LoadLiteral) and HAVE_IMMORTAL:
+            value = op.src.value
+            # We can skip inc ref for immortal literals on Python 3.12+
+            if type(value) is int and -5 <= value <= 256:
+                # Small integers are immortal
+                return
+
         src = self.reg(op.src)
         self.emit_inc_ref(src, op.src.type)
 
diff --git a/mypyc/common.py b/mypyc/common.py
@@ -82,6 +82,12 @@
     "pythonsupport.c",
 ]
 
+# Python 3.12 introduced immortal objects, specified via a special reference count
+# value. The reference counts of immortal objects are normally not modified, but it's
+# not strictly wrong to modify them. See PEP 683 for more information, but note that
+# some details in the PEP are out of date.
+HAVE_IMMORTAL: Final = sys.version_info >= (3, 12)
+
 
 JsonDict = dict[str, Any]
 
diff --git a/mypyc/ir/rtypes.py b/mypyc/ir/rtypes.py
@@ -26,7 +26,7 @@
 from typing import TYPE_CHECKING, ClassVar, Final, Generic, TypeVar
 from typing_extensions import TypeGuard
 
-from mypyc.common import IS_32_BIT_PLATFORM, PLATFORM_SIZE, JsonDict, short_name
+from mypyc.common import HAVE_IMMORTAL, IS_32_BIT_PLATFORM, PLATFORM_SIZE, JsonDict, short_name
 from mypyc.namegen import NameGenerator
 
 if TYPE_CHECKING:
@@ -69,6 +69,11 @@ def accept(self, visitor: RTypeVisitor[T]) -> T:
     def short_name(self) -> str:
         return short_name(self.name)
 
+    @property
+    @abstractmethod
+    def may_be_immortal(self) -> bool:
+        raise NotImplementedError
+
     def __str__(self) -> str:
         return short_name(self.name)
 
@@ -151,6 +156,10 @@ class RVoid(RType):
     def accept(self, visitor: RTypeVisitor[T]) -> T:
         return visitor.visit_rvoid(self)
 
+    @property
+    def may_be_immortal(self) -> bool:
+        return False
+
     def serialize(self) -> str:
         return "void"
 
@@ -193,6 +202,7 @@ def __init__(
         ctype: str = "PyObject *",
         size: int = PLATFORM_SIZE,
         error_overlap: bool = False,
+        may_be_immortal: bool = True,
     ) -> None:
         RPrimitive.primitive_map[name] = self
 
@@ -204,6 +214,7 @@ def __init__(
         self._ctype = ctype
         self.size = size
         self.error_overlap = error_overlap
+        self._may_be_immortal = may_be_immortal and HAVE_IMMORTAL
         if ctype == "CPyTagged":
             self.c_undefined = "CPY_INT_TAG"
         elif ctype in ("int16_t", "int32_t", "int64_t"):
@@ -230,6 +241,10 @@ def __init__(
     def accept(self, visitor: RTypeVisitor[T]) -> T:
         return visitor.visit_rprimitive(self)
 
+    @property
+    def may_be_immortal(self) -> bool:
+        return self._may_be_immortal
+
     def serialize(self) -> str:
         return self.name
 
@@ -433,8 +448,12 @@ def __hash__(self) -> int:
     "builtins.None", is_unboxed=True, is_refcounted=False, ctype="char", size=1
 )
 
-# Python list object (or an instance of a subclass of list).
-list_rprimitive: Final = RPrimitive("builtins.list", is_unboxed=False, is_refcounted=True)
+# Python list object (or an instance of a subclass of list). These could be
+# immortal, but since this is expected to be very rare, and the immortality checks
+# can be pretty expensive for lists, we treat lists as non-immortal.
+list_rprimitive: Final = RPrimitive(
+    "builtins.list", is_unboxed=False, is_refcounted=True, may_be_immortal=False
+)
 
 # Python dict object (or an instance of a subclass of dict).
 dict_rprimitive: Final = RPrimitive("builtins.dict", is_unboxed=False, is_refcounted=True)
@@ -642,6 +661,10 @@ def __init__(self, types: list[RType]) -> None:
     def accept(self, visitor: RTypeVisitor[T]) -> T:
         return visitor.visit_rtuple(self)
 
+    @property
+    def may_be_immortal(self) -> bool:
+        return False
+
     def __str__(self) -> str:
         return "tuple[%s]" % ", ".join(str(typ) for typ in self.types)
 
@@ -763,6 +786,10 @@ def __init__(self, name: str, names: list[str], types: list[RType]) -> None:
     def accept(self, visitor: RTypeVisitor[T]) -> T:
         return visitor.visit_rstruct(self)
 
+    @property
+    def may_be_immortal(self) -> bool:
+        return False
+
     def __str__(self) -> str:
         # if not tuple(unnamed structs)
         return "{}{{{}}}".format(
@@ -823,6 +850,10 @@ def __init__(self, class_ir: ClassIR) -> None:
     def accept(self, visitor: RTypeVisitor[T]) -> T:
         return visitor.visit_rinstance(self)
 
+    @property
+    def may_be_immortal(self) -> bool:
+        return False
+
     def struct_name(self, names: NameGenerator) -> str:
         return self.class_ir.struct_name(names)
 
@@ -883,6 +914,10 @@ def make_simplified_union(items: list[RType]) -> RType:
     def accept(self, visitor: RTypeVisitor[T]) -> T:
         return visitor.visit_runion(self)
 
+    @property
+    def may_be_immortal(self) -> bool:
+        return any(item.may_be_immortal for item in self.items)
+
     def __repr__(self) -> str:
         return "<RUnion %s>" % ", ".join(str(item) for item in self.items)
 
@@ -953,6 +988,10 @@ def __init__(self, item_type: RType, length: int) -> None:
     def accept(self, visitor: RTypeVisitor[T]) -> T:
         return visitor.visit_rarray(self)
 
+    @property
+    def may_be_immortal(self) -> bool:
+        return False
+
     def __str__(self) -> str:
         return f"{self.item_type}[{self.length}]"
 
diff --git a/mypyc/lib-rt/mypyc_util.h b/mypyc/lib-rt/mypyc_util.h
@@ -31,6 +31,35 @@
 // Here just for consistency
 #define CPy_XDECREF(p) Py_XDECREF(p)
 
+// The *_NO_IMM operations below modify ob_refcnt directly, skipping the
+// immortality check, for objects assumed non-immortal (Python 3.12 and later).
+//
+// Py_INCREF and other CPython operations check for immortality. That
+// check is wasted work when we know that an object cannot be immortal.
+
+static inline void CPy_INCREF_NO_IMM(PyObject *op)
+{
+    op->ob_refcnt++;
+}
+
+static inline void CPy_DECREF_NO_IMM(PyObject *op)
+{
+    if (--op->ob_refcnt == 0) {
+        _Py_Dealloc(op);
+    }
+}
+
+static inline void CPy_XDECREF_NO_IMM(PyObject *op)
+{
+    if (op != NULL && --op->ob_refcnt == 0) {
+        _Py_Dealloc(op);
+    }
+}
+
+#define CPy_INCREF_NO_IMM(op) CPy_INCREF_NO_IMM((PyObject *)(op))
+#define CPy_DECREF_NO_IMM(op) CPy_DECREF_NO_IMM((PyObject *)(op))
+#define CPy_XDECREF_NO_IMM(op) CPy_XDECREF_NO_IMM((PyObject *)(op))
+
 // Tagged integer -- our representation of Python 'int' objects.
 // Small enough integers are represented as unboxed integers (shifted
 // left by 1); larger integers (larger than 63 bits on a 64-bit
diff --git a/mypyc/test/test_emit.py b/mypyc/test/test_emit.py
diff --git a/mypyc/test/test_emitfunc.py b/mypyc/test/test_emitfunc.py