Fix cubic-time from_type() on recursive abstract class hierarchies

claude · claude · commit ed312aa4cbba · 2026-05-27T07:07:24.000Z
Resolving an abstract type whose subclasses reference the base class in their own annotations re-resolved the entire hierarchy once per reference, taking time cubic in the number of subclasses. Guard against re-entry so each type is resolved only once: if we're already resolving an abstract type higher up the stack, return the cached from_type() strategy (sharing one object lets the recursion in e.g. is_empty checks terminate) instead of re-resolving it. We reuse the existing _recurse_guard and add the abstract type itself to it, so that we also catch references that reach us as a union arg rather than a field. Also filter the registered-subtype lookup before sorting it, so the expensive repr-based sort only runs on the (usually empty) matching set rather than the whole global type lookup on every resolution. https://claude.ai/code/session_01MdLX8p4tdAUaDjSHBnveVy
diff --git a/hypothesis/RELEASE.rst b/hypothesis/RELEASE.rst
@@ -0,0 +1,7 @@
+RELEASE_TYPE: patch
+
+This patch dramatically improves the performance of
+:func:`~hypothesis.strategies.from_type` on hierarchies of abstract classes
+whose subclasses refer back to the base class (directly, or via a sibling
+subclass) in their annotations.  Resolution previously took time cubic in the
+number of subclasses; we now resolve each type only once (:issue:`4729`).
diff --git a/hypothesis/src/hypothesis/strategies/_internal/core.py b/hypothesis/src/hypothesis/strategies/_internal/core.py
@@ -1552,16 +1552,20 @@ def _get_typeddict_qualifiers(key, annotation_type):
     # a subclass of `thing` and are not themselves a subtype of any other such
     # type.  For example, `Number -> integers() | floats()`, but bools() is
     # not included because bool is a subclass of int as well as Number.
+    # Filter to matching subtypes *before* sorting, because computing the repr
+    # of every registered strategy (just to establish a deterministic order) is
+    # surprisingly expensive and usually wasted - the matching set is typically
+    # empty for user-defined types.
+    matching = [
+        (k, v)
+        for k, v in types._global_type_lookup.items()
+        if isinstance(k, type)
+        and issubclass(k, thing)
+        and sum(types.try_issubclass(k, typ) for typ in types._global_type_lookup) == 1
+    ]
     strategies = [
         s
-        for s in (
-            as_strategy(v, thing)
-            for k, v in sorted(types._global_type_lookup.items(), key=repr)
-            if isinstance(k, type)
-            and issubclass(k, thing)
-            and sum(types.try_issubclass(k, typ) for typ in types._global_type_lookup)
-            == 1
-        )
+        for s in (as_strategy(v, thing) for _, v in sorted(matching, key=repr))
         if s is not NotImplemented
     ]
     if any(not s.is_empty for s in strategies):
@@ -1644,12 +1648,35 @@ def _get_typeddict_qualifiers(key, annotation_type):
             "type without any subclasses. Consider using register_type_strategy"
         )
 
-    subclass_strategies: SearchStrategy = nothing()
-    for sc in subclasses:
-        try:
-            subclass_strategies |= _from_type(sc)
-        except Exception:
-            pass
+    # When subclasses reference `thing` (directly, or via a sibling subclass)
+    # in their own annotations, naively resolving each subclass would re-resolve
+    # the entire hierarchy once per reference - taking time cubic in the number
+    # of subclasses for mutually-recursive types.  So we guard against re-entry:
+    # if we're already resolving `thing` higher up the stack, return the cached
+    # `from_type(thing)` strategy (so recursive references share one object, which
+    # lets recursion in e.g. is_empty checks terminate) rather than re-resolving it.
+    #
+    # `from_type_guarded` only adds field annotations to the guard, and signals
+    # re-entry by raising RewindRecursive; we add `thing` here as well so that we
+    # also catch references reaching us by other routes, e.g. as a union arg.
+    try:
+        recurse_guard = _recurse_guard.get()
+    except LookupError:
+        _recurse_guard.set(recurse_guard := [])
+    if thing in recurse_guard:
+        return from_type(thing)
+
+    recurse_guard.append(thing)
+    try:
+        substrategies = []
+        for sc in subclasses:
+            try:
+                substrategies.append(_from_type(sc))
+            except Exception:
+                pass
+    finally:
+        recurse_guard.pop()
+    subclass_strategies = one_of(substrategies)
     if subclass_strategies.is_empty:
         # We're unable to resolve subclasses now, but we might be able to later -
         # so we'll just go back to the mixed distribution.
diff --git a/hypothesis/tests/cover/test_lookup.py b/hypothesis/tests/cover/test_lookup.py
@@ -919,6 +919,53 @@ def test_cannot_resolve_abstract_class_with_no_concrete_subclass(instance):
     raise AssertionError("test body unreachable as strategy cannot resolve")
 
 
+def test_resolving_mutually_recursive_abstract_subclasses_is_efficient(monkeypatch):
+    # Resolving an abstract type whose many subclasses refer back to it (directly
+    # or via a sibling) used to re-resolve the whole hierarchy once per reference,
+    # taking time cubic in the number of subclasses.  We now resolve each type
+    # only once, so the number of get_type_hints calls (now linear in n) is our
+    # regression metric - the old behaviour ran into the thousands even for n=15.
+    import dataclasses
+
+    from hypothesis.strategies._internal import core
+
+    class Stmt(abc.ABC):
+        @abc.abstractmethod
+        def f(self) -> str: ...
+
+    # A leaf subclass gives the recursion a base case so generation can terminate.
+    Leaf = dataclasses.make_dataclass(
+        "Leaf", [("v", int)], bases=(Stmt,), namespace={"f": lambda self: ""}
+    )
+    n = 15
+    # Keep a reference to the subclasses so they aren't garbage-collected (which
+    # would remove them from Stmt.__subclasses__()) before we resolve the type.
+    subclasses = [Leaf] + [
+        dataclasses.make_dataclass(
+            f"S{i}",
+            [("a", Stmt), ("b", typing.Optional[Stmt])],
+            bases=(Stmt,),
+            namespace={"f": lambda self: ""},
+        )
+        for i in range(n)
+    ]
+    assert set(Stmt.__subclasses__()) == set(subclasses)
+
+    calls = 0
+    real_get_type_hints = core.get_type_hints
+
+    def counting_get_type_hints(thing):
+        nonlocal calls
+        calls += 1
+        return real_get_type_hints(thing)
+
+    monkeypatch.setattr(core, "get_type_hints", counting_get_type_hints)
+    st.from_type(Stmt).validate()
+    assert calls < 50 * n
+
+    find_any(st.from_type(Stmt), lambda x: isinstance(x, Leaf))
+
+
 def test_type_with_unresolvable_forward_reference_fails():
     t = type["UnknownForwardRef"]  # noqa: F821
     with pytest.raises(ResolutionFailed):