Skip to content

Commit ed312aa

Browse files
committed
Fix cubic-time from_type() on recursive abstract class hierarchies
Resolving an abstract type whose subclasses reference the base class in their own annotations re-resolved the entire hierarchy once per reference, taking time cubic in the number of subclasses. Guard against re-entry so each type is resolved only once: if we're already resolving an abstract type higher up the stack, return the cached from_type() strategy (sharing one object lets the recursion in e.g. is_empty checks terminate) instead of re-resolving it. We reuse the existing _recurse_guard, which also catches references that reach us as a union arg rather than a field. Also filter the registered-subtype lookup before sorting it, so the expensive repr-based sort only runs on the (usually empty) matching set rather than the whole global type lookup on every resolution. https://claude.ai/code/session_01MdLX8p4tdAUaDjSHBnveVy
1 parent b1cc932 commit ed312aa

3 files changed

Lines changed: 95 additions & 14 deletions

File tree

hypothesis/RELEASE.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
RELEASE_TYPE: patch
2+
3+
This patch dramatically improves the performance of
4+
:func:`~hypothesis.strategies.from_type` on hierarchies of abstract classes
5+
whose subclasses refer back to the base class (directly, or via a sibling
6+
subclass) in their annotations. Resolution previously took time cubic in the
7+
number of subclasses; we now resolve each type only once (:issue:`4729`).

hypothesis/src/hypothesis/strategies/_internal/core.py

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1552,16 +1552,20 @@ def _get_typeddict_qualifiers(key, annotation_type):
15521552
# a subclass of `thing` and are not themselves a subtype of any other such
15531553
# type. For example, `Number -> integers() | floats()`, but bools() is
15541554
# not included because bool is a subclass of int as well as Number.
1555+
# Filter to matching subtypes *before* sorting, because computing the repr
1556+
# of every registered strategy (just to establish a deterministic order) is
1557+
# surprisingly expensive and usually wasted - the matching set is typically
1558+
# empty for user-defined types.
1559+
matching = [
1560+
(k, v)
1561+
for k, v in types._global_type_lookup.items()
1562+
if isinstance(k, type)
1563+
and issubclass(k, thing)
1564+
and sum(types.try_issubclass(k, typ) for typ in types._global_type_lookup) == 1
1565+
]
15551566
strategies = [
15561567
s
1557-
for s in (
1558-
as_strategy(v, thing)
1559-
for k, v in sorted(types._global_type_lookup.items(), key=repr)
1560-
if isinstance(k, type)
1561-
and issubclass(k, thing)
1562-
and sum(types.try_issubclass(k, typ) for typ in types._global_type_lookup)
1563-
== 1
1564-
)
1568+
for s in (as_strategy(v, thing) for _, v in sorted(matching, key=repr))
15651569
if s is not NotImplemented
15661570
]
15671571
if any(not s.is_empty for s in strategies):
@@ -1644,12 +1648,35 @@ def _get_typeddict_qualifiers(key, annotation_type):
16441648
"type without any subclasses. Consider using register_type_strategy"
16451649
)
16461650

1647-
subclass_strategies: SearchStrategy = nothing()
1648-
for sc in subclasses:
1649-
try:
1650-
subclass_strategies |= _from_type(sc)
1651-
except Exception:
1652-
pass
1651+
# When subclasses reference `thing` (directly, or via a sibling subclass)
1652+
# in their own annotations, naively resolving each subclass would re-resolve
1653+
# the entire hierarchy once per reference - which is combinatorially
1654+
# expensive for mutually-recursive types. So we guard against re-entry: if
1655+
# we're already resolving `thing` higher up the stack, return the cached
1656+
# strategy (so recursive references share one object, which lets recursion in
1657+
# e.g. is_empty checks terminate) rather than resolving it again.
1658+
#
1659+
# `from_type_guarded` only adds field annotations to the guard, and signals
1660+
# re-entry by raising RewindRecursive; we add `thing` here as well so that we
1661+
# also catch references reaching us by other routes, e.g. as a union arg.
1662+
try:
1663+
recurse_guard = _recurse_guard.get()
1664+
except LookupError:
1665+
_recurse_guard.set(recurse_guard := [])
1666+
if thing in recurse_guard:
1667+
return from_type(thing)
1668+
1669+
recurse_guard.append(thing)
1670+
try:
1671+
substrategies = []
1672+
for sc in subclasses:
1673+
try:
1674+
substrategies.append(_from_type(sc))
1675+
except Exception:
1676+
pass
1677+
finally:
1678+
recurse_guard.pop()
1679+
subclass_strategies = one_of(substrategies)
16531680
if subclass_strategies.is_empty:
16541681
# We're unable to resolve subclasses now, but we might be able to later -
16551682
# so we'll just go back to the mixed distribution.

hypothesis/tests/cover/test_lookup.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,53 @@ def test_cannot_resolve_abstract_class_with_no_concrete_subclass(instance):
919919
raise AssertionError("test body unreachable as strategy cannot resolve")
920920

921921

922+
def test_resolving_mutually_recursive_abstract_subclasses_is_efficient(monkeypatch):
923+
# Resolving an abstract type whose many subclasses refer back to it (directly
924+
# or via a sibling) used to re-resolve the whole hierarchy once per reference,
925+
# taking time cubic in the number of subclasses. We now resolve each type
926+
# only once, so the (linear) number of get_type_hints calls is our regression
927+
# metric - the old behaviour ran into the thousands even for N=15.
928+
import dataclasses
929+
930+
from hypothesis.strategies._internal import core
931+
932+
class Stmt(abc.ABC):
933+
@abc.abstractmethod
934+
def f(self) -> str: ...
935+
936+
# A leaf subclass gives the recursion a base case so generation can terminate.
937+
Leaf = dataclasses.make_dataclass(
938+
"Leaf", [("v", int)], bases=(Stmt,), namespace={"f": lambda self: ""}
939+
)
940+
n = 15
941+
# Keep a reference to the subclasses so they aren't garbage-collected (which
942+
# would remove them from Stmt.__subclasses__()) before we resolve the type.
943+
subclasses = [Leaf] + [
944+
dataclasses.make_dataclass(
945+
f"S{i}",
946+
[("a", Stmt), ("b", typing.Optional[Stmt])],
947+
bases=(Stmt,),
948+
namespace={"f": lambda self: ""},
949+
)
950+
for i in range(n)
951+
]
952+
assert set(Stmt.__subclasses__()) == set(subclasses)
953+
954+
calls = 0
955+
real_get_type_hints = core.get_type_hints
956+
957+
def counting_get_type_hints(thing):
958+
nonlocal calls
959+
calls += 1
960+
return real_get_type_hints(thing)
961+
962+
monkeypatch.setattr(core, "get_type_hints", counting_get_type_hints)
963+
st.from_type(Stmt).validate()
964+
assert calls < 50 * n
965+
966+
find_any(st.from_type(Stmt), lambda x: isinstance(x, Leaf))
967+
968+
922969
def test_type_with_unresolvable_forward_reference_fails():
923970
t = type["UnknownForwardRef"] # noqa: F821
924971
with pytest.raises(ResolutionFailed):

0 commit comments

Comments
 (0)