Skip to content

Commit 1e1eca8

Browse files
committed
Speed up stateful testing with many rules
Selecting which rule to run next redid, on every step, work that scales with the number of rules — most notably recomputing the uncached label of the rule-selection strategy, which was rebuilt from scratch each step. Reuse a single filtered strategy across steps, and cache the per-class setup (sorting the rules and building the underlying strategy) so that it runs once per class rather than each time the machine is instantiated. https://claude.ai/code/session_01AnXgxUP11ftXiFNH8wXATk
1 parent 3af3f1f commit 1e1eca8

2 files changed

Lines changed: 39 additions & 17 deletions

File tree

hypothesis/RELEASE.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
RELEASE_TYPE: patch
2+
3+
This patch improves the performance of :ref:`stateful testing <stateful>` for
4+
state machines with a large number of rules (:issue:`4465`).

hypothesis/src/hypothesis/stateful.py

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
)
5151
from hypothesis.internal.validation import check_type
5252
from hypothesis.reporting import current_verbosity, report
53-
from hypothesis.strategies._internal.featureflags import FeatureStrategy
53+
from hypothesis.strategies._internal.featureflags import FeatureFlags, FeatureStrategy
5454
from hypothesis.strategies._internal.strategies import (
5555
Ex,
5656
OneOfStrategy,
@@ -1099,32 +1099,60 @@ class RuleStrategy(SearchStrategy):
10991099
def __init__(self, machine: RuleBasedStateMachine) -> None:
11001100
super().__init__()
11011101
self.machine = machine
1102-
self.rules = machine.rules.copy()
1102+
self.rules, rule_names, self.rules_strategy = self._setup_for(type(machine))
11031103

11041104
self.enabled_rules_strategy = st.shared(
1105-
FeatureStrategy(at_least_one_of={r.function.__name__ for r in self.rules}),
1105+
FeatureStrategy(at_least_one_of=rule_names),
11061106
key=("enabled rules", machine),
11071107
)
11081108

1109+
# Reuse a single filtered strategy across steps instead of rebuilding it
1110+
# each time. Rebuilding forced a recompute of the (uncached) sampled_from
1111+
# label, which is O(number of rules) per step. The filter predicate reads
1112+
# the feature flags set by do_draw on each step.
1113+
self._feature_flags: FeatureFlags | None = None
1114+
self._enabled_rules_strategy = self.rules_strategy.filter(self._rule_is_enabled)
1115+
1116+
@classmethod
1117+
@lru_cache
1118+
def _setup_for(
1119+
cls, machine_type: type[RuleBasedStateMachine]
1120+
) -> tuple[list["Rule"], frozenset[str], SearchStrategy]:
1121+
# Cache (per machine class) the work of sorting the rules and building
1122+
# the sampled_from strategy, which is O(number of rules) and would
1123+
# otherwise be repeated every time the machine is instantiated; see
1124+
# https://github.com/HypothesisWorks/hypothesis/issues/4465.
1125+
rules = machine_type.setup_state().rules.copy()
11091126
# The order is a bit arbitrary. Primarily we're trying to group rules
11101127
# that write to the same location together, and to put rules with no
11111128
# target first as they have less effect on the structure. We order from
11121129
# fewer to more arguments on grounds that it will plausibly need less
11131130
# data. This probably won't work especially well and we could be
11141131
# smarter about it, but it's better than just doing it in definition
11151132
# order.
1116-
self.rules.sort(
1133+
rules.sort(
11171134
key=lambda rule: (
11181135
sorted(rule.targets),
11191136
len(rule.arguments),
11201137
rule.function.__name__,
11211138
)
11221139
)
1123-
self.rules_strategy = st.sampled_from(self.rules)
1140+
rule_names = frozenset(r.function.__name__ for r in rules)
1141+
return (rules, rule_names, st.sampled_from(rules))
11241142

11251143
def __repr__(self):
11261144
return f"{self.__class__.__name__}(machine={self.machine.__class__.__name__}({{...}}))"
11271145

1146+
def _rule_is_enabled(self, r):
1147+
# Note: The order of the filters here is actually quite important,
1148+
# because checking is_enabled makes choices, so increases the size of
1149+
# the choice sequence. This means that if we are in a case where many
1150+
# rules are invalid we would make a lot more choices if we ask if they
1151+
# are enabled before we ask if they are valid, so our test cases would
1152+
# be artificially large.
1153+
assert self._feature_flags is not None
1154+
return self.is_valid(r) and self._feature_flags.is_enabled(r.function.__name__)
1155+
11281156
def do_draw(self, data):
11291157
if not any(self.is_valid(rule) for rule in self.rules):
11301158
rules = ", ".join([rule.function.__name__ for rule in self.rules])
@@ -1134,18 +1162,8 @@ def do_draw(self, data):
11341162
)
11351163
raise InvalidDefinition(msg) from None
11361164

1137-
feature_flags = data.draw(self.enabled_rules_strategy)
1138-
1139-
def rule_is_enabled(r):
1140-
# Note: The order of the filters here is actually quite important,
1141-
# because checking is_enabled makes choices, so increases the size of
1142-
# the choice sequence. This means that if we are in a case where many
1143-
# rules are invalid we would make a lot more choices if we ask if they
1144-
# are enabled before we ask if they are valid, so our test cases would
1145-
# be artificially large.
1146-
return self.is_valid(r) and feature_flags.is_enabled(r.function.__name__)
1147-
1148-
rule = data.draw(self.rules_strategy.filter(rule_is_enabled))
1165+
self._feature_flags = data.draw(self.enabled_rules_strategy)
1166+
rule = data.draw(self._enabled_rules_strategy)
11491167

11501168
arguments = {}
11511169
for k, strat in rule.arguments_strategies.items():

0 commit comments

Comments
 (0)