Skip to content

Commit c032f33

Browse files
committed
feat: remove EnvironmentStateEvaluator from codebase
Remove EnvironmentStateEvaluator and its associated prompt template. This includes removing it from the public API exports, the prompt templates module, and the default evaluators registry in Experiment.
1 parent a375539 commit c032f33

File tree

6 files changed

+0
-512
lines changed

6 files changed

+0
-512
lines changed

src/strands_evals/evaluators/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from .coherence_evaluator import CoherenceEvaluator
22
from .conciseness_evaluator import ConcisenessEvaluator
33
from .deterministic import Contains, Equals, StartsWith, StateEquals, ToolCalled
4-
from .environment_state_evaluator import EnvironmentStateEvaluator
54
from .evaluator import Evaluator
65
from .faithfulness_evaluator import FaithfulnessEvaluator
76
from .goal_success_rate_evaluator import GoalSuccessRateEvaluator
@@ -28,7 +27,6 @@
2827
"ToolParameterAccuracyEvaluator",
2928
"ConcisenessEvaluator",
3029
"CoherenceEvaluator",
31-
"EnvironmentStateEvaluator",
3230
"Contains",
3331
"Equals",
3432
"StartsWith",

src/strands_evals/evaluators/environment_state_evaluator.py

Lines changed: 0 additions & 74 deletions
This file was deleted.

src/strands_evals/evaluators/prompt_templates/prompt_templates.py

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -124,30 +124,6 @@
124124
"""
125125

126126

127-
judge_environment_state_template = """You are an expert evaluator that assesses the environment state produced by a task according to a user-specified rubric. You'll receive some combination of:
128-
- <Input>: Optional original input that initiated the task
129-
- <Output>: Optional output response from the task
130-
- <ActualEnvironmentState>: The actual state of the environment after task execution
131-
- <ExpectedEnvironmentState>: Optional reference for what the environment state should be
132-
- <Rubric>: Evaluation criteria
133-
134-
Evaluate the actual environment state against the expected state and rubric. Focus on whether the task produced the correct side effects in the environment (e.g., files created, database records modified, tests passing, system state changes). Ignore minor formatting differences and focus on semantic correctness of the state.
135-
Keep the reason as concise as possible.
136-
137-
Examples:
138-
<Input>Fix the failing test in test_auth.py</Input>
139-
<ActualEnvironmentState>[{"name": "test_results", "state": {"exit_code": 0, "passed": 5, "failed": 0}}]</ActualEnvironmentState>
140-
<ExpectedEnvironmentState>[{"name": "test_results", "state": {"exit_code": 0}}]</ExpectedEnvironmentState>
141-
<Rubric>Pass if all tests pass after the fix. Score 0-1 based on test success.</Rubric>
142-
{"reason": "All 5 tests pass with exit code 0, indicating the fix was successful.", "test_pass": true, "score": 1.0}
143-
144-
<Input>Create a user record in the database</Input>
145-
<ActualEnvironmentState>[{"name": "database", "state": {"users_table": [{"id": 1, "name": "John", "email": "john@example.com"}]}}]</ActualEnvironmentState>
146-
<ExpectedEnvironmentState>[{"name": "database", "state": {"users_table": [{"id": 1, "name": "Jane", "email": "jane@example.com"}]}}]</ExpectedEnvironmentState>
147-
<Rubric>Pass if the correct user record was created. Score 0-1 based on record accuracy.</Rubric>
148-
{"reason": "A user record was created but with incorrect data: name is 'John' instead of 'Jane' and email is 'john@example.com' instead of 'jane@example.com'.", "test_pass": false, "score": 0.2}
149-
"""
150-
151127
judge_interactions_template = """You are an expert evaluator that assesses multi-agent interactions according to a user-specified rubric. You'll receive:
152128
- <Input>: Optional original input that initiated the interaction sequence
153129
- <Interaction>: Current interaction with node name, dependencies, and message

src/strands_evals/experiment.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
from .case import Case
2020
from .evaluators.deterministic import Contains, Equals, StartsWith, StateEquals, ToolCalled
21-
from .evaluators.environment_state_evaluator import EnvironmentStateEvaluator
2221
from .evaluators.evaluator import Evaluator
2322
from .evaluators.interactions_evaluator import InteractionsEvaluator
2423
from .evaluators.output_evaluator import OutputEvaluator
@@ -805,7 +804,6 @@ def from_dict(cls, data: dict, custom_evaluators: list[type[Evaluator]] | None =
805804
"StartsWith": StartsWith,
806805
"StateEquals": StateEquals,
807806
"ToolCalled": ToolCalled,
808-
"EnvironmentStateEvaluator": EnvironmentStateEvaluator,
809807
}
810808
all_evaluators: dict[str, type[Evaluator]] = {
811809
**default_evaluators,

tests/strands_evals/evaluators/test_environment_state_evaluator.py

Lines changed: 0 additions & 157 deletions
This file was deleted.

0 commit comments

Comments (0)