
Commit 203d046

codelion and claude committed
Add integration tests for example validation
Add comprehensive integration tests that verify:

- Example config files load correctly
- Initial programs have EVOLVE-BLOCK markers
- Evaluators exist and have required functions
- Evaluators can run on initial programs
- Cascade evaluation functions are detected
- Database stores and retrieves programs correctly
- Program evolution tracking works

Tests cover function_minimization, circle_packing, and signal_processing examples, plus general structure validation for all examples.

Total tests: 346 (was 326)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent ad02110 commit 203d046

File tree: 1 file changed

Lines changed: 375 additions & 0 deletions
@@ -0,0 +1,375 @@
"""
Integration tests that validate existing examples work correctly.
These tests verify that evaluators, configs, and initial programs are properly set up.
"""

import importlib.util
import os
import sys
import tempfile
import shutil
import unittest
from pathlib import Path
from unittest.mock import patch

# Add project root to path
PROJECT_ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))

from openevolve.config import Config, load_config
from openevolve.evaluator import Evaluator


class TestFunctionMinimizationExample(unittest.TestCase):
    """Integration tests for the function_minimization example"""

    EXAMPLE_DIR = PROJECT_ROOT / "examples" / "function_minimization"

    def test_config_loads(self):
        """Test that the config file loads without errors"""
        config_path = self.EXAMPLE_DIR / "config.yaml"
        if not config_path.exists():
            self.skipTest("function_minimization config not found")

        config = load_config(str(config_path))
        self.assertIsInstance(config, Config)
        self.assertGreater(config.max_iterations, 0)

    def test_initial_program_exists(self):
        """Test that the initial program file exists"""
        program_path = self.EXAMPLE_DIR / "initial_program.py"
        self.assertTrue(program_path.exists(), "initial_program.py should exist")

    def test_initial_program_has_evolve_block(self):
        """Test that the initial program has EVOLVE-BLOCK markers"""
        program_path = self.EXAMPLE_DIR / "initial_program.py"
        if not program_path.exists():
            self.skipTest("initial_program.py not found")

        content = program_path.read_text()
        self.assertIn("EVOLVE-BLOCK-START", content)
        self.assertIn("EVOLVE-BLOCK-END", content)

    def test_evaluator_exists(self):
        """Test that the evaluator file exists"""
        evaluator_path = self.EXAMPLE_DIR / "evaluator.py"
        self.assertTrue(evaluator_path.exists(), "evaluator.py should exist")

    def test_evaluator_has_evaluate_function(self):
        """Test that the evaluator has an evaluate function"""
        evaluator_path = self.EXAMPLE_DIR / "evaluator.py"
        if not evaluator_path.exists():
            self.skipTest("evaluator.py not found")

        spec = importlib.util.spec_from_file_location("evaluator", evaluator_path)
        evaluator_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(evaluator_module)

        self.assertTrue(hasattr(evaluator_module, "evaluate"))
        self.assertTrue(callable(evaluator_module.evaluate))

    def test_evaluator_runs_on_initial_program(self):
        """Test that the evaluator can evaluate the initial program"""
        evaluator_path = self.EXAMPLE_DIR / "evaluator.py"
        program_path = self.EXAMPLE_DIR / "initial_program.py"

        if not evaluator_path.exists() or not program_path.exists():
            self.skipTest("Example files not found")

        # Load evaluator
        spec = importlib.util.spec_from_file_location("evaluator", evaluator_path)
        evaluator_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(evaluator_module)

        # Run evaluation
        result = evaluator_module.evaluate(str(program_path))

        # Check result structure
        if hasattr(result, "metrics"):
            # EvaluationResult object
            metrics = result.metrics
        else:
            # Dictionary
            metrics = result

        self.assertIn("combined_score", metrics)
        self.assertIsInstance(metrics["combined_score"], (int, float))


class TestCirclePackingExample(unittest.TestCase):
    """Integration tests for the circle_packing example"""

    EXAMPLE_DIR = PROJECT_ROOT / "examples" / "circle_packing"

    def test_config_loads(self):
        """Test that config files load without errors"""
        for config_name in ["config_phase_1.yaml", "config_phase_2.yaml"]:
            config_path = self.EXAMPLE_DIR / config_name
            if config_path.exists():
                config = load_config(str(config_path))
                self.assertIsInstance(config, Config)

    def test_evaluator_exists(self):
        """Test that evaluator exists"""
        evaluator_path = self.EXAMPLE_DIR / "evaluator.py"
        self.assertTrue(evaluator_path.exists(), "evaluator.py should exist")

    def test_evaluator_has_evaluate_function(self):
        """Test that the evaluator has required functions"""
        evaluator_path = self.EXAMPLE_DIR / "evaluator.py"
        if not evaluator_path.exists():
            self.skipTest("evaluator.py not found")

        spec = importlib.util.spec_from_file_location("evaluator", evaluator_path)
        evaluator_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(evaluator_module)

        self.assertTrue(hasattr(evaluator_module, "evaluate"))


class TestSignalProcessingExample(unittest.TestCase):
    """Integration tests for the signal_processing example"""

    EXAMPLE_DIR = PROJECT_ROOT / "examples" / "signal_processing"

    def test_config_loads(self):
        """Test that the config file loads"""
        config_path = self.EXAMPLE_DIR / "config.yaml"
        if not config_path.exists():
            self.skipTest("signal_processing config not found")

        config = load_config(str(config_path))
        self.assertIsInstance(config, Config)

    def test_evaluator_exists(self):
        """Test that evaluator exists"""
        evaluator_path = self.EXAMPLE_DIR / "evaluator.py"
        if not evaluator_path.exists():
            self.skipTest("evaluator.py not found")
        self.assertTrue(evaluator_path.exists())


class TestEvaluatorIntegration(unittest.TestCase):
    """Integration tests for the Evaluator class with real examples"""

    def test_evaluator_loads_function_minimization(self):
        """Test that Evaluator can load the function_minimization evaluator"""
        evaluator_path = PROJECT_ROOT / "examples" / "function_minimization" / "evaluator.py"
        if not evaluator_path.exists():
            self.skipTest("function_minimization evaluator not found")

        from openevolve.config import EvaluatorConfig
        config = EvaluatorConfig(timeout=30, cascade_evaluation=True)

        evaluator = Evaluator(config, str(evaluator_path))
        self.assertIsNotNone(evaluator.evaluate_function)
        self.assertTrue(callable(evaluator.evaluate_function))

    def test_evaluator_module_has_cascade_functions(self):
        """Test that function_minimization evaluator has cascade functions"""
        evaluator_path = PROJECT_ROOT / "examples" / "function_minimization" / "evaluator.py"
        if not evaluator_path.exists():
            self.skipTest("function_minimization evaluator not found")

        # Load the module directly to check for cascade functions
        spec = importlib.util.spec_from_file_location("evaluator", evaluator_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        # function_minimization has evaluate_stage1 and evaluate_stage2
        self.assertTrue(hasattr(module, "evaluate_stage1"))
        self.assertTrue(hasattr(module, "evaluate_stage2"))
        self.assertTrue(callable(module.evaluate_stage1))
        self.assertTrue(callable(module.evaluate_stage2))
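        # Note: in a cascade setup, evaluate_stage1 typically serves as a cheap
        # first-pass filter before the more expensive evaluate_stage2 runs;
        # this test only asserts that both hooks exist and are callable.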


class TestConfigIntegration(unittest.TestCase):
    """Integration tests for config loading across examples"""

    @patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"})
    def test_all_example_configs_load(self):
        """Test that all example config files can be loaded"""
        examples_dir = PROJECT_ROOT / "examples"
        failed_configs = []

        for config_path in examples_dir.rglob("*config*.yaml"):
            try:
                config = load_config(str(config_path))
                self.assertIsInstance(config, Config)
            except Exception as e:
                failed_configs.append((str(config_path), str(e)))

        if failed_configs:
            failure_msg = "\n".join([f"{path}: {error}" for path, error in failed_configs])
            self.fail(f"Failed to load configs:\n{failure_msg}")

    def test_config_has_required_sections(self):
        """Test that loaded configs have required sections"""
        config_path = PROJECT_ROOT / "examples" / "function_minimization" / "config.yaml"
        if not config_path.exists():
            self.skipTest("function_minimization config not found")

        config = load_config(str(config_path))

        # Check required sections
        self.assertIsNotNone(config.llm)
        self.assertIsNotNone(config.database)
        self.assertIsNotNone(config.evaluator)
        self.assertIsNotNone(config.prompt)


class TestEndToEndWithMockedLLM(unittest.TestCase):
    """End-to-end tests with mocked LLM responses"""

    def setUp(self):
        """Set up test fixtures"""
        self.temp_dir = tempfile.mkdtemp()
        self.example_dir = PROJECT_ROOT / "examples" / "function_minimization"

    def tearDown(self):
        """Clean up"""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_database_stores_and_retrieves_programs(self):
        """Test that the database can store and retrieve programs"""
        from openevolve.database import ProgramDatabase, Program, DatabaseConfig

        config = DatabaseConfig(population_size=100)
        db = ProgramDatabase(config)

        # Add a program
        program = Program(
            id="test_prog_1",
            code="def test(): return 42",
            generation=0,
            metrics={"combined_score": 0.5},
        )
        db.add(program)

        # Retrieve it
        retrieved = db.programs.get("test_prog_1")
        self.assertIsNotNone(retrieved)
        self.assertEqual(retrieved.code, "def test(): return 42")

    def test_program_evolution_tracking(self):
        """Test that program generations are tracked correctly"""
        from openevolve.database import ProgramDatabase, Program, DatabaseConfig

        config = DatabaseConfig(population_size=100)
        db = ProgramDatabase(config)

        # Add parent program
        parent = Program(
            id="parent_1",
            code="def test(): return 1",
            generation=0,
            metrics={"combined_score": 0.3},
        )
        db.add(parent)

        # Add child program
        child = Program(
            id="child_1",
            code="def test(): return 2",
            generation=1,
            parent_id="parent_1",
            metrics={"combined_score": 0.5},
        )
        db.add(child)

        # Verify relationships
        self.assertEqual(db.programs["child_1"].parent_id, "parent_1")
        self.assertEqual(db.programs["child_1"].generation, 1)

    def test_evaluator_returns_evaluation_result(self):
        """Test that evaluators return proper EvaluationResult objects"""
        from openevolve.evaluation_result import EvaluationResult

        evaluator_path = self.example_dir / "evaluator.py"
        program_path = self.example_dir / "initial_program.py"

        if not evaluator_path.exists() or not program_path.exists():
            self.skipTest("Example files not found")

        spec = importlib.util.spec_from_file_location("evaluator", evaluator_path)
        evaluator_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(evaluator_module)

        result = evaluator_module.evaluate(str(program_path))

        # Should be an EvaluationResult or dict with metrics
        if isinstance(result, EvaluationResult):
            self.assertIn("combined_score", result.metrics)
        else:
            self.assertIn("combined_score", result)


class TestExampleStructure(unittest.TestCase):
    """Tests to verify example directory structure is correct"""

    def test_examples_have_required_files(self):
        """Test that examples have the minimum required files"""
        examples_dir = PROJECT_ROOT / "examples"

        # These examples should have at least a config and evaluator
        required_examples = [
            "function_minimization",
            "circle_packing",
        ]

        for example_name in required_examples:
            example_dir = examples_dir / example_name
            if not example_dir.exists():
                continue

            # Check for config
            config_files = list(example_dir.glob("*config*.yaml"))
            self.assertGreater(
                len(config_files), 0,
                f"{example_name} should have at least one config file"
            )

            # Check for evaluator
            evaluator_path = example_dir / "evaluator.py"
            self.assertTrue(
                evaluator_path.exists(),
                f"{example_name} should have evaluator.py"
            )

    def test_evaluators_are_importable(self):
        """Test that all evaluators can be imported without errors"""
        examples_dir = PROJECT_ROOT / "examples"
        failed_imports = []

        for evaluator_path in examples_dir.rglob("evaluator.py"):
            try:
                spec = importlib.util.spec_from_file_location(
                    f"evaluator_{evaluator_path.parent.name}",
                    evaluator_path
                )
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)

                # Verify evaluate function exists
                if not hasattr(module, "evaluate"):
                    failed_imports.append(
                        (str(evaluator_path), "Missing evaluate function")
                    )
            except Exception as e:
                failed_imports.append((str(evaluator_path), str(e)))

        if failed_imports:
            # Only fail if critical examples fail
            critical_failures = [
                f for f in failed_imports
                if "function_minimization" in f[0] or "circle_packing" in f[0]
            ]
            if critical_failures:
                failure_msg = "\n".join(
                    [f"{path}: {error}" for path, error in critical_failures]
                )
                self.fail(f"Critical evaluators failed to import:\n{failure_msg}")


if __name__ == "__main__":
    unittest.main()
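
For reference, the suite can also be exercised through unittest's discovery mechanism. The sketch below is a minimal runner under one assumption: the commit view does not show where the new file sits in the tree, so the "tests/integration" path is hypothetical.

    # Minimal runner sketch (hypothetical path: the diff above does not show
    # the new file's location, so "tests/integration" is an assumption).
    import unittest

    suite = unittest.defaultTestLoader.discover("tests/integration", pattern="test_*.py")
    unittest.TextTestRunner(verbosity=2).run(suite)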

0 commit comments