@@ -55,39 +55,71 @@ def __init__(self, models_cfg: List[LLMModelConfig]):
             )
             logger._ensemble_logged = True
 
-    async def generate(self, prompt: str, **kwargs) -> str:
-        """Generate text using a randomly selected model based on weights"""
-        model = self._sample_model()
-        return await model.generate(prompt, **kwargs)
+    async def generate(self, prompt: str, **kwargs) -> Tuple[str, int]:
+        """Generate text using a randomly selected model based on weights
+
+        Returns:
+            Tuple of (generated_text, model_id) where model_id is the index
+            of the selected model in the ensemble
+        """
+        model, model_id = self._sample_model()
+        response = await model.generate(prompt, **kwargs)
+        return response, model_id
 
     async def generate_with_context(
         self, system_message: str, messages: List[Dict[str, str]], **kwargs
-    ) -> str:
-        """Generate text using a system message and conversational context"""
-        model = self._sample_model()
-        return await model.generate_with_context(system_message, messages, **kwargs)
-
-    def _sample_model(self) -> LLMInterface:
-        """Sample a model from the ensemble based on weights"""
+    ) -> Tuple[str, int]:
+        """Generate text using a system message and conversational context
+
+        Returns:
+            Tuple of (generated_text, model_id) where model_id is the index
+            of the selected model in the ensemble
+        """
+        model, model_id = self._sample_model()
+        response = await model.generate_with_context(system_message, messages, **kwargs)
+        return response, model_id
+
+    def _sample_model(self) -> Tuple[LLMInterface, int]:
+        """Sample a model from the ensemble based on weights
+
+        Returns:
+            Tuple of (model, model_id) where model_id is the index of the
+            selected model in the ensemble
+        """
         index = self.random_state.choices(range(len(self.models)), weights=self.weights, k=1)[0]
         sampled_model = self.models[index]
         logger.info(f"Sampled model: {vars(sampled_model)['model']}")
-        return sampled_model
+        return sampled_model, index
+
+    async def generate_multiple(self, prompt: str, n: int, **kwargs) -> List[Tuple[str, int]]:
+        """Generate multiple texts in parallel
 
-    async def generate_multiple(self, prompt: str, n: int, **kwargs) -> List[str]:
-        """Generate multiple texts in parallel"""
+        Returns:
+            List of (generated_text, model_id) tuples where model_id is the
+            index of the selected model in the ensemble
+        """
         tasks = [self.generate(prompt, **kwargs) for _ in range(n)]
         return await asyncio.gather(*tasks)
 
-    async def parallel_generate(self, prompts: List[str], **kwargs) -> List[str]:
-        """Generate responses for multiple prompts in parallel"""
+    async def parallel_generate(self, prompts: List[str], **kwargs) -> List[Tuple[str, int]]:
+        """Generate responses for multiple prompts in parallel
+
+        Returns:
+            List of (generated_text, model_id) tuples where model_id is the
+            index of the selected model in the ensemble
+        """
         tasks = [self.generate(prompt, **kwargs) for prompt in prompts]
         return await asyncio.gather(*tasks)
 
     async def generate_all_with_context(
         self, system_message: str, messages: List[Dict[str, str]], **kwargs
-    ) -> str:
-        """Generate text using a all available models and average their returned metrics"""
+    ) -> List[str]:
+        """Generate text using all available models and average their returned metrics
+
+        Returns:
+            List of generated texts, one per model in the ensemble (order matches
+            self.models). The model_id for each response is its index in the list.
+        """
         responses = []
         for model in self.models:
             responses.append(await model.generate_with_context(system_message, messages, **kwargs))
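
For reviewers who want to see the new calling convention end to end, here is a minimal, self-contained sketch of how the tuple-returning API behaves. `StubModel` and `MiniEnsemble` are hypothetical stand-ins for this repository's `LLMInterface` implementations and ensemble class, not code from the diff; only the weighted `random.choices` draw and the `asyncio.gather` fan-out mirror the changes above.

```python
import asyncio
import random
from typing import List, Tuple


class StubModel:
    """Hypothetical stand-in for an LLMInterface; echoes the prompt."""

    def __init__(self, name: str):
        self.model = name

    async def generate(self, prompt: str, **kwargs) -> str:
        return f"{self.model}: {prompt}"


class MiniEnsemble:
    def __init__(self, models: List[StubModel], weights: List[float], seed: int = 0):
        self.models = models
        self.weights = weights
        self.random_state = random.Random(seed)

    def _sample_model(self) -> Tuple[StubModel, int]:
        # Weighted draw; the index doubles as the model_id handed back to callers.
        index = self.random_state.choices(range(len(self.models)), weights=self.weights, k=1)[0]
        return self.models[index], index

    async def generate(self, prompt: str, **kwargs) -> Tuple[str, int]:
        model, model_id = self._sample_model()
        return await model.generate(prompt, **kwargs), model_id

    async def generate_multiple(self, prompt: str, n: int, **kwargs) -> List[Tuple[str, int]]:
        # Each call re-samples a model, so n samples may span several members.
        return await asyncio.gather(*(self.generate(prompt, **kwargs) for _ in range(n)))


async def main() -> None:
    ensemble = MiniEnsemble([StubModel("small"), StubModel("large")], weights=[0.8, 0.2])
    for text, model_id in await ensemble.generate_multiple("hello", n=4):
        # model_id identifies which ensemble member produced each sample.
        print(model_id, text)


asyncio.run(main())
```

Returning the sampled index alongside the text lets downstream callers attribute each sample to the ensemble member that produced it, without re-deriving the random draw.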