@@ -204,26 +204,23 @@ def verify_answer(self, question: str, answer: str) -> bool:
204204 logger .info ("Answer verification result: %s" , is_correct )
205205 return is_correct
206206
207- def monte_carlo_tree_search (
207+ def evaluate_partial_solution (
208208 self , question : str , partial_solution : str = ""
209209 ) -> float :
210- r"""Perform Monte Carlo Tree Search to find the best solution.
210+ r"""Evaluate the quality of a partial solution against the
211+ golden answer.
211212
212- Process:
213- a. Selection: Choose promising partial solutions based on previous
214- scores
215- b. Expansion: Generate new solution steps using the generator agent
216- c. Simulation: Evaluate solution quality using similarity scores
217- d. Backpropagation: Update solution tree with new findings
213+ This function generates a similarity score between the given partial
214+ solution and the correct answer (golden answer).
218215
219216 Args:
220- question (str): The question to solve .
221- partial_solution (str): The current partial solution.
217+ question (str): The question being solved.
218+ partial_solution (str): The partial solution generated so far.
222219 (default: :obj:`""`)
223220
224221 Returns:
225- float: The similarity score between the current
226- solution and golden answer.
222+ float: A similarity score between 0 and 1, indicating how close the
223+ partial solution is to the golden answer.
227224 """
228225 if question not in self .golden_answers :
229226 raise ValueError (
@@ -293,49 +290,47 @@ def solve(self, question: str) -> str:
293290 r"""Solve a question using a multi-step approach.
294291
295292 The solution process follows these steps:
296- 1. Try to solve directly - if correct, return the solution
297- 2. If not correct, use Monte Carlo Tree Search to find a good solution
298- 3. If the solution isn't perfect, use binary search to locate errors
299- 4. Generate a new solution based on the correct part
293+ 1. Try to solve directly - if correct, return the solution.
294+ 2. If not correct, perform a search by iteratively generating
295+ new solutions and evaluating their similarity scores to
296+ find a good solution. The search process involves:
297+ a. Generation: Generate new solution candidates using
298+ the generator agent.
299+ b. Evaluation: Score each solution candidate for similarity
300+ to the golden answer.
301+ c. Selection: Keep the best-scoring candidate found so far.
302+ d. Early stopping: If a sufficiently high-scoring solution
303+ is found (score > 0.9), stop early.
304+ 3. If the solution isn't perfect, use binary search to locate
305+ errors.
306+ 4. Generate a new solution based on the correct part of the
307+ initial solution.
300308
301309 Args:
302310 question (str): The question to solve.
303311
304312 Returns:
305313 str: The best solution found.
306314 """
315+
307316 # 1. Try direct solution first
308317 solution = self .get_answer (question )
309318 if self .verify_answer (question , solution ):
310319 logger .info ("Initial solution is correct" )
311320 return solution
312321
313- # 2. If direct solution fails, try Monte Carlo Tree Search
314- # to find a solution with high similarity score
322+ # 2. If direct solution fails, iteratively search for a better solution
315323 best_solution = ""
316324 best_score : float = 0.0
317325 for i in range (self .search_limit ):
318326 # Generate new answer
319327 current_solution = self .get_answer (question , best_solution )
320328
321329 # Evaluate solution similarity score
322- prompt = (
323- f"Please evaluate this solution and "
324- f"give a score between 0-1:\n "
325- f"Question: { question } \n "
326- f"Solution: { current_solution } \n "
327- f"Correct answer: { self .golden_answers .get (question , '' )} \n "
328- f"Return a JSON object with a single field 'score' containing "
329- f"a float between 0 and 1, like this: {{'score': 0.85}}\n "
330- )
331- self .generator_agent .reset ()
332- response = self .generator_agent .step (prompt )
333330 try :
334- response = self .generator_agent . step (
335- prompt , response_format = AgentResponse
331+ score = self .evaluate_partial_solution (
332+ question , current_solution
336333 )
337- agent_response = response .msgs [0 ].parsed .score # type: ignore [union-attr]
338- score = agent_response
339334
340335 # Exit early if we find a very good solution (score > 0.9)
341336 if score > 0.9 :
@@ -357,7 +352,7 @@ def solve(self, question: str) -> str:
357352 best_score ,
358353 )
359354 except Exception as e :
360- logger .error ("Error parsing agent response : %s" , str (e ))
355+ logger .error ("Error evaluating partial solution : %s" , str (e ))
361356 continue
362357
363358 # 3. If the answer is not completely correct,
0 commit comments