Skip to content

Commit 47c866a

Browse files
YuhangWuAI, 吴宇航, Wendong-Fan
authored
fix: rename monte_carlo_tree_search to evaluate_partial_solution and clarify solve docstring (#2289)
Co-authored-by: 吴宇航 <[email protected]> Co-authored-by: Wendong-Fan <[email protected]>
1 parent df8bc2a commit 47c866a

File tree

1 file changed

+29
-34
lines changed

1 file changed

+29
-34
lines changed

camel/datagen/cot_datagen.py

Lines changed: 29 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -204,26 +204,23 @@ def verify_answer(self, question: str, answer: str) -> bool:
204204
logger.info("Answer verification result: %s", is_correct)
205205
return is_correct
206206

207-
def monte_carlo_tree_search(
207+
def evaluate_partial_solution(
208208
self, question: str, partial_solution: str = ""
209209
) -> float:
210-
r"""Perform Monte Carlo Tree Search to find the best solution.
210+
r"""Evaluate the quality of a partial solution against the
211+
golden answer.
211212
212-
Process:
213-
a. Selection: Choose promising partial solutions based on previous
214-
scores
215-
b. Expansion: Generate new solution steps using the generator agent
216-
c. Simulation: Evaluate solution quality using similarity scores
217-
d. Backpropagation: Update solution tree with new findings
213+
This function generates a similarity score between the given partial
214+
solution and the correct answer (golden answer).
218215
219216
Args:
220-
question (str): The question to solve.
221-
partial_solution (str): The current partial solution.
217+
question (str): The question being solved.
218+
partial_solution (str): The partial solution generated so far.
222219
(default::obj:`""`)
223220
224221
Returns:
225-
float: The similarity score between the current
226-
solution and golden answer.
222+
float: A similarity score between 0 and 1, indicating how close the
223+
partial solution is to the golden answer.
227224
"""
228225
if question not in self.golden_answers:
229226
raise ValueError(
@@ -293,49 +290,47 @@ def solve(self, question: str) -> str:
293290
r"""Solve a question using a multi-step approach.
294291
295292
The solution process follows these steps:
296-
1. Try to solve directly - if correct, return the solution
297-
2. If not correct, use Monte Carlo Tree Search to find a good solution
298-
3. If the solution isn't perfect, use binary search to locate errors
299-
4. Generate a new solution based on the correct part
293+
1. Try to solve directly - if correct, return the solution.
294+
2. If not correct, perform a search by iteratively generating
295+
new solutions and evaluating their similarity scores to
296+
find a good solution. The search process involves:
297+
a. Generation: Generate new solution candidates using
298+
the generator agent.
299+
b. Evaluation: Score each solution candidate for similarity
300+
to the golden answer.
301+
c. Selection: Keep the best-scoring candidate found so far.
302+
d. Early stopping: If a sufficiently high-scoring solution
303+
is found (score > 0.9), stop early.
304+
3. If the solution isn't perfect, use binary search to locate
305+
errors.
306+
4. Generate a new solution based on the correct part of the
307+
initial solution.
300308
301309
Args:
302310
question (str): The question to solve.
303311
304312
Returns:
305313
str: The best solution found.
306314
"""
315+
307316
# 1. Try direct solution first
308317
solution = self.get_answer(question)
309318
if self.verify_answer(question, solution):
310319
logger.info("Initial solution is correct")
311320
return solution
312321

313-
# 2. If direct solution fails, try Monte Carlo Tree Search
314-
# to find a solution with high similarity score
322+
# 2. If direct solution fails, iteratively search for a better solution
315323
best_solution = ""
316324
best_score: float = 0.0
317325
for i in range(self.search_limit):
318326
# Generate new answer
319327
current_solution = self.get_answer(question, best_solution)
320328

321329
# Evaluate solution similarity score
322-
prompt = (
323-
f"Please evaluate this solution and "
324-
f"give a score between 0-1:\n"
325-
f"Question: {question}\n"
326-
f"Solution: {current_solution}\n"
327-
f"Correct answer: {self.golden_answers.get(question, '')}\n"
328-
f"Return a JSON object with a single field 'score' containing "
329-
f"a float between 0 and 1, like this: {{'score': 0.85}}\n"
330-
)
331-
self.generator_agent.reset()
332-
response = self.generator_agent.step(prompt)
333330
try:
334-
response = self.generator_agent.step(
335-
prompt, response_format=AgentResponse
331+
score = self.evaluate_partial_solution(
332+
question, current_solution
336333
)
337-
agent_response = response.msgs[0].parsed.score # type: ignore [union-attr]
338-
score = agent_response
339334

340335
# Exit early if we find a very good solution (score > 0.9)
341336
if score > 0.9:
@@ -357,7 +352,7 @@ def solve(self, question: str) -> str:
357352
best_score,
358353
)
359354
except Exception as e:
360-
logger.error("Error parsing agent response: %s", str(e))
355+
logger.error("Error evaluating partial solution: %s", str(e))
361356
continue
362357

363358
# 3. If the answer is not completely correct,

0 commit comments

Comments (0)