@@ -55,7 +55,7 @@ class QueryResponse(BaseModel):
5555 )
5656 exact_answer : str = Field (description = """your succinct, final answer.""" )
5757 confidence : str = Field (
58- description = """
58+ description = r """
5959your confidence score between 0|\%| and 100|\%| for your answer.
6060"""
6161 )
@@ -92,7 +92,7 @@ class GradingResponse(BaseModel):
9292incorrect."""
9393 )
9494 confidence : str = Field (
95- description = """The extracted confidence score between 0|\%|
95+ description = r """The extracted confidence score between 0|\%|
9696and 100|\%| from [response]. Put 100 if there is no confidence score available.
9797"""
9898 )
@@ -160,8 +160,8 @@ class EvalResult(BaseModel):
160160{content}
161161"""
162162
163- GRADER_TEMPLATE = """
164- Judge whether the following [response] to [question] is correct or not
163+ GRADER_TEMPLATE = r """
164+ Judge whether the following [response] to [question] is correct or not
165165based on the precise and unambiguous [correct_answer] below.
166166
167167[question]: {question}
@@ -171,26 +171,26 @@ class EvalResult(BaseModel):
171171Your judgement must be in the format and criteria specified below:
172172
173173extracted_final_answer: The final exact answer extracted from the [response].
174- Put the extracted answer as 'None' if there is no exact, final answer to
174+ Put the extracted answer as 'None' if there is no exact, final answer to
175175extract from the response.
176176
177177[correct_answer]: {correct_answer}
178178
179- reasoning: Explain why the extracted_final_answer is correct or incorrect
180- based on [correct_answer], focusing only on if there are meaningful
181- differences between [correct_answer] and the extracted_final_answer.
182- Do not comment on any background to the problem, do not attempt
183- to solve the problem, do not argue for any answer different
179+ reasoning: Explain why the extracted_final_answer is correct or incorrect
180+ based on [correct_answer], focusing only on if there are meaningful
181+ differences between [correct_answer] and the extracted_final_answer.
182+ Do not comment on any background to the problem, do not attempt
183+ to solve the problem, do not argue for any answer different
184184than [correct_answer], focus only on whether the answers match.
185185
186- correct: Answer 'yes' if extracted_final_answer matches the
187- [correct_answer] given above, or is within a small margin of error for
188- numerical problems. Answer 'no' otherwise, i.e. if there is any
189- inconsistency, ambiguity, non-equivalency, or if the extracted answer is
186+ correct: Answer 'yes' if extracted_final_answer matches the
187+ [correct_answer] given above, or is within a small margin of error for
188+ numerical problems. Answer 'no' otherwise, i.e. if there is any
189+ inconsistency, ambiguity, non-equivalency, or if the extracted answer is
190190incorrect.
191191
192192
193- confidence: The extracted confidence score between 0|\%| and 100|\%|
193+ confidence: The extracted confidence score between 0|\%| and 100|\%|
194194from [response]. Put 100 if there is no confidence score available.
195195""" .strip ()
196196
0 commit comments