@@ -144,8 +144,14 @@ def __evaluate__(self, answer_text, target_text, is_valid):
144
144
else :
145
145
return "incorrect"
146
146
147
- class ExactOrMatch (ExactMatch ):
148
- """This class checks for a case-sensitive, but otherwise exact match, and returns the or of them."""
147
+ class MultiCandidateAnyExactMatch (ExactMatch ):
148
+ """
149
+ This class checks for a case-sensitive match for a list of answers from the model output,
150
+ and returns the logical OR of the per-answer metric results.
151
+
152
+ This is required for answers to multiple-choice questions, as many models sometimes give the letter answer
153
+ and sometimes the full word answer. This allows the answer to be considered correct if either form is correct.
154
+ """
149
155
150
156
def __evaluate__ (self , answer_texts , target_text , is_valid ):
151
157
@@ -170,8 +176,14 @@ class CaseInsensitiveMatch(ExactMatch):
170
176
def __evaluate__ (self , answer_text , target_text , is_valid ):
171
177
return super ().__evaluate__ (str (answer_text ).lower (), str (target_text ).lower (), is_valid )
172
178
173
- class CaseInsensitiveOrMatch (ExactOrMatch ):
174
- """This class checks for a case-insensitive, but otherwise exact or match."""
179
+ class MultiCandidateAnyCaseInsensitiveMatch (MultiCandidateAnyExactMatch ):
180
+ """
181
+ This class checks for a case-insensitive match for a list of answers from the model output,
182
+ and returns the logical OR of the per-answer metric results.
183
+
184
+ This is required for answers to multiple-choice questions, as many models sometimes give the letter answer
185
+ and sometimes the full word answer. This allows the answer to be considered correct if either form is correct.
186
+ """
175
187
176
188
def __evaluate__ (self , answer_texts , target_text , is_valid ):
177
189
answer_texts = [str (answer_text ).lower () for answer_text in answer_texts ]
0 commit comments