@@ -225,32 +225,11 @@ def _evaluate_single_response(
225225 self , test : BaseTest , response : ModelResponse
226226 ) -> EvaluationResult :
227227 """Evaluate a single response using the appropriate evaluator method"""
228- # Check if we're using the new evaluator system
229- if hasattr (self .evaluator , "evaluate" ):
230- # New evaluator system - pass any additional test metadata
231- kwargs = {}
232- if hasattr (test , "target_harmful_question" ):
233- kwargs ["target_harmful_question" ] = test .target_harmful_question
234- return self .evaluator .evaluate (test .prompt , response .content , ** kwargs )
235- else :
236- # Fallback to old evaluator system for backward compatibility
237- category = self .get_vulnerability_category ()
238-
239- if category == VulnerabilityCategory .DECEPTION :
240- return self .evaluator .evaluate_deception (test .prompt , response .content )
241- elif category == VulnerabilityCategory .INAPPROPRIATE_TOOL_USE :
242- return self .evaluator .evaluate_response (
243- test .prompt , response .content , self .get_vulnerability_category ()
244- )
245- elif category == VulnerabilityCategory .EXPLOIT :
246- return self .evaluator .evaluate_exploit (test .prompt , response .content )
247- elif category == VulnerabilityCategory .CHAIN_OF_THOUGHT :
248- return self .evaluator .evaluate_response (
249- test .prompt , response .content , self .get_vulnerability_category ()
250- )
251- else :
252- # Default evaluation
253- return self .evaluator .evaluate_deception (test .prompt , response .content )
228+ # New evaluator system - pass any additional test metadata
229+ kwargs = {}
230+ if hasattr (test , "target_harmful_question" ):
231+ kwargs ["target_harmful_question" ] = test .target_harmful_question
232+ return self .evaluator .evaluate (test .prompt , response .content , ** kwargs )
254233
255234 def run_test_with_repeats (
256235 self , test : BaseTest , repeat_count : int , current_num : int = 1 , total_tests : int = 1
0 commit comments