1- from ast import keyword
1+ from dataclasses import dataclass
2+ from typing import List , Dict , Any
23import asyncio
34import atexit
4- from dataclasses import dataclass
5- from typing import List , Optional , Dict
6- from collections import defaultdict
7- from deepeval import evaluate
8- from deepeval .test_case import LLMTestCase
9- from deepeval .metrics import BaseMetric
5+
106from deepeval .openai .extractors import InputParameters
11- from deepeval .test_run import auto_log_hyperparameters
7+ from deepeval .test_case import LLMTestCase
128from deepeval .evaluate import AsyncConfig
9+ from deepeval .metrics import BaseMetric
10+ from deepeval import evaluate
1311
@dataclass
class TestCaseMetricPair:
    """A single test case bundled with its metrics and hyperparameters.

    Instances are appended to the module-level ``test_case_pairs`` list by
    ``add_test_case``.
    """

    # The LLM test case to be evaluated.
    test_case: LLMTestCase
    # Metrics to run against ``test_case``.
    metrics: List[BaseMetric]
    # Hyperparameter map stored alongside this test case.
    hyperparameters: Dict[str, Any]
1817
@dataclass
class TestCasesMetricSet:
    """A group of test cases that share one metric list and one hyperparameter map."""

    # Test cases collected into this group.
    test_cases: List[LLMTestCase]
    # Metrics applied to every test case in the group.
    metrics: List[BaseMetric]
    # Hyperparameter map associated with the group.
    hyperparameters: Dict[str, Any]
2323
# Module-level accumulator of recorded test case / metric pairs.
test_case_pairs: List[TestCaseMetricPair] = []
2525
2626
def add_test_case(
    test_case: LLMTestCase,
    metrics: List[BaseMetric],
    input_parameters: InputParameters,
):
    """Record a test case, its metrics and its hyperparameters.

    Appends a ``TestCaseMetricPair`` to the module-level ``test_case_pairs``
    list.

    Args:
        test_case: The LLM test case to record.
        metrics: Metrics to store with ``test_case``.
        input_parameters: Call parameters from which the hyperparameter map
            is built via ``create_hyperparameters_map``.
    """
    test_case_pairs.append(
        TestCaseMetricPair(
            test_case=test_case,
            metrics=metrics,
            hyperparameters=create_hyperparameters_map(input_parameters),
        )
    )
2939
3040##############################################
3141# Evaluation
@@ -41,13 +51,17 @@ async def evaluate_async():
4151 if key not in grouped :
4252 grouped [key ] = TestCasesMetricSet (
4353 test_cases = [pair .test_case ],
44- metrics = pair .metrics
54+ metrics = pair .metrics ,
55+ hyperparameters = pair .hyperparameters
4556 )
4657 else :
4758 grouped [key ].test_cases .append (pair .test_case )
4859 for key , cases in grouped .items ():
49- evaluate (test_cases = cases .test_cases , metrics = cases .metrics )
50-
60+ evaluate (
61+ test_cases = cases .test_cases ,
62+ metrics = cases .metrics ,
63+ hyperparameters = cases .hyperparameters
64+ )
5165
5266def evaluate_sync ():
5367 sync_config = AsyncConfig (run_async = False )
@@ -60,12 +74,18 @@ def evaluate_sync():
6074 if key not in grouped :
6175 grouped [key ] = TestCasesMetricSet (
6276 test_cases = [pair .test_case ],
63- metrics = pair .metrics
77+ metrics = pair .metrics ,
78+ hyperparameters = pair .hyperparameters
6479 )
6580 else :
6681 grouped [key ].test_cases .append (pair .test_case )
6782 for key , cases in grouped .items ():
68- evaluate (test_cases = cases .test_cases , metrics = cases .metrics , async_config = sync_config )
83+ evaluate (
84+ test_cases = cases .test_cases ,
85+ metrics = cases .metrics ,
86+ hyperparameters = cases .hyperparameters ,
87+ async_config = sync_config
88+ )
6989
7090@atexit .register
7191def run_evaluations_atexit ():
@@ -80,11 +100,12 @@ def run_evaluations_atexit():
80100 except Exception as e :
81101 print ("⚠️ Could not schedule async evaluation in atexit: " , e )
82102
103+
83104##############################################
84105# Hyperparameters
85106##############################################
86107
87- def log_hyperparameters (input_parameters : InputParameters ):
108+ def create_hyperparameters_map (input_parameters : InputParameters ):
88109 hyperparameters = {"model" : input_parameters .model }
89110 if input_parameters .instructions :
90111 hyperparameters ["system_prompt" ] = input_parameters .instructions
@@ -94,4 +115,4 @@ def log_hyperparameters(input_parameters: InputParameters):
94115 hyperparameters ["system_prompt" ] = (
95116 system_messages [0 ] if len (system_messages ) == 1 else str (system_messages )
96117 )
97- auto_log_hyperparameters ( hyperparameters )
118+ return hyperparameters
0 commit comments