@@ -32,6 +32,9 @@
 
 from lighteval.logging.evaluation_tracker import EvaluationTracker
 from lighteval.logging.info_loggers import DetailsLogger
+from lighteval.models.endpoints.litellm_model import LiteLLMModelConfig
+from lighteval.models.endpoints.tgi_model import TGIModelConfig
+from lighteval.pipeline import Pipeline
 
 # ruff: noqa
 from tests.fixtures import TESTING_EMPTY_HF_ORG_ID
@@ -128,6 +131,77 @@ def test_results_logging_template(self, mock_evaluation_tracker: EvaluationTrack |
         assert saved_results["results"] == task_metrics
         assert saved_results["config_general"]["model_name"] == "test_model"
 
+    def test_results_redacts_litellm_api_key(self, mock_evaluation_tracker: EvaluationTracker):
+        mock_evaluation_tracker.general_config_logger.log_model_info(
+            LiteLLMModelConfig(model_name="test_model", api_key="super-secret-key")
+        )
+
+        results = mock_evaluation_tracker.results
+
+        assert results["config_general"]["model_config"]["api_key"] == "REDACTED"
+
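+        # The saved JSON must also contain the redacted value, not just the
+        # in-memory results dict, so round-trip through save() and reload.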
+        mock_evaluation_tracker.save()
+
+        results_dir = Path(mock_evaluation_tracker.output_dir) / "results" / "test_model"
+        result_files = list(results_dir.glob("results_*.json"))
+        assert len(result_files) == 1
+
+        with open(result_files[0], "r") as f:
+            saved_results = json.load(f)
+
+        assert saved_results["config_general"]["model_config"]["api_key"] == "REDACTED"
+        assert saved_results["config_general"]["model_config"]["model_name"] == "test_model"
+
+    def test_results_redacts_tgi_auth(self, mock_evaluation_tracker: EvaluationTracker):
+        mock_evaluation_tracker.general_config_logger.log_model_info(
+            TGIModelConfig(
+                model_name="test_model",
+                inference_server_address="http://localhost:8080",
+                inference_server_auth="super-secret-token",
+            )
+        )
+
+        results = mock_evaluation_tracker.results
+
+        assert results["config_general"]["model_config"]["inference_server_auth"] == "REDACTED"
+        assert results["config_general"]["model_config"]["model_name"] == "test_model"
+
+    def test_pipeline_get_results_redacts_litellm_api_key(self, mock_evaluation_tracker: EvaluationTracker):
+        mock_evaluation_tracker.general_config_logger.log_model_info(
+            LiteLLMModelConfig(model_name="test_model", api_key="super-secret-key")
+        )
+
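+        # Pipeline.__new__ bypasses __init__, so no model or tasks get loaded;
+        # only the attributes that get_results() needs are stubbed below.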
+        pipeline = Pipeline.__new__(Pipeline)
+        pipeline.accelerator = None
+        pipeline.parallel_context = None
+        pipeline.final_dict = None
+        pipeline.evaluation_tracker = mock_evaluation_tracker
+
+        results = pipeline.get_results()
+
+        assert results["config_general"]["model_config"]["api_key"] == "REDACTED"
+        assert results["config_general"]["model_config"]["model_name"] == "test_model"
+
+    def test_pipeline_get_results_redacts_tgi_auth(self, mock_evaluation_tracker: EvaluationTracker):
+        mock_evaluation_tracker.general_config_logger.log_model_info(
+            TGIModelConfig(
+                model_name="test_model",
+                inference_server_address="http://localhost:8080",
+                inference_server_auth="super-secret-token",
+            )
+        )
+
+        pipeline = Pipeline.__new__(Pipeline)
+        pipeline.accelerator = None
+        pipeline.parallel_context = None
+        pipeline.final_dict = None
+        pipeline.evaluation_tracker = mock_evaluation_tracker
+
+        results = pipeline.get_results()
+
+        assert results["config_general"]["model_config"]["inference_server_auth"] == "REDACTED"
+        assert results["config_general"]["model_config"]["model_name"] == "test_model"
+
     @pytest.mark.evaluation_tracker(save_details=True)
     def test_details_logging(self, mock_evaluation_tracker, mock_datetime):
         task_details = {