@@ -189,6 +189,88 @@ def create_oci_artifact(self, _spec):
189189 assert captured ["timeout_seconds" ] == 42
190190
191191
def test_simple_mode_passes_hf_cache_env(monkeypatch, tmp_path):
    """A job that sets ``hf_cache_path`` must reach ``run_garak_scan`` with ``HF_HUB_CACHE`` in ``env``."""
    module = _load_evalhub_garak_adapter(monkeypatch)
    adapter = module.GarakAdapter()
    monkeypatch.setenv("GARAK_SCAN_DIR", str(tmp_path))

    # Arguments observed by the stubbed scan runner.
    seen: dict[str, object] = {}

    def _stub_scan(config_file, timeout_seconds, report_prefix, env=None, log_file=None):
        """Record ``env`` and write a placeholder report instead of running a scan."""
        seen["env"] = env
        report_file = report_prefix.with_suffix(".report.jsonl")
        report_file.write_text("{}", encoding="utf-8")
        return module.GarakScanResult(
            returncode=0,
            stdout="",
            stderr="",
            report_prefix=report_prefix,
        )

    def _stub_avid(_path):
        """Stand in for ``convert_to_avid_report``; always returns True."""
        return True

    def _stub_parse(self, result, eval_threshold, art_intents=False):
        """Stand in for ``GarakAdapter._parse_results`` with a fixed, empty result tuple."""
        return [], None, 0, {"total_attempts": 0}

    # Parameter names of the stubs are kept as-is so keyword calls still bind.
    monkeypatch.setattr(module, "run_garak_scan", _stub_scan)
    monkeypatch.setattr(module, "convert_to_avid_report", _stub_avid)
    monkeypatch.setattr(module.GarakAdapter, "_parse_results", _stub_parse)

    class _Callbacks:
        """Minimal callbacks object offering the two methods this test stubs."""

        def report_status(self, _update):
            """Ignore the status update."""

        def create_oci_artifact(self, _spec):
            """Return a fixed artifact descriptor."""
            return SimpleNamespace(reference="oci://ref", digest="sha256:test")

    target_model = SimpleNamespace(url="http://localhost:8000", name="test-model")
    job = SimpleNamespace(
        id="hf-cache-job",
        benchmark_id="trustyai_garak::quick",
        benchmark_index=0,
        model=target_model,
        parameters={"hf_cache_path": "/test_data/hf-cache"},
        exports=None,
    )

    adapter.run_benchmark_job(job, _Callbacks())

    expected_env = {"HF_HUB_CACHE": "/test_data/hf-cache"}
    assert seen["env"] == expected_env
231+
232+
def test_simple_mode_no_hf_cache_passes_none_env(monkeypatch, tmp_path):
    """A job without ``hf_cache_path`` must leave ``env`` as ``None`` for ``run_garak_scan``."""
    module = _load_evalhub_garak_adapter(monkeypatch)
    adapter = module.GarakAdapter()
    monkeypatch.setenv("GARAK_SCAN_DIR", str(tmp_path))

    # Arguments observed by the stubbed scan runner.
    seen: dict[str, object] = {}

    def _stub_scan(config_file, timeout_seconds, report_prefix, env=None, log_file=None):
        """Record ``env`` and write a placeholder report instead of running a scan."""
        seen["env"] = env
        report_file = report_prefix.with_suffix(".report.jsonl")
        report_file.write_text("{}", encoding="utf-8")
        return module.GarakScanResult(
            returncode=0,
            stdout="",
            stderr="",
            report_prefix=report_prefix,
        )

    def _stub_avid(_path):
        """Stand in for ``convert_to_avid_report``; always returns True."""
        return True

    def _stub_parse(self, result, eval_threshold, art_intents=False):
        """Stand in for ``GarakAdapter._parse_results`` with a fixed, empty result tuple."""
        return [], None, 0, {"total_attempts": 0}

    # Parameter names of the stubs are kept as-is so keyword calls still bind.
    monkeypatch.setattr(module, "run_garak_scan", _stub_scan)
    monkeypatch.setattr(module, "convert_to_avid_report", _stub_avid)
    monkeypatch.setattr(module.GarakAdapter, "_parse_results", _stub_parse)

    class _Callbacks:
        """Minimal callbacks object offering the two methods this test stubs."""

        def report_status(self, _update):
            """Ignore the status update."""

        def create_oci_artifact(self, _spec):
            """Return a fixed artifact descriptor."""
            return SimpleNamespace(reference="oci://ref", digest="sha256:test")

    target_model = SimpleNamespace(url="http://localhost:8000", name="test-model")
    job = SimpleNamespace(
        id="no-hf-cache-job",
        benchmark_id="trustyai_garak::quick",
        benchmark_index=0,
        model=target_model,
        parameters={},
        exports=None,
    )

    adapter.run_benchmark_job(job, _Callbacks())

    assert seen["env"] is None
272+
273+
192274def test_parse_results_uses_overall_without_double_count (monkeypatch , tmp_path ):
193275 module = _load_evalhub_garak_adapter (monkeypatch )
194276 adapter = module .GarakAdapter ()
@@ -245,8 +327,11 @@ def test_parse_results_uses_overall_without_double_count(monkeypatch, tmp_path):
245327 )
246328 metrics , overall_score , num_examples , overall_summary = adapter ._parse_results (result , 0.5 )
247329
248- assert len (metrics ) == 1
249- assert metrics [0 ].metric_name == "probe.alpha_asr"
330+ assert len (metrics ) == 2
331+ assert metrics [0 ].metric_name == "attack_success_rate"
332+ assert metrics [0 ].metric_value == 30.0
333+ assert metrics [0 ].num_samples == 10
334+ assert metrics [1 ].metric_name == "probe.alpha_asr"
250335 assert overall_score == 30.0
251336 assert num_examples == 10
252337 assert overall_summary ["tbsa" ] == 4.1
@@ -1882,15 +1967,18 @@ def test_parse_results_with_art_intents(self, monkeypatch, tmp_path):
18821967 art_intents = True ,
18831968 )
18841969
1885- assert len (metrics ) == 1
1886- assert metrics [0 ].metric_name == "spo.SPOIntent_asr "
1970+ assert len (metrics ) == 2
1971+ assert metrics [0 ].metric_name == "attack_success_rate "
18871972 assert metrics [0 ].metric_value == 30.0
1888- assert metrics [0 ].num_samples is None
1889- assert metrics [0 ].metadata ["total_attempts" ] == 20
1890- assert metrics [0 ].metadata ["unsafe_stubs" ] == 3
1891- assert metrics [0 ].metadata ["safe_stubs" ] == 7
1892- assert "intent_breakdown" in metrics [0 ].metadata
1893- assert metrics [0 ].metadata ["intent_breakdown" ]["S001" ]["unsafe_stubs" ] == 2
1973+ assert metrics [0 ].num_samples == 20
1974+ assert metrics [1 ].metric_name == "spo.SPOIntent_asr"
1975+ assert metrics [1 ].metric_value == 30.0
1976+ assert metrics [1 ].num_samples is None
1977+ assert metrics [1 ].metadata ["total_attempts" ] == 20
1978+ assert metrics [1 ].metadata ["unsafe_stubs" ] == 3
1979+ assert metrics [1 ].metadata ["safe_stubs" ] == 7
1980+ assert "intent_breakdown" in metrics [1 ].metadata
1981+ assert metrics [1 ].metadata ["intent_breakdown" ]["S001" ]["unsafe_stubs" ] == 2
18941982 assert overall_score == 30.0
18951983 assert num_examples == 20
18961984
0 commit comments