Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions src/llama_stack_provider_trustyai_garak/evalhub/garak_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,11 @@ def _normalize_url(self, url: str) -> str:

# return env

@staticmethod
def _pct_to_ratio(value: float) -> float:
    """Scale a percentage (0-100) down to a ratio (0-1).

    The scaled value is rounded to four decimal places before it is
    returned.
    """
    ratio = value / 100
    return round(ratio, 4)

def _parse_results(
self,
result: GarakScanResult,
Expand Down Expand Up @@ -1310,8 +1315,8 @@ def _parse_results(
metrics.append(
EvaluationResult(
metric_name="attack_success_rate",
metric_value=overall_asr,
metric_type="percentage",
metric_value=self._pct_to_ratio(overall_asr),
metric_type="ratio",
num_samples=overall_summary.get("total_attempts"),
)
)
Expand Down Expand Up @@ -1350,8 +1355,8 @@ def _parse_results(
metrics.append(
EvaluationResult(
metric_name=f"{probe_name}_asr",
metric_value=attack_success_rate,
metric_type="percentage",
metric_value=self._pct_to_ratio(attack_success_rate),
metric_type="ratio",
num_samples=probe_attempts if not art_intents else None,
metadata=probe_metadata,
)
Expand All @@ -1360,7 +1365,7 @@ def _parse_results(
overall_score = overall_summary.get("attack_success_rate")
if overall_score is not None:
try:
overall_score = float(overall_score)
overall_score = self._pct_to_ratio(float(overall_score))
except (TypeError, ValueError):
overall_score = None

Expand All @@ -1370,7 +1375,7 @@ def _parse_results(
for probe_name, score_data in combined["scores"].items()
if probe_name != "_overall"
)
overall_score = round((total_vulnerable / total_attempts) * 100, 2)
overall_score = self._pct_to_ratio((total_vulnerable / total_attempts) * 100)

num_examples = overall_summary.get("total_attempts", total_attempts)
try:
Expand Down Expand Up @@ -1531,7 +1536,7 @@ def main(adapter_cls: type[GarakAdapter] = GarakAdapter) -> None:

results = adapter.run_benchmark_job(adapter.job_spec, callbacks)
logger.info(f"Job completed successfully: {results.id}")
logger.info(f"Overall attack success rate: {results.overall_score}%")
logger.info(f"Overall attack success rate (ratio): {results.overall_score}")

callbacks.report_results(results)
sys.exit(0)
Expand Down
14 changes: 9 additions & 5 deletions tests/test_evalhub_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,10 +329,12 @@ def test_parse_results_uses_overall_without_double_count(monkeypatch, tmp_path):

assert len(metrics) == 2
assert metrics[0].metric_name == "attack_success_rate"
assert metrics[0].metric_value == 30.0
assert metrics[0].metric_type == "ratio"
assert metrics[0].metric_value == 0.3
assert metrics[0].num_samples == 10
assert metrics[1].metric_name == "probe.alpha_asr"
assert overall_score == 30.0
assert metrics[1].metric_type == "ratio"
assert overall_score == 0.3
assert num_examples == 10
assert overall_summary["tbsa"] == 4.1

Expand Down Expand Up @@ -1969,17 +1971,19 @@ def test_parse_results_with_art_intents(self, monkeypatch, tmp_path):

assert len(metrics) == 2
assert metrics[0].metric_name == "attack_success_rate"
assert metrics[0].metric_value == 30.0
assert metrics[0].metric_type == "ratio"
assert metrics[0].metric_value == 0.3
assert metrics[0].num_samples == 20
assert metrics[1].metric_name == "spo.SPOIntent_asr"
assert metrics[1].metric_value == 30.0
assert metrics[1].metric_type == "ratio"
assert metrics[1].metric_value == 0.3
assert metrics[1].num_samples is None
assert metrics[1].metadata["total_attempts"] == 20
assert metrics[1].metadata["unsafe_stubs"] == 3
assert metrics[1].metadata["safe_stubs"] == 7
assert "intent_breakdown" in metrics[1].metadata
assert metrics[1].metadata["intent_breakdown"]["S001"]["unsafe_stubs"] == 2
assert overall_score == 30.0
assert overall_score == 0.3
assert num_examples == 20


Expand Down
Loading