Skip to content

Commit c2da058

Browse files
committed
fix: omit multi-turn invalid reasons
1 parent 5d6a880 commit c2da058

2 files changed

Lines changed: 0 additions & 22 deletions

File tree

src/inference_endpoint/commands/benchmark/execute.py

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -965,14 +965,6 @@ def finalize_benchmark(ctx: BenchmarkContext, bench: BenchmarkResult) -> None:
965 965
}
966 966
if accuracy_scores:
967 967
results["accuracy_scores"] = accuracy_scores
968-
invalid_reasons = [
969-
f"{name}: {score['invalid_reason']}"
970-
for name, score in accuracy_scores.items()
971-
if score.get("valid") is False
972-
]
973-
if invalid_reasons:
974-
results["valid"] = False
975-
results["invalid_reasons"] = invalid_reasons
976 968
if ctx.collect_responses:
977 969
results["responses"] = collector.responses
978 970
if collector.errors:

src/inference_endpoint/evaluation/scoring.py

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -603,20 +603,6 @@ def score(self) -> tuple[float | None, int]:
603 603
}
604 604
if excluded_turns:
605 605
result["excluded_turns"] = excluded_turns
606-
if not valid:
607-
reasons: list[str] = []
608-
if not expected:
609-
reasons.append("no expected assistant turns found")
610-
if not observed_repeats:
611-
reasons.append("no matching completed turns found")
612-
if missing_outputs:
613-
reasons.append(
614-
f"{missing_outputs}/{len(expected_outputs)} expected turn(s) "
615-
"missing output"
616-
)
617-
if not n_scored:
618-
reasons.append("no scorable turns found")
619-
result["invalid_reason"] = "; ".join(reasons)
620 606

621 607
out_path = self.report_dir / self.scores_filename
622 608
out_path.parent.mkdir(parents=True, exist_ok=True)

0 commit comments

Comments
 (0)