-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbenchmark_regen_labels.py
More file actions
84 lines (74 loc) · 2.69 KB
/
Copy pathbenchmark_regen_labels.py
File metadata and controls
84 lines (74 loc) · 2.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""Gold labels sidecar for ``benchmark-track-regen.py`` (#372).

JSON schema (``schema_version``: ``1``):

```json
{
  "schema_version": 1,
  "gold_by_basename": {
    "clip.mp4": ["Eurasian Blue Tit", "Great Tit"]
  }
}
```

Keys are the file **basename** (as produced by ``os.path.basename`` for
``--video``).
Values are lists of expected species names (as they appear on fused tracks
after the classifier).
"""
from __future__ import annotations
import json
import os
from typing import Any, Mapping
def load_gold_by_basename(path: str) -> dict[str, list[str]]:
    """Load and validate the gold-labels sidecar; return basename -> species.

    Args:
        path: Path to a UTF-8 encoded JSON file. The document must be an
            object with a ``gold_by_basename`` object; ``schema_version`` is
            optional and is treated as ``1`` when absent.

    Returns:
        A dict mapping each (whitespace-stripped) key of ``gold_by_basename``
        to a list of whitespace-stripped, non-empty species names. A value
        given as a single string becomes a one-element list, or an empty
        list when the string is blank.

    Raises:
        ValueError: If the top-level JSON value is not an object, if
            ``schema_version`` is not ``1``, if ``gold_by_basename`` is
            missing or not an object, or if any entry is neither a list nor
            a string. Malformed JSON also surfaces as ``ValueError`` because
            ``json.JSONDecodeError`` is a subclass of it.
        OSError: If the file cannot be opened or read.
    """
    with open(path, encoding='utf-8') as fh:
        data = json.load(fh)

    # Valid JSON may be a list or scalar; reject it explicitly so callers get
    # the same ValueError as for every other schema problem instead of an
    # AttributeError from calling ``.get`` on a non-mapping.
    if not isinstance(data, Mapping):
        raise ValueError('labels JSON: top-level value must be an object')

    ver = data.get('schema_version', 1)
    if ver != 1:
        raise ValueError(
            f'Unsupported labels schema_version: {ver!r} (expected 1)',
        )

    raw = data.get('gold_by_basename')
    if raw is None:
        raise ValueError('labels JSON: missing gold_by_basename')
    if not isinstance(raw, Mapping):
        raise ValueError('labels JSON: gold_by_basename must be an object')

    out: dict[str, list[str]] = {}
    for k, v in raw.items():
        key = str(k).strip()
        if isinstance(v, list):
            # Strip each item once, then drop the ones that end up empty.
            stripped = (str(x).strip() for x in v)
            out[key] = [name for name in stripped if name]
        elif isinstance(v, str):
            s = v.strip()
            out[key] = [s] if s else []
        else:
            raise ValueError(f'labels JSON: invalid gold list for {key!r}')
    return out
def eval_video_against_gold(
gold_map: Mapping[str, list[str]],
video_path: str,
fused_tracks: list[dict[str, Any]],
) -> dict[str, Any] | None:
"""
Сравнить уникальные виды из fused-треков с gold для basename ролика.
Возвращает ``None``, если для этого basename нет записи в ``gold_map``.
"""
base = os.path.basename(video_path)
gold = gold_map.get(base)
if gold is None:
return None
pred_names: list[str] = []
for t in fused_tracks:
sn = t.get('species_name')
if sn:
pred_names.append(str(sn).strip())
pred_unique = sorted(set(pred_names))
gold_set = set(gold)
pred_set = set(pred_unique)
matched = gold_set & pred_set
return {
'video_basename': base,
'gold_species': gold,
'predicted_species_unique': pred_unique,
'missing_vs_gold': sorted(gold_set - pred_set),
'extra_vs_gold': sorted(pred_set - gold_set),
'gold_species_recall': (
len(matched) / len(gold_set) if gold_set else None
),
}