Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion vlmeval/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
from .qbench_video import QBench_Video, QBench_Video_MCQ, QBench_Video_VQA
from .reasonmap_plus import ReasonMap_Plus
from .refcoco import RefCOCODataset
from .pixmopoints import PixmoPointsDataset
from .refspatial import RefSpatialDataset
from .refspatialbench import RefSpatialBench
from .robospatialbench import RoboSpatialBench
Expand Down Expand Up @@ -292,7 +293,7 @@ def evaluate(self, eval_file, **judge_kwargs):
FoxBench, VTCBench, Asclepius, PlotQA, ChartX, ChartBench, ChartCapDataset, WorldVQA, PuzzleVQA, VisualPuzzles, # noqa: E501
MMSafetyBenchDataset, MSSBenchDataset, SIUODataset, SIUOGenDataset, SIUOMCQDataset, M3oralBenchDataset, # noqa: E501
Design2Code, VLADBench, SSIBenchDataset, NPMM, SGI_Bench_Experimental_Reasoning, MMOral_OPG_OPEN, MMOral_OPG_CLOSED, # noqa: E501
SciDocBench,
SciDocBench, PixmoPointsDataset
]

# add by EASI team
Expand Down
121 changes: 121 additions & 0 deletions vlmeval/dataset/pixmopoints.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import json

import numpy as np
import pandas as pd

from vlmeval.smp import dump, get_intermediate_file_path, load
from .image_base import ImageBaseDataset
from .utils.spatial_bench.tools.utils import Point2DParser


class PixmoPointsDataset(ImageBaseDataset):
    """Point localization evaluation using Hungarian matching.

    For every sample the predicted points are paired one-to-one with the
    ground-truth points by a minimum-total-distance assignment; a pair counts
    as a match when its Euclidean distance is below ``DISTANCE_THRESHOLD``.
    Per-sample precision, recall and F1 are then averaged over all samples.
    """

    TYPE = 'VQA'
    DATASET_URL = {'PixmoPoints': ''}
    DATASET_MD5 = {}

    DISTANCE_THRESHOLD = 0.05  # 5% of normalized image size

    # Appended verbatim to the last text message of every prompt.
    PROMPT_SUFFIX = (
        ' Output the point coordinates in JSON format.\n'
        'For example:\n'
        '[\n'
        ' {"point_2d": [x, y], "label": "point_1"}\n'
        ']'
    )

    def build_prompt(self, line):
        """Build the base prompt and append the JSON output instruction.

        Args:
            line: Sample record, forwarded unchanged to the parent class.

        Returns:
            The message list from the parent class, with ``PROMPT_SUFFIX``
            appended to the value of its last message.
        """
        msgs = super().build_prompt(line)
        # The suffix is concatenated onto the last message, so it must be text.
        assert msgs[-1]['type'] == 'text', 'last prompt message must be of type "text"'
        msgs[-1]['value'] += self.PROMPT_SUFFIX
        return msgs

    def evaluate(self, eval_file, **judge_kwargs):
        """Score a prediction file and write detail / score files next to it.

        Args:
            eval_file: Path of the prediction file; it must provide ``index``
                and ``prediction`` columns.
            **judge_kwargs: Accepted for interface compatibility; not used.

        Returns:
            dict with the mean ``precision``, ``recall`` and ``f1`` over all
            samples (all ``0`` when there are no samples).
        """
        data = load(eval_file)
        if not isinstance(data, pd.DataFrame):
            data = pd.DataFrame(data)
        data = data.sort_values(by='index')

        # Compare indices as strings so that int/str mismatches between the
        # prediction file and the dataset metadata do not prevent the lookup.
        meta = self.data.copy()
        meta['index'] = meta['index'].astype(str)
        meta = meta.set_index('index')
        data['index'] = data['index'].astype(str)

        details = []
        for _, row in data.iterrows():
            # Prefer the dataset metadata; fall back to the prediction row
            # when the index is unknown to the metadata.
            meta_row = meta.loc[row['index']] if row['index'] in meta.index else row
            width = self._image_dim(meta_row, row, 'width')
            height = self._image_dim(meta_row, row, 'height')

            parsed = Point2DParser.parse(str(row['prediction']), width, height, output='norm')
            # np.asarray lets both ndarray and plain-list parser results through.
            pred_pts = [] if parsed is None else np.asarray(parsed).tolist()
            gt_pts = self._parse_points(str(meta_row.get('answer', row.get('answer', ''))))

            precision, recall, f1 = self._score_sample(pred_pts, gt_pts)
            details.append({
                'index': row['index'],
                'precision': precision,
                'recall': recall,
                'f1': f1,
            })

        total = len(details)
        result = {
            key: (sum(item[key] for item in details) / total if total > 0 else 0)
            for key in ('precision', 'recall', 'f1')
        }
        dump(pd.DataFrame(details), get_intermediate_file_path(eval_file, '_detail'))
        dump(result, get_intermediate_file_path(eval_file, '_score', 'json'))
        return result

    @staticmethod
    def _image_dim(meta_row, row, key):
        """Return a positive integer image dimension, defaulting to ``1``.

        The metadata row is consulted first, then the prediction row. Missing,
        non-numeric, non-finite (NaN/inf) and sub-1 values are skipped: NaN is
        truthy, so an ``x or 1`` guard would let it reach ``int()`` and raise.
        """
        for source in (meta_row, row):
            try:
                value = float(source.get(key))
            except (TypeError, ValueError):
                continue
            if np.isfinite(value) and value >= 1:
                return int(value)
        return 1

    def _score_sample(self, pred_pts, gt_pts):
        """Return ``(precision, recall, f1)`` for one sample as Python floats.

        Args:
            pred_pts: List of predicted ``[x, y]`` points.
            gt_pts: List of ground-truth ``[x, y]`` points.
        """
        if not gt_pts:
            # Nothing to find: an empty prediction is perfect; any predicted
            # point is a false positive while recall stays at 1.
            return (1.0, 1.0, 1.0) if not pred_pts else (0.0, 1.0, 0.0)
        if not pred_pts:
            return 0.0, 0.0, 0.0

        from scipy.optimize import linear_sum_assignment

        pred_arr = np.asarray(pred_pts, dtype=float)
        gt_arr = np.asarray(gt_pts, dtype=float)
        # Pairwise Euclidean distances, shape (num_pred, num_gt).
        dists = np.linalg.norm(pred_arr[:, None] - gt_arr[None, :], axis=2)
        row_ind, col_ind = linear_sum_assignment(dists)
        # NOTE(review): the assignment minimises total distance and is only
        # thresholded afterwards, which can yield fewer matches than a
        # maximum-cardinality matching restricted to sub-threshold pairs.
        # Left unchanged to keep scores stable - confirm against the
        # intended benchmark protocol.
        matches = int(np.count_nonzero(dists[row_ind, col_ind] < self.DISTANCE_THRESHOLD))

        precision = matches / len(pred_pts)
        recall = matches / len(gt_pts)
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
        return precision, recall, f1

    @staticmethod
    def _parse_points(s):
        """Parse a JSON string of points into a list of ``[x, y]`` floats.

        Accepted items are ``[x, y]`` lists and objects carrying a two-element
        list under ``"point_2d"`` or ``"point"`` (checked in that order). A
        single top-level object is treated as a one-item list. Items that are
        malformed, use boolean coordinates, or fall outside ``[0, 1]`` on
        either axis are skipped.

        Returns:
            The list of valid points; ``[]`` when ``s`` is not valid JSON or
            does not decode to a list or object.
        """
        try:
            pts = json.loads(s)
        except (TypeError, ValueError):  # json.JSONDecodeError is a ValueError
            return []

        if isinstance(pts, dict):
            pts = [pts]
        if not isinstance(pts, list):
            return []

        result = []
        for p in pts:
            candidates = (p.get('point_2d'), p.get('point')) if isinstance(p, dict) else (p,)
            point = next((c for c in candidates if isinstance(c, list) and len(c) == 2), None)
            if point is None:
                continue
            # bool is a subclass of int, so float(True) would silently become 1.0.
            if any(isinstance(v, bool) for v in point):
                continue
            try:
                x, y = float(point[0]), float(point[1])
            except (TypeError, ValueError):
                continue
            # Chained comparisons are False for NaN, so NaN is rejected here too.
            if 0.0 <= x <= 1.0 and 0.0 <= y <= 1.0:
                result.append([x, y])
        return result