4 changes: 4 additions & 0 deletions README.md
@@ -56,6 +56,10 @@ and correlate to human judgments.
 
 **TODO** list evaluation metrics here.
 
+### Contributing
+
+Please make sure to run `black pose_evaluation` before submitting a pull request.
+
 ## Cite
 
 If you use our toolkit in your research or projects, please consider citing the work.
43 changes: 33 additions & 10 deletions pose_evaluation/evaluation/evaluate_signclip.py
@@ -1,14 +1,17 @@
 import argparse
-from pathlib import Path
-import time
 import json
 import random
-import pandas as pd
+import time
+from pathlib import Path
+
 import numpy as np
+import pandas as pd
 import torch
 from tqdm import tqdm
+
 from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric
 
+
 def load_embedding(file_path: Path) -> np.ndarray:
     """
     Load a SignCLIP embedding from a .npy file, ensuring it has the correct shape.
@@ -61,7 +64,10 @@ def get_embedding(video_file):
 
 
 def calculate_mean_distances(
-    distance_matrix: torch.Tensor, indices_a: torch.Tensor, indices_b: torch.Tensor, exclude_self: bool = False
+    distance_matrix: torch.Tensor,
+    indices_a: torch.Tensor,
+    indices_b: torch.Tensor,
+    exclude_self: bool = False,
 ) -> float:
     """
     Calculate the mean of distances between two sets of indices in a 2D distance matrix.
@@ -92,7 +98,6 @@ def calculate_mean_distances(
 
 
 def generate_synthetic_data(num_items, num_classes, num_items_per_class=4):
-
     torch.manual_seed(42)
     random.seed(42)
     # distance_matrix = torch.rand((num_items, num_items)) * 100
@@ -238,7 +243,7 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, out_path: Path, kind: str
 
     find_class_distances_end = time.perf_counter()
 
-    print(f"Finding within and without took {find_class_distances_end-find_class_distances_start}")
+    print(f"Finding within and without took {find_class_distances_end - find_class_distances_start}")
 
     analysis_end = time.perf_counter()
     analysis_duration = analysis_end - analysis_start
@@ -288,8 +293,17 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, out_path: Path, kind: str
 
 def main():
     parser = argparse.ArgumentParser(description="Evaluate SignCLIP embeddings with score_all.")
-    parser.add_argument("emb_dir", type=Path, help="Path to the directory containing SignCLIP .npy files")
-    parser.add_argument("--split_file", type=Path, required=True, help="Path to the split CSV file (e.g., test.csv)")
+    parser.add_argument(
+        "emb_dir",
+        type=Path,
+        help="Path to the directory containing SignCLIP .npy files",
+    )
+    parser.add_argument(
+        "--split_file",
+        type=Path,
+        required=True,
+        help="Path to the split CSV file (e.g., test.csv)",
+    )
     parser.add_argument(
         "--kind",
         type=str,
@@ -298,7 +312,11 @@ def main():
         help="Type of distance metric to use (default: cosine)",
     )
 
-    parser.add_argument("--out_path", type=Path, help="Where to save output distance npz matrix+file list")
+    parser.add_argument(
+        "--out_path",
+        type=Path,
+        help="Where to save output distance npz matrix+file list",
+    )
 
     args = parser.parse_args()
 
@@ -311,7 +329,12 @@
 
     print(f"Scores will be saved to {output_file}")
 
-    evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split_file, out_path=output_file, kind=args.kind)
+    evaluate_signclip(
+        emb_dir=args.emb_dir,
+        split_file=args.split_file,
+        out_path=output_file,
+        kind=args.kind,
+    )
 
 
 if __name__ == "__main__":
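Reviewer note for context: only the reformatted signature of `calculate_mean_distances` appears in this diff, not its body. A minimal sketch of the semantics the name, signature, and docstring suggest — the indexing and self-exclusion logic below is an assumption for illustration, not the file's actual implementation:

```python
import torch


def mean_distance_sketch(
    distance_matrix: torch.Tensor,
    indices_a: torch.Tensor,
    indices_b: torch.Tensor,
    exclude_self: bool = False,
) -> float:
    # Sub-matrix of distances from every index in A to every index in B.
    sub = distance_matrix[indices_a][:, indices_b]
    if exclude_self:
        # Drop cells comparing an item with itself; their zero distance would bias the mean.
        self_mask = indices_a.unsqueeze(1) == indices_b.unsqueeze(0)
        sub = sub[~self_mask]
    return sub.float().mean().item()


# Toy 3x3 distance matrix; mean intra-class distance for items {0, 1}:
d = torch.tensor([[0.0, 1.0, 4.0], [1.0, 0.0, 2.0], [4.0, 2.0, 0.0]])
print(mean_distance_sketch(d, torch.tensor([0, 1]), torch.tensor([0, 1]), exclude_self=True))  # 1.0
```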
27 changes: 10 additions & 17 deletions pose_evaluation/examples/example_metric_construction.py
@@ -1,19 +1,18 @@
 from pathlib import Path
 
 from pose_format import Pose
-from pose_evaluation.metrics.distance_metric import DistanceMetric
-from pose_evaluation.metrics.distance_measure import AggregatedPowerDistance
+
 from pose_evaluation.metrics.base import BaseMetric
+from pose_evaluation.metrics.distance_measure import AggregatedPowerDistance
+from pose_evaluation.metrics.distance_metric import DistanceMetric
 from pose_evaluation.metrics.test_distance_metric import get_poses
 from pose_evaluation.utils.pose_utils import zero_pad_shorter_poses
 
 if __name__ == "__main__":
     # Define file paths for test pose data
-    reference_file = (
-        Path("pose_evaluation") / "utils" / "test" / "test_data" / "colin-1-HOUSE.pose"
-    )
-    hypothesis_file = (
-        Path("pose_evaluation") / "utils" / "test" / "test_data" / "colin-2-HOUSE.pose"
-    )
+    test_data_path = Path("pose_evaluation") / "utils" / "test" / "test_data"
+    reference_file = test_data_path / "colin-1-HOUSE.pose"
+    hypothesis_file = test_data_path / "colin-2-HOUSE.pose"
+
     # Choose whether to load real files or generate test poses
     # They have different lengths, and so some metrics will crash!
@@ -33,25 +32,19 @@
     poses = [hypothesis, reference]
 
     # Define distance metrics
-    mean_l1_metric = DistanceMetric(
-        "mean_l1_metric", distance_measure=AggregatedPowerDistance(1, 17)
-    )
+    mean_l1_metric = DistanceMetric("mean_l1_metric", distance_measure=AggregatedPowerDistance(1, 17))
     metrics = [
         BaseMetric("base"),
         DistanceMetric("PowerDistanceMetric", AggregatedPowerDistance(2, 1)),
         DistanceMetric("AnotherPowerDistanceMetric", AggregatedPowerDistance(1, 10)),
         mean_l1_metric,
         DistanceMetric(
             "max_l1_metric",
-            AggregatedPowerDistance(
-                order=1, aggregation_strategy="max", default_distance=0
-            ),
+            AggregatedPowerDistance(order=1, aggregation_strategy="max", default_distance=0),
         ),
         DistanceMetric(
             "MeanL2Score",
-            AggregatedPowerDistance(
-                order=2, aggregation_strategy="mean", default_distance=0
-            ),
+            AggregatedPowerDistance(order=2, aggregation_strategy="mean", default_distance=0),
         ),
     ]
 
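A note on the positional calls that black collapsed here: judging from the keyword form used further down in the same list, `AggregatedPowerDistance(1, 17)` reads as order=1 (an L1 distance) with 17 as the fill-in distance for masked values. A hedged sketch of the two spellings side by side — the positional argument order and the default aggregation are inferred, not confirmed by this diff:

```python
from pose_evaluation.metrics.distance_measure import AggregatedPowerDistance
from pose_evaluation.metrics.distance_metric import DistanceMetric

# Assumed equivalent, if the positional order is (order, default_distance)
# and the aggregation defaults to "mean":
terse = DistanceMetric("mean_l1_metric", AggregatedPowerDistance(1, 17))
spelled_out = DistanceMetric(
    "mean_l1_metric",
    AggregatedPowerDistance(order=1, aggregation_strategy="mean", default_distance=17),
)
```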
25 changes: 7 additions & 18 deletions pose_evaluation/metrics/base.py
@@ -1,5 +1,6 @@
 # pylint: disable=undefined-variable
 from typing import Any, Callable, Sequence
+
 from tqdm import tqdm
 
 
@@ -83,9 +84,7 @@ def __call__(self, hypothesis: T, reference: T) -> float:
     def score(self, hypothesis: T, reference: T) -> float:
         raise NotImplementedError
 
-    def score_with_signature(
-        self, hypothesis: T, reference: T, short: bool = False
-    ) -> Score:
+    def score_with_signature(self, hypothesis: T, reference: T, short: bool = False) -> Score:
         return Score(
             name=self.name,
             score=self.score(hypothesis, reference),
@@ -96,29 +95,19 @@ def score_max(self, hypothesis: T, references: Sequence[T]) -> float:
         all_scores = self.score_all([hypothesis], references)
         return max(max(scores) for scores in all_scores)
 
-    def validate_corpus_score_input(
-        self, hypotheses: Sequence[T], references: Sequence[Sequence[T]]
-    ):
+    def validate_corpus_score_input(self, hypotheses: Sequence[T], references: Sequence[Sequence[T]]):
         # This method is designed to avoid mistakes in the use of the corpus_score method
         for reference in references:
-            assert len(hypotheses) == len(
-                reference
-            ), "Hypothesis and reference must have the same number of instances"
+            assert len(hypotheses) == len(reference), "Hypothesis and reference must have the same number of instances"
 
-    def corpus_score(
-        self, hypotheses: Sequence[T], references: Sequence[list[T]]
-    ) -> float:
+    def corpus_score(self, hypotheses: Sequence[T], references: Sequence[list[T]]) -> float:
         """Default implementation: average over sentence scores."""
         self.validate_corpus_score_input(hypotheses, references)
         transpose_references = list(zip(*references))
-        scores = [
-            self.score_max(h, r) for h, r in zip(hypotheses, transpose_references)
-        ]
+        scores = [self.score_max(h, r) for h, r in zip(hypotheses, transpose_references)]
         return sum(scores) / len(hypotheses)
 
-    def score_all(
-        self, hypotheses: Sequence[T], references: Sequence[T], progress_bar=True
-    ) -> list[list[float]]:
+    def score_all(self, hypotheses: Sequence[T], references: Sequence[T], progress_bar=True) -> list[list[float]]:
         """Call the score function for each hypothesis-reference pair."""
         return [
             [self.score(h, r) for r in references]
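The collapsed `corpus_score` chain now reads in one pass: references arrive as parallel streams (one list per reference set, each aligned with the hypotheses, which `validate_corpus_score_input` enforces), `zip(*references)` regroups them per hypothesis, `score_max` keeps the best reference for each, and the mean is returned. A toy subclass to trace the flow — the exact-match metric is invented for the example; only the `BaseMetric(name)` construction is taken from the example script in this PR:

```python
from pose_evaluation.metrics.base import BaseMetric


class ExactMatchMetric(BaseMetric):
    def score(self, hypothesis, reference) -> float:
        # Invented toy scorer: 1.0 on exact equality, else 0.0.
        return float(hypothesis == reference)


metric = ExactMatchMetric("exact_match")
hypotheses = ["a", "b"]
references = [["a", "x"], ["y", "b"]]  # two streams, each aligned with the hypotheses
# transpose -> [("a", "y"), ("x", "b")]; best per hypothesis: 1.0 and 1.0; mean: 1.0
print(metric.corpus_score(hypotheses, references))
```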
3 changes: 2 additions & 1 deletion pose_evaluation/metrics/base_embedding_metric.py
@@ -1,7 +1,8 @@
 from typing import TypeVar
 
 import torch
-from pose_evaluation.metrics.base import BaseMetric
 
+from pose_evaluation.metrics.base import BaseMetric
+
 # Define a type alias for embeddings (e.g., torch.Tensor)
 Embedding = TypeVar("Embedding", bound=torch.Tensor)
12 changes: 7 additions & 5 deletions pose_evaluation/metrics/conftest.py
@@ -1,9 +1,10 @@
 import shutil
 from pathlib import Path
 from typing import Callable, Union
-import torch
 
 import numpy as np
 import pytest
+import torch
 
+
 @pytest.fixture(scope="session", autouse=True)
@@ -20,11 +21,12 @@ def clean_test_artifacts():
 @pytest.fixture(name="distance_matrix_shape_checker")
 def fixture_distance_matrix_shape_checker() -> Callable[[torch.Tensor, torch.Tensor], None]:
     def _check_shape(hyp_count: int, ref_count: int, distance_matrix: torch.Tensor):
-
         expected_shape = torch.Size([hyp_count, ref_count])
-        assert (
-            distance_matrix.shape == expected_shape
-        ), f"For M={hyp_count} hypotheses, N={ref_count} references, Distance Matrix should be MxN={expected_shape}. Instead, received {distance_matrix.shape}"
+        assert distance_matrix.shape == expected_shape, (
+            f"For M={hyp_count} hypotheses, N={ref_count} references, "
+            f"Distance Matrix should be MxN={expected_shape}. "
+            f"Instead, received {distance_matrix.shape}"
+        )
 
     return _check_shape
 
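The multi-line f-string also makes the fixture's failure message easier to scan in pytest output. Since the fixture is registered under `name="distance_matrix_shape_checker"`, tests request it by that name rather than by the function name. A hypothetical test using it — the `torch.cdist` call stands in for whatever metric produces the MxN matrix:

```python
import torch


def test_distance_matrix_is_m_by_n(distance_matrix_shape_checker):
    hyps = torch.rand(3, 768)
    refs = torch.rand(5, 768)
    # Stand-in scorer; any metric returning an MxN distance matrix fits the checker.
    distance_matrix = torch.cdist(hyps, refs)
    distance_matrix_shape_checker(hyps.shape[0], refs.shape[0], distance_matrix)
```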
25 changes: 14 additions & 11 deletions pose_evaluation/metrics/distance_measure.py
@@ -1,11 +1,15 @@
 from typing import Literal, Dict, Any
+
 import numpy.ma as ma  # pylint: disable=consider-using-from-import
+
 from pose_evaluation.metrics.base import Signature
 
 AggregationStrategy = Literal["max", "min", "mean", "sum"]
 
 
 class DistanceMeasureSignature(Signature):
+    """Signature for distance measure metrics."""
+
     def __init__(self, name: str, args: Dict[str, Any]) -> None:
         super().__init__(name=name, args=args)
         self.update_abbr("distance", "dist")
@@ -14,6 +18,7 @@ def __init__(self, name: str, args: Dict[str, Any]) -> None:
 
 class DistanceMeasure:
     """Abstract base class for distance measures."""
+
     _SIGNATURE_TYPE = DistanceMeasureSignature
 
     def __init__(self, name: str) -> None:
@@ -22,7 +27,7 @@ def __init__(self, name: str) -> None:
     def get_distance(self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray) -> float:
         """
         Compute the distance between hypothesis and reference data.
-
+
         This method should be implemented by subclasses.
         """
         raise NotImplementedError
@@ -37,6 +42,7 @@ def get_signature(self) -> Signature:
 
 class PowerDistanceSignature(DistanceMeasureSignature):
     """Signature for power distance measures."""
+
     def __init__(self, name: str, args: Dict[str, Any]) -> None:
         super().__init__(name=name, args=args)
         self.update_signature_and_abbr("order", "ord", args)
@@ -46,6 +52,7 @@ def __init__(self, name: str, args: Dict[str, Any]) -> None:
 
 class AggregatedPowerDistance(DistanceMeasure):
     """Aggregated power distance metric using a specified aggregation strategy."""
+
     _SIGNATURE_TYPE = PowerDistanceSignature
 
     def __init__(
@@ -56,7 +63,7 @@ def __init__(
     ) -> None:
         """
         Initialize the aggregated power distance metric.
-
+
         :param order: The exponent to which differences are raised.
         :param default_distance: The value to fill in for masked entries.
         :param aggregation_strategy: Strategy to aggregate computed distances.
@@ -69,7 +76,7 @@ def __init__(
     def _aggregate(self, distances: ma.MaskedArray) -> float:
         """
         Aggregate computed distances using the specified strategy.
-
+
         :param distances: A masked array of computed distances.
         :return: A single aggregated distance value.
         """
@@ -82,23 +89,19 @@ def _aggregate(self, distances: ma.MaskedArray) -> float:
         if self.aggregation_strategy in aggregation_funcs:
             return aggregation_funcs[self.aggregation_strategy]()
 
-        raise NotImplementedError(
-            f"Aggregation Strategy {self.aggregation_strategy} not implemented"
-        )
+        raise NotImplementedError(f"Aggregation Strategy {self.aggregation_strategy} not implemented")
 
-    def _calculate_distances(
-        self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray
-    ) -> ma.MaskedArray:
+    def _calculate_distances(self, hyp_data: ma.MaskedArray, ref_data: ma.MaskedArray) -> ma.MaskedArray:
         """
         Compute element-wise distances between hypothesis and reference data.
-
+
         Steps:
         1. Compute the absolute differences.
         2. Raise the differences to the specified power.
         3. Sum the powered differences along the last axis.
         4. Extract the root corresponding to the power.
         5. Fill masked values with the default distance.
-
+
         :param hyp_data: Hypothesis data as a masked array.
         :param ref_data: Reference data as a masked array.
         :return: A masked array of computed distances.
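The five docstring steps amount to a masked Minkowski distance per point, followed by the chosen aggregation. A minimal sketch of those steps with made-up 2-D points — `_calculate_distances` itself is not shown in this diff, so treat this as an illustration of the documented recipe rather than the class's code:

```python
import numpy as np
import numpy.ma as ma

order, default_distance = 2, 0.0
hyp = ma.masked_invalid(np.array([[0.0, 0.0], [1.0, np.nan]]))  # second point partly masked
ref = ma.masked_invalid(np.array([[3.0, 4.0], [1.0, 2.0]]))

powered = ma.abs(hyp - ref) ** order            # steps 1-2: absolute differences, raised to the power
summed = powered.sum(axis=-1)                   # step 3: sum along the last (coordinate) axis
per_point = ma.power(summed, 1.0 / order)       # step 4: extract the matching root
distances = per_point.filled(default_distance)  # step 5: fill masked entries with the default

print(distances)         # [5. 0.]
print(distances.mean())  # 2.5 -- what the "mean" aggregation strategy would return
```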
1 change: 1 addition & 0 deletions pose_evaluation/metrics/distance_metric.py
@@ -1,4 +1,5 @@
 from pose_format import Pose
+
 from pose_evaluation.metrics.base_pose_metric import PoseMetric
 from pose_evaluation.metrics.distance_measure import DistanceMeasure
 
13 changes: 4 additions & 9 deletions pose_evaluation/metrics/embedding_distance_metric.py
@@ -1,15 +1,14 @@
-from typing import Literal, List, Union
 import logging
+from typing import Literal, List, Union
-
+import numpy as np
 import torch
+from sentence_transformers import util as st_util
 from torch import Tensor
 from torch.types import Number
-import numpy as np
-from sentence_transformers import util as st_util
 
 from pose_evaluation.metrics.base_embedding_metric import EmbeddingMetric
 
 
 # Useful reference: https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/util.py#L31
 # * Helper functions such as batch_to_device, _convert_to_tensor, _convert_to_batch, _convert_to_batch_tensor
 # * a whole semantic search function, with chunking and top_k
@@ -86,11 +85,7 @@ def _to_batch_tensor_on_device(self, data: TensorConvertableType) -> Tensor:
 
         return st_util._convert_to_batch_tensor(data).to(device=self.device, dtype=self.dtype)
 
-    def score(
-        self,
-        hypothesis: TensorConvertableType,
-        reference: TensorConvertableType,
-    ) -> Number:
+    def score(self, hypothesis: TensorConvertableType, reference: TensorConvertableType) -> Number:
         """
         Compute the distance between two embeddings.
 
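After the collapse, `score` takes anything convertible to a batch tensor and returns a single `Number`. A hedged usage sketch — the `kind="cosine"` constructor argument is inferred from the `--kind` CLI flag in `evaluate_signclip.py` above and is not shown in this hunk:

```python
import torch

from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric

metric = EmbeddingDistanceMetric(kind="cosine")  # assumed constructor; "cosine" mirrors the CLI default
hypothesis = torch.rand(768)
reference = torch.rand(768)
print(metric.score(hypothesis, reference))  # one scalar distance
```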