Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"pandas>=2.2.3",
"pydantic>=2.11.1"
"pydantic>=2.11.1",
"scipy>=1.15.2"
]

[dependency-groups]
Expand Down
17 changes: 17 additions & 0 deletions src/graphomotor/core/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Configuration module for the graphomotor repository."""

import numpy as np


def generate_reference_spiral() -> np.ndarray:
    """Generate the reference (ideal) spiral used for feature extraction.

    The spiral follows r = a + b * theta around the center (50, 50), starting at
    radius 0 and sampled at 10,000 evenly spaced angles over four full rotations.

    Returns:
        Array of shape (10000, 2) whose columns are the x and y coordinates of the
        spiral points, ordered from the center outwards.
    """
    cx, cy = (50, 50)  # center of the spiral
    a = 0  # starting radius
    b = 1.075  # growth rate
    num_points = 10000
    spiral_length = 8 * np.pi  # spiral makes 4 full rotations

    theta = np.linspace(0, spiral_length, num_points)
    r = a + b * theta
    x = cx + r * np.cos(theta)
    y = cy + r * np.sin(theta)
    return np.column_stack((x, y))
1 change: 1 addition & 0 deletions src/graphomotor/features/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
""".. include:: ../../README.md""" # noqa: D415
123 changes: 123 additions & 0 deletions src/graphomotor/features/distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Feature extraction module for distance-based metrics in spiral drawing data."""

import numpy as np
from scipy import stats
from scipy.spatial import distance

from graphomotor.core import models


def _segment_data(data: np.ndarray, start_pct: float, end_pct: float) -> np.ndarray:
    """Extract segment of data based on percentage range.

    The fractional bounds are turned into row indices by multiplying them with the
    number of samples and truncating to int, so the returned slice covers rows
    int(start_pct * n) up to (but excluding) int(end_pct * n). For very short
    inputs the slice can be empty.

    Args:
        data: Data to segment
        start_pct: Start percentage [0-1)
        end_pct: End percentage (0-1]

    Returns:
        Segmented data

    Raises:
        ValueError: If the percentages fall outside [0, 1] or start_pct is not
            strictly less than end_pct.
    """
    if not (0 <= start_pct < end_pct <= 1):
        raise ValueError(
            "Percentages must be between 0 and 1, "
            "and start_pct must be less than end_pct"
        )
    num_samples = len(data)
    start_idx = int(start_pct * num_samples)
    end_idx = int(end_pct * num_samples)
    return data[start_idx:end_idx]


def _directed_hausdorff_pair(points: np.ndarray, reference: np.ndarray) -> list:
    """Return the two directed Hausdorff distances between two point sets."""
    return [
        distance.directed_hausdorff(points, reference)[0],
        distance.directed_hausdorff(reference, points)[0],
    ]


def calculate_hausdorff_metrics(
    spiral: models.Spiral, reference_spiral: np.ndarray
) -> dict:
    """Calculate Hausdorff distance metrics for a spiral object.

    This function computes multiple features based on the Hausdorff distance between a
    drawn spiral and a reference (ideal) spiral, as described in [1]. The Hausdorff
    distance measures the maximum distance of a set to the nearest point in the other
    set. This metric and its derivatives capture various aspects of the spatial
    relationship between the drawn and reference spirals. Calculated features include:
        - max_haus_dist: The maximum of the directed Hausdorff distances between the
          data points and the reference data points.
        - sum_haus_dist: The sum of the directed Hausdorff distances.
        - sum_haus_dist_time: The sum of the directed Hausdorff distances divided by
          the total drawing duration.
        - iqr_haus_dist: The interquartile range of the directed Hausdorff distances.
        - max_haus_dist_start: The maximum of the directed Hausdorff distances between
          the beginning segment (0% to 25%) of data points and the beginning segment
          of reference data points divided by the number of data points in the
          beginning segment.
        - max_haus_dist_end: The maximum of the directed Hausdorff distances between
          the ending segment (75% to 100%) of data points and the ending segment of
          reference data points divided by the number of data points in the ending
          segment.
        - max_haus_dist_mid: The maximum of the directed Hausdorff distances between
          the middle segment (15% to 85%) of data points and the middle segment of
          reference data points (this metric is not divided by the number of data
          points in the middle segment unlike previous ones).
        - max_haus_dist_mid_time: The maximum of the directed Hausdorff distances in
          the middle segment divided by the total drawing duration.

    Args:
        spiral: Spiral object with drawing data
        reference_spiral: Reference spiral data for comparison

    Returns:
        Dictionary containing Hausdorff distance-based features

    Raises:
        ValueError: If any of the start, end or middle segments of the drawn spiral
            contains no data points.

    References:
        [1] Messan, Komi S et al. “Assessment of Smartphone-Based Spiral Tracing in
            Multiple Sclerosis Reveals Intra-Individual Reproducibility as a Major
            Determinant of the Clinical Utility of the Digital Test.” Frontiers in
            medical technology vol. 3 714682. 1 Feb. 2022, doi:10.3389/fmedt.2021.714682
    """
    spiral_data = np.column_stack((spiral.data["x"].values, spiral.data["y"].values))

    # The last value of the "seconds" column is used as the total drawing duration.
    total_duration = spiral.data["seconds"].iloc[-1]

    start_segment_data = _segment_data(spiral_data, 0.0, 0.25)
    end_segment_data = _segment_data(spiral_data, 0.75, 1.0)
    mid_segment_data = _segment_data(spiral_data, 0.15, 0.85)

    # Very short drawings truncate to empty slices; fail early with a clear message
    # before any distance computation or division by the segment length.
    if (
        len(start_segment_data) == 0
        or len(end_segment_data) == 0
        or len(mid_segment_data) == 0
    ):
        raise ValueError(
            "Segmented data is empty, check spiral data or segment percentages"
        )

    start_segment_ref = _segment_data(reference_spiral, 0.0, 0.25)
    end_segment_ref = _segment_data(reference_spiral, 0.75, 1.0)
    mid_segment_ref = _segment_data(reference_spiral, 0.15, 0.85)

    haus_dist = _directed_hausdorff_pair(spiral_data, reference_spiral)
    haus_dist_start = _directed_hausdorff_pair(start_segment_data, start_segment_ref)
    haus_dist_end = _directed_hausdorff_pair(end_segment_data, end_segment_ref)
    haus_dist_mid = _directed_hausdorff_pair(mid_segment_data, mid_segment_ref)

    return {
        "max_haus_dist": np.max(haus_dist),
        "sum_haus_dist": np.sum(haus_dist),
        "sum_haus_dist_time": np.sum(haus_dist) / total_duration,
        "iqr_haus_dist": stats.iqr(haus_dist),
        "max_haus_dist_start": np.max(haus_dist_start) / len(start_segment_data),
        "max_haus_dist_end": np.max(haus_dist_end) / len(end_segment_data),
        "max_haus_dist_mid": np.max(haus_dist_mid),
        "max_haus_dist_mid_time": np.max(haus_dist_mid) / total_duration,
    }
21 changes: 21 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
import datetime
import pathlib

import numpy as np
import pandas as pd
import pytest

from graphomotor.core import config, models


@pytest.fixture
def sample_data() -> pathlib.Path:
Expand Down Expand Up @@ -35,3 +38,21 @@ def valid_spiral_metadata() -> dict[str, str | datetime.datetime]:
tz=datetime.timezone.utc,
),
}


@pytest.fixture
def valid_spiral(
    valid_spiral_data: pd.DataFrame,
    valid_spiral_metadata: dict[str, str | datetime.datetime],
) -> models.Spiral:
    """Build a Spiral object from the valid data and metadata fixtures."""
    spiral = models.Spiral(data=valid_spiral_data, metadata=valid_spiral_metadata)
    return spiral


@pytest.fixture
def reference_spiral() -> np.ndarray:
    """Provide the reference spiral generated by the config module."""
    spiral_points = config.generate_reference_spiral()
    return spiral_points
15 changes: 15 additions & 0 deletions tests/unit/test_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Test cases for config.py functions."""

import numpy as np
import pytest

from graphomotor.core import config


def test_generate_reference_spiral() -> None:
    """Test the generation of a reference spiral.

    Checks the return type and shape, that the first point is at the center
    (50, 50), and that the last point lies on the positive x-axis side of the
    center at a radius of 1.075 * 8 * pi.
    """
    spiral = config.generate_reference_spiral()
    assert isinstance(spiral, np.ndarray)
    assert spiral.shape == (10000, 2)
    assert spiral[0] == pytest.approx([50, 50])
    assert spiral[-1] == pytest.approx([50 + 1.075 * 8 * np.pi, 50])
Comment thread
alperkent marked this conversation as resolved.
Outdated
134 changes: 134 additions & 0 deletions tests/unit/test_distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""Test cases for distance.py functions."""

import numpy as np
import pandas as pd
import pytest
import scipy.spatial.distance as dist
Comment thread
alperkent marked this conversation as resolved.
Outdated
from scipy import stats

from graphomotor.core import models
from graphomotor.features import distance


def test_segment_data_valid() -> None:
    """Check that a 10%-30% slice of 100 rows yields rows 10 through 29."""
    rows = np.array([[value, value] for value in range(100)])

    sliced = distance._segment_data(rows, 0.1, 0.3)

    first_row, last_row = sliced[0], sliced[-1]
    assert len(sliced) == 20
    assert first_row[0] == 10
    assert last_row[0] == 29


@pytest.mark.parametrize(
    "start_pct,end_pct",
    [
        (-0.1, 0.5),
        (0.1, 1.1),
        (0.6, 0.5),
        (0.5, 0.5),
    ],
)
def test_segment_data_invalid(start_pct: float, end_pct: float) -> None:
    """Check that out-of-range or non-increasing percentages raise a ValueError."""
    rows = np.array([[value, value] for value in range(100)])
    expected_message = (
        "Percentages must be between 0 and 1, "
        "and start_pct must be less than end_pct"
    )

    with pytest.raises(ValueError, match=expected_message):
        distance._segment_data(rows, start_pct, end_pct)


def test_calculate_hausdorff_metrics(
    valid_spiral: models.Spiral, reference_spiral: np.ndarray
) -> None:
    """Check that every expected Hausdorff metric is present and is a float."""
    expected_metrics = [
        "max_haus_dist",
        "sum_haus_dist",
        "sum_haus_dist_time",
        "iqr_haus_dist",
        "max_haus_dist_start",
        "max_haus_dist_end",
        "max_haus_dist_mid",
        "max_haus_dist_mid_time",
    ]

    computed = distance.calculate_hausdorff_metrics(valid_spiral, reference_spiral)

    for name in expected_metrics:
        assert name in computed
        assert isinstance(computed[name], float)


def test_calculate_hausdorff_metrics_empty_segments(
    valid_spiral_data: pd.DataFrame,
    valid_spiral_metadata: dict,
    reference_spiral: np.ndarray,
) -> None:
    """Check that a spiral too short to segment raises a ValueError."""
    # Only the first three rows are kept, which is too few to fill every segment.
    truncated_spiral = models.Spiral(
        data=valid_spiral_data.iloc[:3],
        metadata=valid_spiral_metadata,
    )
    expected_message = (
        "Segmented data is empty, check spiral data or segment percentages"
    )

    with pytest.raises(ValueError, match=expected_message):
        distance.calculate_hausdorff_metrics(truncated_spiral, reference_spiral)


def test_hausdorff_metrics_values(
    valid_spiral: models.Spiral, reference_spiral: np.ndarray
) -> None:
    """Test that Hausdorff metrics are calculated correctly.

    The expected values are recomputed independently from full pairwise distance
    matrices (scipy's cdist) instead of directed_hausdorff. Because the two code
    paths may differ in floating-point rounding, results are compared with
    pytest.approx rather than exact equality.
    """
    metrics = distance.calculate_hausdorff_metrics(valid_spiral, reference_spiral)

    data = valid_spiral.data[["x", "y"]].values
    ref_data = reference_spiral

    total_duration = valid_spiral.data["seconds"].iloc[-1]

    data_start = data[: int(len(data) * 0.25)]
    data_end = data[int(len(data) * 0.75) :]
    data_mid = data[int(len(data) * 0.15) : int(len(data) * 0.85)]

    ref_data_start = ref_data[: int(len(ref_data) * 0.25)]
    ref_data_end = ref_data[int(len(ref_data) * 0.75) :]
    ref_data_mid = ref_data[int(len(ref_data) * 0.15) : int(len(ref_data) * 0.85)]

    dist_matrix = dist.cdist(data, ref_data, "euclidean")
    dist_matrix_start = dist.cdist(data_start, ref_data_start, "euclidean")
    dist_matrix_end = dist.cdist(data_end, ref_data_end, "euclidean")
    dist_matrix_mid = dist.cdist(data_mid, ref_data_mid, "euclidean")

    # Rows index drawn points and columns index reference points, so the max over
    # per-column minima and the max over per-row minima are the two directed
    # distances. Their order is irrelevant for max, sum and IQR.
    haus_dist = [
        np.max(np.min(dist_matrix, axis=0)),
        np.max(np.min(dist_matrix, axis=1)),
    ]
    haus_dist_start = [
        np.max(np.min(dist_matrix_start, axis=0)),
        np.max(np.min(dist_matrix_start, axis=1)),
    ]
    haus_dist_end = [
        np.max(np.min(dist_matrix_end, axis=0)),
        np.max(np.min(dist_matrix_end, axis=1)),
    ]
    haus_dist_mid = [
        np.max(np.min(dist_matrix_mid, axis=0)),
        np.max(np.min(dist_matrix_mid, axis=1)),
    ]

    expected = {
        "max_haus_dist": np.max(haus_dist),
        "sum_haus_dist": np.sum(haus_dist),
        "sum_haus_dist_time": np.sum(haus_dist) / total_duration,
        "iqr_haus_dist": stats.iqr(haus_dist),
        "max_haus_dist_start": np.max(haus_dist_start) / len(data_start),
        "max_haus_dist_end": np.max(haus_dist_end) / len(data_end),
        "max_haus_dist_mid": np.max(haus_dist_mid),
        "max_haus_dist_mid_time": np.max(haus_dist_mid) / total_duration,
    }

    for name, value in expected.items():
        assert metrics[name] == pytest.approx(value), name
Loading