Skip to content

Commit f7e5af4

Browse files
authored
Support array-type metadata fields in cubids group (#407)
* Support array-type metadata fields. * Try supporting lists of strings too. * Add test. * Update test_utils.py * Update stuff. * Update test_utils.py * Rename format_params to cluster_single_parameters. * Keep working. * Move cluster_single_parameters from cubids to utils. * Fix import. * Remove unused function. * Update utils.py * Update test_utils.py * Update round_params too. * Update test. * Update. * Update. * Fix possible bug from #439. If you have two unique values (NaNs and some actual value), it would label everything as cluster 0, but it should probably label the actual values as 0 and the NaNs as 1. * Allow ndarray metadata. * Add ImageOrientationPatientDICOM, remove Obliquity * Remove obliquity mentions. * Revert obliquity-related changes. * Update test_bond.py * Update example.rst
1 parent edf7a94 commit f7e5af4

File tree

4 files changed

+393
-78
lines changed

4 files changed

+393
-78
lines changed

cubids/tests/test_cubids.py

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -449,21 +449,6 @@ def _test__get_param_groups(cubids_instance):
449449
# Add assertions here
450450

451451

452-
def _test_round_params(cubids_instance):
    """Call ``round_params`` on a one-row parameter frame (placeholder test).

    The leading underscore keeps pytest from collecting this function; no
    assertions are made on the returned value yet.

    Parameters
    ----------
    cubids_instance : CuBIDS
        An instance of the CuBIDS class.
    """
    single_row_df = pd.DataFrame({"param": [0.123456789]})
    # Arguments are passed positionally: frame, config, modality.
    rounded_params = cubids_instance.round_params(
        single_row_df,
        {"param": {"round": 3}},
        "bold",
    )
    # Add assertions here
465-
466-
467452
def _test_get_sidecar_metadata(cubids_instance):
468453
"""Test getting sidecar metadata.
469454
@@ -477,21 +462,6 @@ def _test_get_sidecar_metadata(cubids_instance):
477462
# Add assertions here
478463

479464

480-
def _test_format_params(cubids_instance):
    """Call ``format_params`` on a one-row parameter frame (placeholder test).

    The leading underscore keeps pytest from collecting this function; no
    assertions are made on the returned value yet.

    Parameters
    ----------
    cubids_instance : CuBIDS
        An instance of the CuBIDS class.
    """
    single_row_df = pd.DataFrame({"param": [0.123456789]})
    # Arguments are passed positionally: frame, config, modality.
    formatted_params = cubids_instance.format_params(
        single_row_df,
        {"param": {"format": "{:.2f}"}},
        "bold",
    )
    # Add assertions here
493-
494-
495465
def _test__order_columns(cubids_instance):
496466
"""Test ordering columns.
497467

cubids/tests/test_utils.py

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
"""Tests for the utils module."""
2+
3+
import pandas as pd
4+
5+
from cubids import utils
6+
from cubids.tests.utils import compare_group_assignments
7+
8+
9+
def test_round_params():
    """Check the output of ``cubids.utils.round_params`` for the "func" modality.

    Columns "A" (floats) and "B" (lists of floats) are configured with a
    precision of 2. Columns "C" (strings) and "D" (floats with no configured
    precision) are expected to come back with their input values.
    """
    input_df = pd.DataFrame(
        dict(
            A=[1.12345, 2.23456, 3.34567],
            B=[[1.12345, 2.23456], [3.34567, 4.45678], [5.56789, 6.67890]],
            C=["text", "more text", "even more text"],
            D=[1.12345, 2.23456, 3.34567],
        )
    )

    # Each configured column gets its own settings dict (nothing is shared).
    rounding_config = {
        "sidecar_params": {
            "func": {column: {"precision": 2} for column in ("A", "B")},
        },
        "derived_params": {"func": {}},
    }

    # What the frame should look like once "A" and "B" are rounded.
    wanted_df = pd.DataFrame(
        dict(
            A=[1.12, 2.23, 3.35],
            B=[[1.12, 2.23], [3.35, 4.46], [5.57, 6.68]],
            C=["text", "more text", "even more text"],
            D=[1.12345, 2.23456, 3.34567],
        )
    )

    result_df = utils.round_params(input_df, rounding_config, "func")

    pd.testing.assert_frame_equal(result_df, wanted_df)
49+
50+
51+
def test_cluster_single_parameters():
    """Check the cluster labels from ``cubids.utils.cluster_single_parameters``.

    Eight mock "func" rows are prepared: two baseline rows, then one variant
    each for TaskName, RepetitionTime, SliceTiming (length), SliceTiming
    (values), ImageType (length) and ImageType (values). The function is run
    with a SliceTiming tolerance of 0.01 and again with 0.5, and the
    ``Cluster_*`` columns are compared against the expected groupings.
    """

    def make_row(**overrides):
        # Built from fresh literals on every call so rows never share a list
        # object; ``update`` keeps the original key order.
        row = {
            "RepetitionTime": 2.0,
            "TaskName": "rest eyes closed",
            "SliceTiming": [0.0, 1.0, 2.0],
            "ImageType": ["ORIGINAL", "NONE", "M"],
        }
        row.update(overrides)
        return row

    def run_and_check(frame, settings, kind, slice_timing_groups):
        clustered = utils.cluster_single_parameters(
            df=frame,
            config=settings,
            modality=kind,
        )
        assert isinstance(clustered, pd.DataFrame)
        for present in (
            "Cluster_RepetitionTime",
            "Cluster_SliceTiming",
            "Cluster_ImageType",
        ):
            assert present in clustered.columns
        # Non-list columns without tolerance don't get clustered
        assert "Cluster_TaskName" not in clustered.columns

        expected_groups = (
            ("Cluster_RepetitionTime", [0, 0, 0, 1, 0, 0, 0, 0]),
            ("Cluster_SliceTiming", slice_timing_groups),
            ("Cluster_ImageType", [0, 0, 0, 0, 0, 0, 1, 2]),
        )
        for column, groups in expected_groups:
            assert compare_group_assignments(
                clustered[column].values.astype(int),
                groups,
            )

    config = {
        "sidecar_params": {
            "func": {
                "RepetitionTime": {"tolerance": 0.01, "suggest_variant_rename": True},
                "TaskName": {"suggest_variant_rename": True},
                "SliceTiming": {"tolerance": 0.01, "suggest_variant_rename": True},
                "ImageType": {"suggest_variant_rename": True},
            },
        },
        "derived_params": {
            "func": {},
        },
    }

    # Mock up the input. The variants are explicitly prepared.
    files_df = pd.DataFrame(
        [
            make_row(),
            make_row(),
            # TaskName variant
            make_row(TaskName="rest eyes open"),
            # RepetitionTime variant
            make_row(RepetitionTime=1.9),
            # SliceTiming variant (length)
            make_row(SliceTiming=[0.0, 0.5, 1.0, 1.5, 2.0]),
            # SliceTiming variant (values)
            make_row(SliceTiming=[0.0, 1.0, 1.9]),
            # ImageType variant (length)
            make_row(ImageType=["ORIGINAL", "NONE", "M", "NORM"]),
            # ImageType variant (values)
            make_row(ImageType=["ORIGINAL", "NONE", "P"]),
        ]
    )
    modality = "func"

    # First pass: tight SliceTiming tolerance, so both SliceTiming variants
    # are expected in their own clusters.
    run_and_check(files_df, config, modality, [0, 0, 0, 0, 2, 1, 0, 0])

    # Change the tolerance for SliceTiming
    config["sidecar_params"]["func"]["SliceTiming"]["tolerance"] = 0.5

    # Different lengths still produce different clusters,
    # but the value-based variants are now the same
    run_and_check(files_df, config, modality, [0, 0, 0, 0, 1, 0, 0, 0])

cubids/tests/utils.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,3 +167,56 @@ def chdir(path):
167167
yield
168168
finally:
169169
os.chdir(oldpwd)
170+
171+
172+
def compare_group_assignments(list1, list2):
    """Tell whether two label sequences partition their positions identically.

    Only the grouping matters, not the labels themselves. Walking both
    sequences in lockstep, every distinct label in ``list1`` is paired with the
    label found at the same position in ``list2``. The sequences match when
    each ``list1`` label always meets the same ``list2`` label and no
    ``list2`` label is paired with two different ``list1`` labels.

    Parameters
    ----------
    list1 : list
        The first list to compare. Its elements are used as dictionary keys.
    list2 : list
        The second list to compare.

    Returns
    -------
    bool
        True if both lists have the same length and the same group
        assignments, False otherwise.

    Examples
    --------
    >>> compare_group_assignments([1, 2, 1, 3, 2], ['a', 'b', 'a', 'c', 'b'])
    True

    >>> compare_group_assignments([1, 2, 1, 3, 2], ['b', 'd', 'b', 'q', 'd'])
    True

    >>> compare_group_assignments([1, 2, 1, 3, 2], ['a', 'b', 'a', 'c', 'd'])
    False
    """
    if len(list1) != len(list2):
        return False

    paired = {}
    for left, right in zip(list1, list2):
        if left not in paired:
            # A label seen for the first time on the left may not claim a
            # right-hand label that another left-hand label already owns.
            if right in paired.values():
                return False
            paired[left] = right
            continue

        # A known left-hand label must keep meeting the same partner.
        if paired[left] != right:
            return False

    return True

0 commit comments

Comments
 (0)