From 6767c86ed7abd44ac93fd51f65317122121250e6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 20 Nov 2025 20:14:36 +0000 Subject: [PATCH 1/4] Initial plan From 4ce9351659270bdd3896bf987c2ea3806dc56bf8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 20 Nov 2025 20:18:22 +0000 Subject: [PATCH 2/4] Add sort_index() to fix PositionGroup.fetch_position_info() and add test Co-authored-by: edeno <8053989+edeno@users.noreply.github.com> --- src/spyglass/decoding/v1/core.py | 4 +++- tests/decoding/test_core.py | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/spyglass/decoding/v1/core.py b/src/spyglass/decoding/v1/core.py index ec5cfbf1f..13a7e1934 100644 --- a/src/spyglass/decoding/v1/core.py +++ b/src/spyglass/decoding/v1/core.py @@ -228,7 +228,9 @@ def fetch_position_info( min_time = min([df.index.min() for df in position_info]) if max_time is None: max_time = max([df.index.max() for df in position_info]) - position_info = pd.concat(position_info, axis=0).loc[min_time:max_time] + position_info = ( + pd.concat(position_info, axis=0).sort_index().loc[min_time:max_time] + ) return position_info, position_variable_names diff --git a/tests/decoding/test_core.py b/tests/decoding/test_core.py index a1b5b42cf..b56c53bf2 100644 --- a/tests/decoding/test_core.py +++ b/tests/decoding/test_core.py @@ -1,4 +1,6 @@ import pytest +import pandas as pd +import numpy as np def test_decode_param_fetch(decode_v1, decode_clusterless_params_insert): @@ -20,3 +22,27 @@ def test_upsampled_pos_group(pop_pos_group_upsampled): ret = pop_pos_group_upsampled.fetch_position_info()[0] sample_freq = ret.index.to_series().diff().mode().iloc[0] pytest.approx(sample_freq, 0.001) == 1 / 250, "Upsampled data not at 250 Hz" + + +def test_position_group_non_chronological_order(pop_pos_group): + """Test that fetch_position_info handles non-chronological merge_id order. + + This test verifies that when position data from multiple merge_ids is + concatenated, the result is properly sorted by time index before slicing. + This prevents returning an empty dataframe when merge_ids are not in + chronological order. + """ + # Fetch position info - internally may not be in chronological order + position_info, position_variables = pop_pos_group.fetch_position_info() + + # Verify the dataframe is not empty + assert len(position_info) > 0, "Position info should not be empty" + + # Verify the index is sorted (monotonically increasing) + assert ( + position_info.index.is_monotonic_increasing + ), "Position info index should be sorted in chronological order" + + # Verify position variables are present + assert position_variables is not None + assert len(position_variables) > 0 From 4dcb24934974df6d508eb350049c21d972c4acfa Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Fri, 21 Nov 2025 07:06:22 -0800 Subject: [PATCH 3/4] Update tests/decoding/test_core.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/decoding/test_core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/decoding/test_core.py b/tests/decoding/test_core.py index b56c53bf2..88d4e387f 100644 --- a/tests/decoding/test_core.py +++ b/tests/decoding/test_core.py @@ -1,6 +1,4 @@ import pytest -import pandas as pd -import numpy as np def test_decode_param_fetch(decode_v1, decode_clusterless_params_insert): From 73364900fac4a533efe89ea55a2d6564478ddfd0 Mon Sep 17 00:00:00 2001 From: Eric Denovellis Date: Fri, 21 Nov 2025 12:40:41 -0500 Subject: [PATCH 4/4] Ensure DataFrame index is sorted before slicing Added sort_index() before .loc[min:max] slicing in clusterless.py, core.py, and sorted_spikes.py to prevent empty DataFrame results when indices are unsorted. Includes a regression test in test_core.py to verify correct behavior and address issue #1471. --- src/spyglass/decoding/v1/clusterless.py | 14 +++++-- src/spyglass/decoding/v1/core.py | 3 ++ src/spyglass/decoding/v1/sorted_spikes.py | 14 +++++-- tests/decoding/test_core.py | 46 +++++++++++++++++++++++ 4 files changed, 69 insertions(+), 8 deletions(-) diff --git a/src/spyglass/decoding/v1/clusterless.py b/src/spyglass/decoding/v1/clusterless.py index 3c753268c..977cd8e23 100644 --- a/src/spyglass/decoding/v1/clusterless.py +++ b/src/spyglass/decoding/v1/clusterless.py @@ -522,10 +522,16 @@ def fetch_linear_position_info(cls, key): min_time, max_time = _get_interval_range(key) - return pd.concat( - [linear_position_df.set_index(position_df.index), position_df], - axis=1, - ).loc[min_time:max_time] + # sort_index() for defensive programming - ensures chronological order + # before .loc[] slice. See: github.com/LorenFrankLab/spyglass/issues/1471 + return ( + pd.concat( + [linear_position_df.set_index(position_df.index), position_df], + axis=1, + ) + .sort_index() + .loc[min_time:max_time] + ) @classmethod def fetch_spike_data(cls, key, filter_by_interval=True): diff --git a/src/spyglass/decoding/v1/core.py b/src/spyglass/decoding/v1/core.py index 13a7e1934..c5ce5f034 100644 --- a/src/spyglass/decoding/v1/core.py +++ b/src/spyglass/decoding/v1/core.py @@ -228,6 +228,9 @@ def fetch_position_info( min_time = min([df.index.min() for df in position_info]) if max_time is None: max_time = max([df.index.max() for df in position_info]) + # sort_index() required: merge_ids may be fetched in non-chronological + # order (e.g., alphabetically by UUID), causing .loc[min:max] to return + # empty on unsorted index. See: github.com/LorenFrankLab/spyglass/issues/1471 position_info = ( pd.concat(position_info, axis=0).sort_index().loc[min_time:max_time] ) diff --git a/src/spyglass/decoding/v1/sorted_spikes.py b/src/spyglass/decoding/v1/sorted_spikes.py index 6a773bce9..3fef77472 100644 --- a/src/spyglass/decoding/v1/sorted_spikes.py +++ b/src/spyglass/decoding/v1/sorted_spikes.py @@ -475,10 +475,16 @@ def fetch_linear_position_info(cls, key): ) min_time, max_time = _get_interval_range(key) - return pd.concat( - [linear_position_df.set_index(position_df.index), position_df], - axis=1, - ).loc[min_time:max_time] + # sort_index() for defensive programming - ensures chronological order + # before .loc[] slice. See: github.com/LorenFrankLab/spyglass/issues/1471 + return ( + pd.concat( + [linear_position_df.set_index(position_df.index), position_df], + axis=1, + ) + .sort_index() + .loc[min_time:max_time] + ) @classmethod def fetch_spike_data( diff --git a/tests/decoding/test_core.py b/tests/decoding/test_core.py index 88d4e387f..f48c750b4 100644 --- a/tests/decoding/test_core.py +++ b/tests/decoding/test_core.py @@ -1,6 +1,52 @@ +import numpy as np +import pandas as pd import pytest +def test_concat_sort_index_slice_unsorted_data(): + """Unit test verifying sort_index is required before .loc[] slice. + + Regression test for bug where pd.concat followed by .loc[min:max] on + unsorted index returns empty DataFrame. This test explicitly constructs + unsorted data to guarantee the bug scenario is exercised, independent + of fixture data ordering. + + See: https://github.com/LorenFrankLab/spyglass/issues/1471 + """ + # Simulate out-of-order position data (second chunk has earlier times) + # This mimics when merge_ids are fetched alphabetically rather than + # chronologically + df1 = pd.DataFrame( + {"x": [1.0, 2.0], "y": [10.0, 20.0]}, index=[10.0, 11.0] + ) # Later times + df2 = pd.DataFrame( + {"x": [3.0, 4.0], "y": [30.0, 40.0]}, index=[5.0, 6.0] + ) # Earlier times + + min_time, max_time = 5.0, 11.0 + + # Without sort_index, .loc[] on unsorted index returns empty DataFrame + unsorted = pd.concat([df1, df2], axis=0) + assert ( + not unsorted.index.is_monotonic_increasing + ), "Sanity check: concat result should be unsorted" + assert ( + len(unsorted.loc[min_time:max_time]) == 0 + ), "Sanity check: unsorted .loc[] slice returns empty" + + # With sort_index, .loc[] returns all data correctly + sorted_result = ( + pd.concat([df1, df2], axis=0).sort_index().loc[min_time:max_time] + ) + assert len(sorted_result) == 4, "Sorted slice should contain all 4 rows" + assert ( + sorted_result.index.is_monotonic_increasing + ), "Result should be sorted" + np.testing.assert_array_equal( + sorted_result.index.values, [5.0, 6.0, 10.0, 11.0] + ) + + def test_decode_param_fetch(decode_v1, decode_clusterless_params_insert): from non_local_detector.environment import Environment