From 6767c86ed7abd44ac93fd51f65317122121250e6 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 20 Nov 2025 20:14:36 +0000
Subject: [PATCH 1/4] Initial plan


From 4ce9351659270bdd3896bf987c2ea3806dc56bf8 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 20 Nov 2025 20:18:22 +0000
Subject: [PATCH 2/4] Add sort_index() to fix
 PositionGroup.fetch_position_info() and add test

Co-authored-by: edeno <8053989+edeno@users.noreply.github.com>
---
 src/spyglass/decoding/v1/core.py |  4 +++-
 tests/decoding/test_core.py      | 26 ++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/spyglass/decoding/v1/core.py b/src/spyglass/decoding/v1/core.py
index ec5cfbf1f..13a7e1934 100644
--- a/src/spyglass/decoding/v1/core.py
+++ b/src/spyglass/decoding/v1/core.py
@@ -228,7 +228,9 @@ def fetch_position_info(
             min_time = min([df.index.min() for df in position_info])
         if max_time is None:
             max_time = max([df.index.max() for df in position_info])
-        position_info = pd.concat(position_info, axis=0).loc[min_time:max_time]
+        position_info = (
+            pd.concat(position_info, axis=0).sort_index().loc[min_time:max_time]
+        )
 
         return position_info, position_variable_names
 
diff --git a/tests/decoding/test_core.py b/tests/decoding/test_core.py
index a1b5b42cf..b56c53bf2 100644
--- a/tests/decoding/test_core.py
+++ b/tests/decoding/test_core.py
@@ -1,4 +1,6 @@
 import pytest
+import pandas as pd
+import numpy as np
 
 
 def test_decode_param_fetch(decode_v1, decode_clusterless_params_insert):
@@ -20,3 +22,27 @@ def test_upsampled_pos_group(pop_pos_group_upsampled):
     ret = pop_pos_group_upsampled.fetch_position_info()[0]
     sample_freq = ret.index.to_series().diff().mode().iloc[0]
     pytest.approx(sample_freq, 0.001) == 1 / 250, "Upsampled data not at 250 Hz"
+
+
+def test_position_group_non_chronological_order(pop_pos_group):
+    """Test that fetch_position_info handles non-chronological merge_id order.
+
+    This test verifies that when position data from multiple merge_ids is
+    concatenated, the result is properly sorted by time index before slicing.
+    This prevents returning an empty dataframe when merge_ids are not in
+    chronological order.
+    """
+    # Fetch position info - internally may not be in chronological order
+    position_info, position_variables = pop_pos_group.fetch_position_info()
+
+    # Verify the dataframe is not empty
+    assert len(position_info) > 0, "Position info should not be empty"
+
+    # Verify the index is sorted (monotonically increasing)
+    assert (
+        position_info.index.is_monotonic_increasing
+    ), "Position info index should be sorted in chronological order"
+
+    # Verify position variables are present
+    assert position_variables is not None
+    assert len(position_variables) > 0

From 4dcb24934974df6d508eb350049c21d972c4acfa Mon Sep 17 00:00:00 2001
From: Eric Denovellis <edeno@users.noreply.github.com>
Date: Fri, 21 Nov 2025 07:06:22 -0800
Subject: [PATCH 3/4] Update tests/decoding/test_core.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 tests/decoding/test_core.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/decoding/test_core.py b/tests/decoding/test_core.py
index b56c53bf2..88d4e387f 100644
--- a/tests/decoding/test_core.py
+++ b/tests/decoding/test_core.py
@@ -1,6 +1,4 @@
 import pytest
-import pandas as pd
-import numpy as np
 
 
 def test_decode_param_fetch(decode_v1, decode_clusterless_params_insert):

From 73364900fac4a533efe89ea55a2d6564478ddfd0 Mon Sep 17 00:00:00 2001
From: Eric Denovellis <edeno@bu.edu>
Date: Fri, 21 Nov 2025 12:40:41 -0500
Subject: [PATCH 4/4] Ensure DataFrame index is sorted before slicing

Added sort_index() before .loc[min:max] slicing in clusterless.py, core.py, and sorted_spikes.py to prevent empty DataFrame results when indices are unsorted. Includes a regression test in test_core.py to verify correct behavior and address issue #1471.
---
 src/spyglass/decoding/v1/clusterless.py   | 14 +++++--
 src/spyglass/decoding/v1/core.py          |  3 ++
 src/spyglass/decoding/v1/sorted_spikes.py | 14 +++++--
 tests/decoding/test_core.py               | 46 +++++++++++++++++++++++
 4 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/src/spyglass/decoding/v1/clusterless.py b/src/spyglass/decoding/v1/clusterless.py
index 3c753268c..977cd8e23 100644
--- a/src/spyglass/decoding/v1/clusterless.py
+++ b/src/spyglass/decoding/v1/clusterless.py
@@ -522,10 +522,16 @@ def fetch_linear_position_info(cls, key):
 
         min_time, max_time = _get_interval_range(key)
 
-        return pd.concat(
-            [linear_position_df.set_index(position_df.index), position_df],
-            axis=1,
-        ).loc[min_time:max_time]
+        # sort_index() for defensive programming - ensures chronological order
+        # before .loc[] slice. See: github.com/LorenFrankLab/spyglass/issues/1471
+        return (
+            pd.concat(
+                [linear_position_df.set_index(position_df.index), position_df],
+                axis=1,
+            )
+            .sort_index()
+            .loc[min_time:max_time]
+        )
 
     @classmethod
     def fetch_spike_data(cls, key, filter_by_interval=True):
diff --git a/src/spyglass/decoding/v1/core.py b/src/spyglass/decoding/v1/core.py
index 13a7e1934..c5ce5f034 100644
--- a/src/spyglass/decoding/v1/core.py
+++ b/src/spyglass/decoding/v1/core.py
@@ -228,6 +228,9 @@ def fetch_position_info(
             min_time = min([df.index.min() for df in position_info])
         if max_time is None:
             max_time = max([df.index.max() for df in position_info])
+        # sort_index() required: merge_ids may be fetched in non-chronological
+        # order (e.g., alphabetically by UUID), causing .loc[min:max] to return
+        # empty on unsorted index. See: github.com/LorenFrankLab/spyglass/issues/1471
         position_info = (
             pd.concat(position_info, axis=0).sort_index().loc[min_time:max_time]
         )
diff --git a/src/spyglass/decoding/v1/sorted_spikes.py b/src/spyglass/decoding/v1/sorted_spikes.py
index 6a773bce9..3fef77472 100644
--- a/src/spyglass/decoding/v1/sorted_spikes.py
+++ b/src/spyglass/decoding/v1/sorted_spikes.py
@@ -475,10 +475,16 @@ def fetch_linear_position_info(cls, key):
         )
         min_time, max_time = _get_interval_range(key)
 
-        return pd.concat(
-            [linear_position_df.set_index(position_df.index), position_df],
-            axis=1,
-        ).loc[min_time:max_time]
+        # sort_index() for defensive programming - ensures chronological order
+        # before .loc[] slice. See: github.com/LorenFrankLab/spyglass/issues/1471
+        return (
+            pd.concat(
+                [linear_position_df.set_index(position_df.index), position_df],
+                axis=1,
+            )
+            .sort_index()
+            .loc[min_time:max_time]
+        )
 
     @classmethod
     def fetch_spike_data(
diff --git a/tests/decoding/test_core.py b/tests/decoding/test_core.py
index 88d4e387f..f48c750b4 100644
--- a/tests/decoding/test_core.py
+++ b/tests/decoding/test_core.py
@@ -1,6 +1,52 @@
+import numpy as np
+import pandas as pd
 import pytest
 
 
+def test_concat_sort_index_slice_unsorted_data():
+    """Unit test verifying sort_index is required before .loc[] slice.
+
+    Regression test for bug where pd.concat followed by .loc[min:max] on
+    unsorted index returns empty DataFrame. This test explicitly constructs
+    unsorted data to guarantee the bug scenario is exercised, independent
+    of fixture data ordering.
+
+    See: https://github.com/LorenFrankLab/spyglass/issues/1471
+    """
+    # Simulate out-of-order position data (second chunk has earlier times)
+    # This mimics when merge_ids are fetched alphabetically rather than
+    # chronologically
+    df1 = pd.DataFrame(
+        {"x": [1.0, 2.0], "y": [10.0, 20.0]}, index=[10.0, 11.0]
+    )  # Later times
+    df2 = pd.DataFrame(
+        {"x": [3.0, 4.0], "y": [30.0, 40.0]}, index=[5.0, 6.0]
+    )  # Earlier times
+
+    min_time, max_time = 5.0, 11.0
+
+    # Without sort_index, .loc[] on unsorted index returns empty DataFrame
+    unsorted = pd.concat([df1, df2], axis=0)
+    assert (
+        not unsorted.index.is_monotonic_increasing
+    ), "Sanity check: concat result should be unsorted"
+    assert (
+        len(unsorted.loc[min_time:max_time]) == 0
+    ), "Sanity check: unsorted .loc[] slice returns empty"
+
+    # With sort_index, .loc[] returns all data correctly
+    sorted_result = (
+        pd.concat([df1, df2], axis=0).sort_index().loc[min_time:max_time]
+    )
+    assert len(sorted_result) == 4, "Sorted slice should contain all 4 rows"
+    assert (
+        sorted_result.index.is_monotonic_increasing
+    ), "Result should be sorted"
+    np.testing.assert_array_equal(
+        sorted_result.index.values, [5.0, 6.0, 10.0, 11.0]
+    )
+
+
 def test_decode_param_fetch(decode_v1, decode_clusterless_params_insert):
     from non_local_detector.environment import Environment