Skip to content

Commit ce1ef8f

Browse files
committed
Fix DataFrame format for DANDI
1 parent fdd4cf0 commit ce1ef8f

File tree

1 file changed

+47
-4
lines changed

1 file changed

+47
-4
lines changed

src/ms_stim_analysis/AnalysisTables/credible_intervals.py

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,18 @@
33
get_HPD_spatial_coverage,
44
get_highest_posterior_threshold,
55
)
6+
import h5py
67
import numpy as np
78
import pandas as pd
89

910
from spyglass.utils.dj_mixin import SpyglassMixin
1011
from spyglass.decoding.v1.clusterless import ClusterlessDecodingV1
1112
from spyglass.common import AnalysisNwbfile
13+
from spyglass.utils.dj_helper_fn import _resolve_external_table
1214

1315
from .ms_opto_stim_protocol import OptoStimProtocol
1416

17+
1518
schema = dj.schema("ms_credible_interval")
1619

1720

@@ -78,9 +81,7 @@ def make(self, key):
7881
spatial_coverage = get_HPD_spatial_coverage(posterior, threshold)
7982

8083
# save the results
81-
credible_df = pd.DataFrame(
82-
spatial_coverage, index=results.time, columns=["coverage"]
83-
)
84+
credible_df = pd.DataFrame({"coverage": spatial_coverage, "time": results.time})
8485

8586
analysis_file_name = AnalysisNwbfile().create(key["nwb_file_name"])
8687
key["analysis_file_name"] = analysis_file_name
@@ -107,4 +108,46 @@ def alligned_response(self, key, marks, window=0.05):
107108
def fetch1_dataframe(self) -> pd.DataFrame:
    """Fetch the stored coverage results for a single key as a DataFrame.

    The stored table keeps ``time`` as an ordinary column (DANDI-compatible
    layout); it is restored as the index here so callers get the original
    time-indexed coverage series.

    Returns:
        pd.DataFrame: "coverage" values indexed by "time".

    Raises:
        ValueError: if the current restriction does not resolve to exactly
            one entry.
    """
    # fetch1 semantics require exactly one match. The previous check only
    # rejected the empty case and would silently return the first of
    # several matching entries.
    if len(nwb := self.fetch_nwb()) != 1:
        raise ValueError("fetch1_dataframe must be called on a single key")
    return nwb[0]["data"].set_index("time")
112+
113+
114+
def _update_coverage_df(analysis_file_name: str) -> None:
    """Reformat a saved coverage DataFrame to be consistent with DANDI standards.

    One-time migration (2025-09-24) for entries written before the fix that
    stored time as a regular "time" column instead of the DataFrame index.
    Not needed for future use. For use specifically with the
    CredibleInterval table.

    In the old layout the HDF5 ``id`` dataset held the time values (the old
    index). This function, in place:

    1. adds a ``time`` VectorData column reconstructed by linear
       interpolation between the stored min and max (NOTE(review): assumes
       uniformly sampled time -- confirm for irregularly sampled results),
    2. overwrites ``id`` with an incremental int64 index, and
    3. re-registers the modified file so its stored checksum stays valid.

    Args:
        analysis_file_name (str): name of the analysis NWB file to fix
            in place.

    Raises:
        ValueError: if ``analysis_file_name`` is not found in the
            CredibleInterval table.
    """
    if not (CredibleInterval() & {"analysis_file_name": analysis_file_name}):
        raise ValueError("analysis_file_name not found in CredibleInterval table")

    path = AnalysisNwbfile().get_abs_path(analysis_file_name)
    # HDF5 paths of the DynamicTable group and its datasets inside the file
    grp_path = "/scratch/credible_interval"
    dataset_path = "/scratch/credible_interval/id"
    time_dataset_path = "/scratch/credible_interval/time"
    with h5py.File(path, "a") as file:
        # idempotence guard: a "time" dataset means this file was already migrated
        if time_dataset_path in file:
            print("Time dataset already exists, skipping")
            return

        # interpolate time and add as new vector
        grp = file[grp_path]
        dset = file[dataset_path]
        data = dset[()]  # old index values, i.e. the original time stamps
        new_data = np.linspace(data.min(), data.max(), len(data))
        # advertise the new column on the DynamicTable group
        grp.attrs["colnames"] = ["coverage", "time"]
        time_vect = grp.create_dataset(
            "time", data=new_data, compression="gzip", compression_opts=4
        )
        # minimal NWB VectorData attributes so readers accept the new column
        time_vect.attrs["neurodata_type"] = "VectorData"
        time_vect.attrs["namespace"] = "core"
        time_vect.attrs["description"] = " "

        # set index to incremental values
        new_index = np.arange(len(data), dtype="int64")
        dset[:] = new_index

    # file contents changed on disk: refresh the external-table record
    # (checksum/registration) for this analysis file
    _resolve_external_table(path, analysis_file_name, location="analysis")

0 commit comments

Comments
 (0)