dattalab
diff --git a/keypoint_moseq/analysis.py b/keypoint_moseq/analysis.py
Lines changed: 73 additions & 27 deletions
@@ -46,7 +46,9 @@ def get_syllable_names(project_dir, model_name, syllable_ixs):
 
         for ix in syllable_ixs:
             if len(syll_info_df[syll_info_df.syllable == ix].label.values[0]) > 0:
-                labels[ix] = f"{ix} ({syll_info_df[syll_info_df.syllable == ix].label.values[0]})"
+                labels[ix] = (
+                    f"{ix} ({syll_info_df[syll_info_df.syllable == ix].label.values[0]})"
+                )
     names = [labels[ix] for ix in syllable_ixs]
     return names
 
@@ -214,14 +216,17 @@ def compute_moseq_df(project_dir, model_name, *, fps=30, smooth_heading=True):
             np.concatenate(
                 (
                     [0],
-                    np.sqrt(np.square(np.diff(v["centroid"], axis=0)).sum(axis=1)) * fps,
+                    np.sqrt(np.square(np.diff(v["centroid"], axis=0)).sum(axis=1))
+                    * fps,
                 )
             )
         )
 
         if index_data is not None:
             # find the group for each recording from index data
-            s_group.append([index_data[index_data["name"] == k]["group"].values[0]] * n_frame)
+            s_group.append(
+                [index_data[index_data["name"] == k]["group"].values[0]] * n_frame
+            )
         else:
             # no index data
             s_group.append(["default"] * n_frame)
@@ -236,8 +241,12 @@ def compute_moseq_df(project_dir, model_name, *, fps=30, smooth_heading=True):
         heading.append(recording_heading)
 
         # compute angular velocity (radian per second)
-        gaussian_smoothed_heading = filter_angle(recording_heading, size=3, method="gaussian")
-        angular_velocity.append(np.concatenate(([0], np.diff(gaussian_smoothed_heading) * fps)))
+        gaussian_smoothed_heading = filter_angle(
+            recording_heading, size=3, method="gaussian"
+        )
+        angular_velocity.append(
+            np.concatenate(([0], np.diff(gaussian_smoothed_heading) * fps))
+        )
 
         # add syllable data
         syllables.append(v["syllable"])
@@ -367,7 +376,9 @@ def compute_stats_df(
 def generate_syll_info(project_dir, model_name, syll_info_path):
     # parse model results
     model_results = load_results(project_dir, model_name)
-    unique_sylls = np.unique(np.concatenate([file["syllable"] for file in model_results.values()]))
+    unique_sylls = np.unique(
+        np.concatenate([file["syllable"] for file in model_results.values()])
+    )
     # construct the syllable dictionary
     # in the non interactive version there won't be any group info
     syll_info_df = pd.DataFrame(
@@ -428,8 +439,12 @@ def label_syllables(project_dir, model_name, moseq_df):
     # load syll_info
     syll_info_df = pd.read_csv(syll_info_path, index_col=False).fillna("")
     # split into with movie and without movie
-    syll_info_df_with_movie = syll_info_df[syll_info_df.movie_path.str.contains(".mp4")].copy()
-    syll_info_df_without_movie = syll_info_df[~syll_info_df.movie_path.str.contains(".mp4")].copy()
+    syll_info_df_with_movie = syll_info_df[
+        syll_info_df.movie_path.str.contains(".mp4")
+    ].copy()
+    syll_info_df_without_movie = syll_info_df[
+        ~syll_info_df.movie_path.str.contains(".mp4")
+    ].copy()
 
     # create select widget only include the ones with a movie
     select = pn.widgets.Select(
@@ -520,7 +535,9 @@ def b(event, save=True):
     button.on_click(b)
 
     # bind everything together
-    return pn.Row(pn.Column(select, ivideo), pn.Column(summary_table, pn.Column(button)))
+    return pn.Row(
+        pn.Column(select, ivideo), pn.Column(summary_table, pn.Column(button))
+    )
 
 
 def get_tie_correction(x, N_m):
@@ -605,7 +622,10 @@ def run_manual_KW_test(
     # get square of sums for each group
     ssbn = np.zeros((n_perm, N_s))
     for i in range(num_groups):
-        ssbn += perm_ranks[:, cum_group_idx[i] : cum_group_idx[i + 1]].sum(1) ** 2 / n_per_group[i]
+        ssbn += (
+            perm_ranks[:, cum_group_idx[i] : cum_group_idx[i + 1]].sum(1) ** 2
+            / n_per_group[i]
+        )
 
     # h-statistic
     h_all = 12.0 / (N_m * (N_m + 1)) * ssbn - 3 * (N_m + 1)
@@ -616,7 +636,9 @@ def run_manual_KW_test(
     p_i = np.random.randint(n_perm)
     s_i = np.random.randint(N_s)
     kr = stats.kruskal(
-        *np.array_split(merged_usages_all[perm[p_i, :], s_i], np.cumsum(n_per_group[:-1]))
+        *np.array_split(
+            merged_usages_all[perm[p_i, :], s_i], np.cumsum(n_per_group[:-1])
+        )
     )
     assert (kr.statistic == h_all[p_i, s_i]) & (
         kr.pvalue == p_vals[p_i, s_i]
@@ -671,7 +693,8 @@ def dunns_z_test_permute_within_group_pairs(
 
         ranks_perm = real_ranks[(is_i | is_j)][rnd.rand(n_perm, n_mice).argsort(-1)]
         diff = np.abs(
-            ranks_perm[:, : is_i.sum(), :].mean(1) - ranks_perm[:, is_i.sum() :, :].mean(1)
+            ranks_perm[:, : is_i.sum(), :].mean(1)
+            - ranks_perm[:, is_i.sum() :, :].mean(1)
         )
         B = 1.0 / vc.loc[i_n] + 1.0 / vc.loc[j_n]
 
@@ -732,7 +755,9 @@ def compute_pvalues_for_group_pairs(
 
     p_vals_allperm = {}
     for pair in combinations(group_names, 2):
-        p_vals_allperm[pair] = ((null_zs[pair] > real_zs_within_group[pair]).sum(0) + 1) / n_perm
+        p_vals_allperm[pair] = (
+            (null_zs[pair] > real_zs_within_group[pair]).sum(0) + 1
+        ) / n_perm
 
     # summarize into df
     df_pval = pd.DataFrame(p_vals_allperm)
@@ -782,7 +807,9 @@ def run_kruskal(
     rnd = np.random.RandomState(seed=seed)
     # get grouped mean data
     grouped_data = (
-        stats_df.pivot_table(index=["group", "name"], columns="syllable", values=statistic)
+        stats_df.pivot_table(
+            index=["group", "name"], columns="syllable", values=statistic
+        )
         .replace(np.nan, 0)
         .reset_index()
     )
@@ -813,7 +840,9 @@ def run_kruskal(
     # find the real k_real
     df_k_real = pd.DataFrame(
         [
-            stats.kruskal(*np.array_split(syllable_data[:, s_i], np.cumsum(n_per_group[:-1])))
+            stats.kruskal(
+                *np.array_split(syllable_data[:, s_i], np.cumsum(n_per_group[:-1]))
+            )
             for s_i in range(N_s)
         ]
     )
@@ -851,7 +880,9 @@ def run_kruskal(
     df_z = pd.DataFrame(real_zs_within_group)
     df_z.index = df_z.index.set_names("syllable")
     dunn_results_df = df_z.reset_index().melt(id_vars=[("syllable", "")])
-    dunn_results_df.rename(columns={"variable_0": "group1", "variable_1": "group2"}, inplace=True)
+    dunn_results_df.rename(
+        columns={"variable_0": "group1", "variable_1": "group2"}, inplace=True
+    )
 
     # Get intersecting significant syllables between
     intersect_sig_syllables = {}
@@ -864,7 +895,9 @@ def run_kruskal(
 
 
 # frequency plot stuff
-def sort_syllables_by_stat_difference(stats_df, ctrl_group, exp_group, stat="frequency"):
+def sort_syllables_by_stat_difference(
+    stats_df, ctrl_group, exp_group, stat="frequency"
+):
     """Sort syllables by the difference in the stat between the control and
     experimental group.
 
@@ -997,7 +1030,9 @@ def _validate_and_order_syll_stats_params(
             raise ValueError(
                 f"Attempting to sort by {stat} differences, but {ctrl_group} or {exp_group} not in {groups}."
             )
-        ordering = sort_syllables_by_stat_difference(complete_df, ctrl_group, exp_group, stat=stat)
+        ordering = sort_syllables_by_stat_difference(
+            complete_df, ctrl_group, exp_group, stat=stat
+        )
     if colors is None:
         colors = []
     if len(colors) == 0 or len(colors) != len(groups):
@@ -1146,7 +1181,9 @@ def plot_syll_stats_with_sem(
                     markings.append(np.where(ordering == s)[0])
                 if len(markings) > 0:
                     markings = np.concatenate(markings)
-                    plt.scatter(markings, [init_y] * len(markings), color="r", marker="*")
+                    plt.scatter(
+                        markings, [init_y] * len(markings), color="r", marker="*"
+                    )
                     plt.text(
                         plt.xlim()[1],
                         init_y,
@@ -1309,7 +1346,9 @@ def get_transition_matrix(
             # Get syllable transitions
             transitions = get_transitions(v)[0]
 
-            trans_mat = n_gram_transition_matrix(transitions, n=2, max_label=max_syllable)
+            trans_mat = n_gram_transition_matrix(
+                transitions, n=2, max_label=max_syllable
+            )
             init_matrix.append(trans_mat)
 
         init_matrix = np.sum(init_matrix, axis=0) + smoothing
@@ -1322,7 +1361,8 @@ def get_transition_matrix(
             transitions = get_transitions(v)[0]
 
             trans_mat = (
-                n_gram_transition_matrix(transitions, n=2, max_label=max_syllable) + smoothing
+                n_gram_transition_matrix(transitions, n=2, max_label=max_syllable)
+                + smoothing
             )
 
             # Normalize matrix
@@ -1368,9 +1408,9 @@ def get_group_trans_mats(labels, label_group, group, syll_include, normalize="bi
         # Get recordings to include in trans_mat
         # subset only syllable included
         trans_mats.append(
-            get_transition_matrix(use_labels, normalize=normalize, combine=True)[syll_include, :][
-                :, syll_include
-            ]
+            get_transition_matrix(use_labels, normalize=normalize, combine=True)[
+                syll_include, :
+            ][:, syll_include]
         )
 
         # Getting frequency information for node scaling
@@ -1447,7 +1487,9 @@ def visualize_transition_bigram(
     save_analysis_figure(fig, plot_name, project_dir, model_name, save_dir)
 
 
-def generate_transition_matrices(project_dir, model_name, normalize="bigram", min_frequency=0.005):
+def generate_transition_matrices(
+    project_dir, model_name, normalize="bigram", min_frequency=0.005
+):
     """Generate the transition matrices for each recording.
 
     Parameters
@@ -1543,7 +1585,9 @@ def plot_transition_graph_group(
         nodelist = G.nodes()
         # normalize the usage values
         sum_usages = sum(usages[i])
-        normalized_usages = np.array([u / sum_usages for u in usages[i]]) * node_scaling + 1000
+        normalized_usages = (
+            np.array([u / sum_usages for u in usages[i]]) * node_scaling + 1000
+        )
         nx.draw_networkx_nodes(
             G,
             pos,
@@ -1631,7 +1675,9 @@ def plot_transition_graph_difference(
         # left tm minus right tm
         tm_diff = trans_mats[left_ind] - trans_mats[right_ind]
         # left usage minus right usage
-        usages_diff = np.array(list(usages[left_ind])) - np.array(list(usages[right_ind]))
+        usages_diff = np.array(list(usages[left_ind])) - np.array(
+            list(usages[right_ind])
+        )
         normlized_usg_abs_diff = (
             np.abs(usages_diff) / np.abs(usages_diff).sum()
         ) * node_scaling + 500