Changes from 45 commits

Commits (47)
6d25aa9
Separate MoseqModel
CBroz1 Oct 3, 2025
5e7090b
Fix key as sep arg
CBroz1 Oct 3, 2025
01c2181
Separate common.LFP
CBroz1 Oct 3, 2025
2e0d382
Separate FigURLCuration
CBroz1 Oct 3, 2025
f308b43
Separate DLCModelTraining
CBroz1 Oct 14, 2025
88be221
Separate MetricCuration
CBroz1 Oct 15, 2025
7e5d316
Separate Waveforms
CBroz1 Oct 28, 2025
61168a8
Separate Waveforms 2
CBroz1 Oct 28, 2025
a669c01
Separate QualityMetrics
CBroz1 Oct 28, 2025
1b84071
Fix missing args
CBroz1 Oct 28, 2025
0bdf112
Fix make_compute arg
CBroz1 Oct 28, 2025
7777a5f
Fix deterministic extractor path
CBroz1 Oct 29, 2025
7e52bca
Fix QualityMetrics, add 'self' arg
CBroz1 Oct 29, 2025
01a1c6d
Separate CuratedSpikeSorting
CBroz1 Oct 29, 2025
b20801a
Fix CuratedSpikeSorting
CBroz1 Oct 29, 2025
6e258e2
Separate SpikeSorting
CBroz1 Oct 29, 2025
b89aad9
Update changelog
CBroz1 Oct 29, 2025
4f39e29
Remove old no-transaction-make, update docs for tripart approach
CBroz1 Oct 29, 2025
5a3eb69
Merge branch 'master' of https://github.com/LorenFrankLab/spyglass in…
CBroz1 Oct 29, 2025
803258b
PR comments
CBroz1 Oct 29, 2025
f62f036
Fix test
CBroz1 Oct 29, 2025
93cb41a
Separate MoseqModel
CBroz1 Oct 3, 2025
b314dc0
Fix key as sep arg
CBroz1 Oct 3, 2025
6d6bcc0
Separate common.LFP
CBroz1 Oct 3, 2025
c835802
Separate FigURLCuration
CBroz1 Oct 3, 2025
52fb99f
Separate DLCModelTraining
CBroz1 Oct 14, 2025
6ce4ca7
Separate MetricCuration
CBroz1 Oct 15, 2025
16c12ba
Separate Waveforms
CBroz1 Oct 28, 2025
c8ac376
Separate Waveforms 2
CBroz1 Oct 28, 2025
d12c290
Separate QualityMetrics
CBroz1 Oct 28, 2025
7047a5f
Fix missing args
CBroz1 Oct 28, 2025
be06e46
Fix make_compute arg
CBroz1 Oct 28, 2025
2e71abf
Fix deterministic extractor path
CBroz1 Oct 29, 2025
ccec489
Fix QualityMetrics, add 'self' arg
CBroz1 Oct 29, 2025
c1ab398
Separate CuratedSpikeSorting
CBroz1 Oct 29, 2025
e0bca4e
Fix CuratedSpikeSorting
CBroz1 Oct 29, 2025
77e964a
Separate SpikeSorting
CBroz1 Oct 29, 2025
89b8d16
Update changelog
CBroz1 Oct 29, 2025
dc4e03a
Remove old no-transaction-make, update docs for tripart approach
CBroz1 Oct 29, 2025
b8c864a
PR comments
CBroz1 Oct 29, 2025
dd9ae11
Fix test
CBroz1 Oct 29, 2025
dc6b17f
Merge branch '3pm' of https://github.com/cbroz1/spyglass into 3pm
CBroz1 Oct 29, 2025
ce893d6
Merge branch 'master' of https://github.com/LorenFrankLab/spyglass in…
CBroz1 Nov 3, 2025
571fa30
Update src/spyglass/spikesorting/v0/spikesorting_curation.py
CBroz1 Nov 3, 2025
cdf7ae2
Remove key edits
CBroz1 Nov 3, 2025
eaff799
Merge branch 'master' of https://github.com/LorenFrankLab/spyglass in…
CBroz1 Nov 19, 2025
e476f51
Deprecate warning for no_transaction make
CBroz1 Nov 19, 2025
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -35,6 +35,7 @@ import all foreign key references.

- Auto-load within-Spyglass tables for graph operations #1368
- Allow rechecking of recomputes #1380, #1413
- Remove `populate` transaction workaround with tripart `make` calls #1422

### Pipelines

17 changes: 0 additions & 17 deletions docs/src/Features/Mixin.md
@@ -243,23 +243,6 @@ See [issue #1000](https://github.com/LorenFrankLab/spyglass/issues/1000) and
[PR #1001](https://github.com/LorenFrankLab/spyglass/pull/1001) for more
information.

### Disable Transaction Protection

By default, DataJoint wraps the `populate` function in a transaction to ensure
data integrity (see
[Transactions](https://docs.datajoint.io/python/definition/05-Transactions.html)).

This can cause issues when populating large tables if another user attempts to
declare/modify a table while the transaction is open (see
[issue #1030](https://github.com/LorenFrankLab/spyglass/issues/1030) and
[DataJoint issue #1170](https://github.com/datajoint/datajoint-python/issues/1170)).

Tables with `_use_transaction` set to `False` will not be wrapped in a
transaction when calling `populate`. Transaction protection is replaced by a
hash of upstream data to ensure no changes are made to the table during the
unprotected populate. The additional time required to hash the data is a
trade-off for already time-consuming populates, but avoids blocking other users.

## Miscellaneous Helper functions

`file_like` allows you to restrict a table using a substring of a file name.
41 changes: 39 additions & 2 deletions docs/src/ForDevelopers/CustomPipelines.md
@@ -148,6 +148,8 @@ downstream analysis is selective to an analysis result, you might add a `result`
field to the analysis table, and store various results associated with that
analysis in a part table.

#### Table Example

Example analysis table:

```python
@@ -192,15 +194,50 @@ class MyAnalysis(SpyglassMixin, dj.Computed):
self.MyAnalysisPart.insert1({**key, "result": 1})
```

### Make Method

In general, `make` methods have three steps:

1. Collect inputs: fetch the relevant parameters and data.
2. Run analysis: run the analysis on the inputs.
3. Insert results: insert the results into the relevant tables.

DataJoint has protections in place to ensure that `populate` calls are treated
-as a single transaction, but separating these steps supports debugging and
-testing.
+as a single transaction, but transaction times can slow down table interactions
+for collaborators. Instead, consider an explicit separation with a
+[generator approach](https://github.com/datajoint/datajoint-python/blob/63ebc380ecdd1ba1b0cff02f9927fe2666a59e24/datajoint/autopopulate.py#L108-L112).

```python
@schema
class MyAnalysis(SpyglassMixin, dj.Computed):
...

def make_fetch(self, key):
one = SomeUpstreamTable.fetch1(...) # (1)
two = AnotherUpstreamTable.fetch1(...) # (2)

return [one, two]

def make_compute(self, key, one, two):
result = some_analysis_function(one, two) # (3)
self_insert = {'result_field': result} # (4)

return self_insert

def make_insert(self, key, self_insert):
self.insert1(dict(key, **self_insert)) # (5)
```

1. `make_fetch` must not modify the key or the database; it only fetches data.
2. `make_fetch` must be deterministic and idempotent.
- Deterministic: given the same key, it always returns the same data.
- Idempotent: calling it multiple times has the same effect as calling it
once.
3. `make_compute` runs time-consuming computations.
4. `make_compute` should not modify the key or the database.
5. `make_insert` modifies the database.
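The numbered rules above can be sketched as a driver loop. This is an illustrative sketch only, not the actual DataJoint implementation (which lives in `autopopulate.py`); `tripart_populate` and `data_hash` are hypothetical names, and it assumes `make_fetch` and `make_compute` each return a sequence that is unpacked into the next step:

```python
import hashlib
import pickle


def data_hash(fetched):
    # Hash the fetched objects so upstream changes can be detected later.
    return hashlib.md5(pickle.dumps(fetched)).hexdigest()


def tripart_populate(table, key):
    """Drive one tripart make call outside a long transaction (sketch)."""
    fetched = table.make_fetch(key)  # step 1: read-only, deterministic
    checksum = data_hash(fetched)

    inserts = table.make_compute(key, *fetched)  # step 2: slow, no DB writes

    # Because make_fetch is deterministic and idempotent, re-fetching and
    # comparing hashes detects upstream edits made during the long compute.
    if data_hash(table.make_fetch(key)) != checksum:
        raise RuntimeError(f"Upstream data changed during compute: {key}")

    table.make_insert(key, *inserts)  # step 3: the only write
```

Only the final `make_insert` needs transaction protection, so collaborators are not blocked while the long compute runs.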

### Time Intervals

To facilitate operations on the time intervals, the `IntervalList` table has a
`fetch_interval` method that returns the relevant `valid_times` as an `Interval`
91 changes: 63 additions & 28 deletions src/spyglass/behavior/v1/moseq.py
@@ -1,8 +1,10 @@
import os
from pathlib import Path
from typing import Dict, List, Optional

import datajoint as dj
import keypoint_moseq as kpms
import numpy as np

from spyglass.common import AnalysisNwbfile
from spyglass.position.position_merge import PositionOutput
@@ -108,25 +110,68 @@ class MoseqModel(SpyglassMixin, dj.Computed):
model_name = "": varchar(255)
"""

-def make(self, key):
-"""Method to train a model and insert the resulting model into the MoseqModel table
+# Make method trains a model and inserts it into the table
+
+def make_fetch(self, key: dict) -> List: # TODO: test
+"""Fetch data relevant to model training.

Parameters
----------
key : dict
key to a single MoseqModelSelection table entry
"""
model_params = (MoseqModelParams & key).fetch1("model_params")
model_name = self._make_model_name(key)
model_params = (MoseqModelParams & key).fetch1("model_params") # FETCH
model_name = self._make_model_name(key) # FETCH
video_paths = (PoseGroup & key).fetch_video_paths() # FETCH
bodyparts = (PoseGroup & key).fetch1("bodyparts") # FETCH
coordinates, confidences = PoseGroup().fetch_pose_datasets(
key, format_for_moseq=True
)

model, epochs_trained = None, None
initial_model_key = model_params.get("initial_model", None)
if initial_model_key is not None:
# begin training from an existing model
query = MoseqModel & initial_model_key
if not query:
raise ValueError(
f"Initial model: {initial_model_key} not found"
)
model = query.fetch_model()
epochs_trained = query.fetch1("epochs_trained")

return [
model_params,
model_name,
video_paths,
bodyparts,
coordinates,
confidences,
initial_model_key,
model,
epochs_trained,
]

def make_compute(
self,
key: dict,
model_params: dict,
model_name: str,
video_paths: List[Path],
bodyparts: List[str],
coordinates: Dict[str, np.ndarray],
confidences: Dict[str, np.ndarray],
initial_model_key: dict,
model: Optional[dict] = None,
epochs_trained: Optional[int] = None,
):
# set up the project and config
project_dir, video_dir = moseq_project_dir, moseq_video_dir
project_dir = os.path.join(project_dir, model_name)
video_dir = os.path.join(video_dir, model_name)
# os.makedirs(project_dir, exist_ok=True)
os.makedirs(video_dir, exist_ok=True)
# make symlinks to the videos in a single directory
-video_paths = (PoseGroup & key).fetch_video_paths()
for video in video_paths:
destination = os.path.join(video_dir, os.path.basename(video))
if os.path.exists(destination):
@@ -135,7 +180,6 @@ def make(self, key):
os.remove(destination) # remove if it's a broken symlink
os.symlink(video, destination)

-bodyparts = (PoseGroup & key).fetch1("bodyparts")
kpms.setup_project(
str(project_dir),
video_dir=str(video_dir),
Expand All @@ -149,9 +193,6 @@ def make(self, key):
config = kpms.load_config(project_dir)

# fetch the data and format it for moseq
-coordinates, confidences = PoseGroup().fetch_pose_datasets(
-key, format_for_moseq=True
-)
data, metadata = kpms.format_data(coordinates, confidences, **config)

# either initialize a new model or load an existing one
@@ -162,21 +203,12 @@
)
epochs_trained = model_params["num_ar_iters"]

-else:
-# begin training from an existing model
-query = MoseqModel & initial_model_key
-if not query:
-raise ValueError(
-f"Initial model: {initial_model_key} not found"
-)
-model = query.fetch_model()
-epochs_trained = query.fetch1("epochs_trained")

# update the hyperparameters
kappa = model_params["kappa"]
model = kpms.update_hypparams(model, kappa=kappa)
# run fitting on the complete model
num_epochs = model_params["num_epochs"]
total_epochs_trained = (epochs_trained or 0) + num_epochs
model = kpms.fit_model(
model,
data,
@@ -185,18 +217,21 @@
model_name,
ar_only=False,
start_iter=epochs_trained,
-num_iters=epochs_trained + num_epochs,
+num_iters=total_epochs_trained,
)[0]
# reindex syllables by frequency
kpms.reindex_syllables_in_checkpoint(project_dir, model_name)
-self.insert1(
-{
-**key,
-"project_dir": project_dir,
-"epochs_trained": num_epochs + epochs_trained,
-"model_name": model_name,
-}
-)
+secondary_key = {
+"project_dir": project_dir,
+"epochs_trained": total_epochs_trained,
+"model_name": model_name,
+}
+
+return [secondary_key]
+
+def make_insert(self, key: dict, secondary_key: dict = None):
+self.insert1(dict(key, **secondary_key))

def _make_model_name(self, key: dict):
# make a unique model name based on the key