AgnostiqHQ
diff --git a/.github/workflows/license.yml b/.github/workflows/license.yml
Lines changed: 1 addition & 1 deletion
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 9 additions & 4 deletions
diff --git a/covalent_dispatcher/_core/dispatcher.py b/covalent_dispatcher/_core/dispatcher.py
Lines changed: 14 additions & 16 deletions
diff --git a/covalent_dispatcher/_core/dispatcher_modules/caches.py b/covalent_dispatcher/_core/dispatcher_modules/caches.py
Lines changed: 166 additions & 51 deletions
@@ -62,7 +62,7 @@ jobs:
         with:
           requirements: "requirements.txt,tests/requirements.txt,requirements-client.txt"
           fail: "Copyleft,Error,Other"
-          exclude: "^(pylint|aio[-_]*).*"
+          exclude: "^(pylint|aio[-_]*|pytest-asyncio|typing-extensions).*"
           exclude-license: 'Mozilla Public License 2.0 \(MPL 2.0\)'
           totals: true
           headers: true
 
@@ -11,19 +11,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Allow registering custom file transfer strategies
 
+### Removed
+
+- Removed obsolete endpoint to register sublattice manifest
+
+### Operations
+
+- Whitelisted `pytest-asyncio` and `typing-extensions` for the license check
+
 ### Changed
 
+- Moved in-memory dispatcher state to a database (a temporary SQLite file by default, or the main DB when `COVALENT_CACHE_DB_URL` points to it)
 - Improved automatic file transfer strategy selection
 - HTTP strategy can now upload files too
 - Adjusted sublattice logic. The sublattice builder now attempts to
 link the sublattice with its parent electron.
 - Replaced json sublattice flow with new tarball importer to allow future memory
 footprint enhancements
 
-### Removed
-
-- Removed obsolete endpoint to register sublattice manifest
-
 ## [0.238.0-rc.0] - 2025-03-05
 
 ### Authors
 
@@ -34,7 +34,7 @@
 from . import runner_ng
 from .data_modules import graph as tg_utils
 from .data_modules import job_manager as jbmgr
-from .dispatcher_modules.caches import _pending_parents, _sorted_task_groups, _unresolved_tasks
+from .dispatcher_modules.caches import _task_group_cache, _workflow_run_cache
 from .runner_modules.cancel import cancel_tasks
 
 app_log = logger.app_log
@@ -95,7 +95,7 @@ async def _handle_completed_node(dispatch_id: str, node_id: int):
         gid = child["task_group_id"]
         app_log.debug(f"dispatch {dispatch_id}: parent gid {parent_gid}, child gid {gid}")
         if parent_gid != gid:
-            now_pending = await _pending_parents.decrement(dispatch_id, gid)
+            now_pending = await _task_group_cache.decrement(dispatch_id, gid)
             if now_pending < 1:
                 app_log.debug(f"Queuing task group {gid} for execution")
                 next_task_groups.append(gid)
@@ -414,13 +414,12 @@ async def _finalize_dispatch(dispatch_id: str):
 
 
 async def _initialize_caches(dispatch_id, pending_parents, sorted_task_groups):
-    for gid, indegree in pending_parents.items():
-        await _pending_parents.set_pending(dispatch_id, gid, indegree)
-
-    for gid, sorted_nodes in sorted_task_groups.items():
-        await _sorted_task_groups.set_task_group(dispatch_id, gid, sorted_nodes)
+    for gid in pending_parents:
+        indegree = pending_parents[gid]
+        sorted_nodes = sorted_task_groups[gid]
+        await _task_group_cache.set(dispatch_id, gid, indegree, sorted_nodes)
 
-    await _unresolved_tasks.set_unresolved(dispatch_id, 0)
+    await _workflow_run_cache.set_unresolved(dispatch_id, 0)
 
 
 async def _submit_initial_tasks(dispatch_id: str):
@@ -442,7 +441,7 @@ async def _submit_initial_tasks(dispatch_id: str):
     for gid in initial_groups:
         sorted_nodes = sorted_task_groups[gid]
         app_log.debug(f"Sorted nodes group group {gid}: {sorted_nodes}")
-        await _unresolved_tasks.increment(dispatch_id, len(sorted_nodes))
+        await _workflow_run_cache.increment(dispatch_id, len(sorted_nodes))
 
     for gid in initial_groups:
         sorted_nodes = sorted_task_groups[gid]
@@ -469,8 +468,8 @@ async def _handle_node_status_update(dispatch_id, node_id, node_status, detail):
     if node_status == RESULT_STATUS.COMPLETED:
         next_task_groups = await _handle_completed_node(dispatch_id, node_id)
         for gid in next_task_groups:
-            sorted_nodes = await _sorted_task_groups.get_task_group(dispatch_id, gid)
-            await _unresolved_tasks.increment(dispatch_id, len(sorted_nodes))
+            sorted_nodes = await _task_group_cache.get_task_group(dispatch_id, gid)
+            await _workflow_run_cache.increment(dispatch_id, len(sorted_nodes))
             await _submit_task_group(dispatch_id, sorted_nodes, gid)
 
     if node_status == RESULT_STATUS.FAILED:
@@ -481,7 +480,7 @@ async def _handle_node_status_update(dispatch_id, node_id, node_status, detail):
 
     # Decrement after any increments to avoid race with
     # finalize_dispatch()
-    await _unresolved_tasks.decrement(dispatch_id)
+    await _workflow_run_cache.decrement(dispatch_id)
 
 
 async def _handle_dispatch_exception(dispatch_id: str, ex: Exception) -> RESULT_STATUS:
@@ -532,7 +531,7 @@ async def _handle_event(msg: Dict):
             fut.set_result(dispatch_status)
         return dispatch_status
 
-    unresolved = await _unresolved_tasks.get_unresolved(dispatch_id)
+    unresolved = await _workflow_run_cache.get_unresolved(dispatch_id)
     if unresolved < 1:
         app_log.debug("Finalizing dispatch")
         try:
@@ -551,7 +550,7 @@ async def _handle_event(msg: Dict):
 
 async def _clear_caches(dispatch_id: str):
     """Clean up all keys in caches."""
-    await _unresolved_tasks.remove(dispatch_id)
+    await _workflow_run_cache.remove(dispatch_id)
 
     g_node_link = await tg_utils.get_nodes_links(dispatch_id)
     g = nx.readwrite.node_link_graph(g_node_link)
@@ -560,5 +559,4 @@ async def _clear_caches(dispatch_id: str):
 
     for gid in task_groups:
         # Clean up no longer referenced keys
-        await _pending_parents.remove(dispatch_id, gid)
-        await _sorted_task_groups.remove(dispatch_id, gid)
+        await _task_group_cache.remove(dispatch_id, gid)
@@ -18,84 +18,199 @@
 Helper classes for the dispatcher
 """
 
-from .store import _DictStore, _KeyValueBase
+import json
+import os
+import tempfile
 
+from covalent_dispatcher._core.data_modules.utils import run_in_executor
+from covalent_dispatcher._dal.base import workflow_db
+from covalent_dispatcher._dal.dispatcher_state import TaskGroupState, WorkflowState
+from covalent_dispatcher._db.datastore import DataStore
 
-def _pending_parents_key(dispatch_id: str, node_id: int):
-    return f"pending-parents-{dispatch_id}:{node_id}"
 
+class _WorkflowRunState:
 
-def _unresolved_tasks_key(dispatch_id: str):
-    return f"unresolved-{dispatch_id}"
+    def __init__(self, db: DataStore):
+        self.db = db
 
-
-def _task_groups_key(dispatch_id: str, task_group_id: int):
-    return f"task-groups-{dispatch_id}:{task_group_id}"
-
-
-class _UnresolvedTasksCache:
-    def __init__(self, store: _KeyValueBase = _DictStore()):
-        self._store = store
+    def _get_unresolved(self, dispatch_id: str):
+        with self.db.session() as session:
+            records = WorkflowState.get(
+                session,
+                fields=["num_unresolved_tasks"],
+                equality_filters={"dispatch_id": dispatch_id},
+                membership_filters={},
+            )
+            return records[0].num_unresolved_tasks
 
     async def get_unresolved(self, dispatch_id: str):
-        key = _unresolved_tasks_key(dispatch_id)
-        return await self._store.get(key)
+        return await run_in_executor(self._get_unresolved, dispatch_id)
+
+    def _set_unresolved(self, dispatch_id: str, val: int):
+        with self.db.session() as session:
+            WorkflowState.create(
+                session,
+                insert_kwargs={
+                    "dispatch_id": dispatch_id,
+                    "num_unresolved_tasks": val,
+                },
+            )
+            session.commit()
 
     async def set_unresolved(self, dispatch_id: str, val: int):
-        key = _unresolved_tasks_key(dispatch_id)
-        await self._store.insert(key, val)
+        return await run_in_executor(self._set_unresolved, dispatch_id, val)
+
+    def _increment(self, dispatch_id: str, interval: int = 1):
+        with self.db.session() as session:
+            WorkflowState.incr_bulk(
+                session=session,
+                increments={"num_unresolved_tasks": interval},
+                equality_filters={"dispatch_id": dispatch_id},
+                membership_filters={},
+            )
+            records = WorkflowState.get(
+                session,
+                fields=["num_unresolved_tasks"],
+                equality_filters={"dispatch_id": dispatch_id},
+                membership_filters={},
+            )
+            session.commit()
+            return records[0].num_unresolved_tasks
 
     async def increment(self, dispatch_id: str, interval: int = 1):
-        key = _unresolved_tasks_key(dispatch_id)
-        return await self._store.increment(key, interval)
+        return await run_in_executor(self._increment, dispatch_id, interval)
+
+    def _decrement(self, dispatch_id: str):
+        with self.db.session() as session:
+            WorkflowState.incr_bulk(
+                session=session,
+                increments={"num_unresolved_tasks": -1},
+                equality_filters={"dispatch_id": dispatch_id},
+                membership_filters={},
+            )
+            records = WorkflowState.get(
+                session,
+                fields=["num_unresolved_tasks"],
+                equality_filters={"dispatch_id": dispatch_id},
+                membership_filters={},
+            )
+            session.commit()
+            return records[0].num_unresolved_tasks
 
     async def decrement(self, dispatch_id: str):
-        key = _unresolved_tasks_key(dispatch_id)
-        return await self._store.increment(key, -1)
+        return await run_in_executor(self._decrement, dispatch_id)
+
+    def _remove(self, dispatch_id: str):
+        with self.db.session() as session:
+            WorkflowState.delete_bulk(
+                session=session,
+                equality_filters={"dispatch_id": dispatch_id},
+                membership_filters={},
+            )
+            session.commit()
 
     async def remove(self, dispatch_id: str):
-        key = _unresolved_tasks_key(dispatch_id)
-        await self._store.remove(key)
+        await run_in_executor(self._remove, dispatch_id)
 
 
-class _PendingParentsCache:
-    def __init__(self, store: _KeyValueBase = _DictStore()):
-        self._store = store
+class TaskGroupRunState:
 
-    async def get_pending(self, dispatch_id: str, task_group_id: int):
-        key = _pending_parents_key(dispatch_id, task_group_id)
-        return await self._store.get(key)
+    def __init__(self, db):
+        self.db = db
+
+    def _get_pending(self, dispatch_id: str, task_group_id: int):
+        with self.db.session() as session:
+            records = TaskGroupState.get(
+                session=session,
+                fields=["num_pending_parents"],
+                equality_filters={"dispatch_id": dispatch_id, "task_group_id": task_group_id},
+                membership_filters={},
+            )
+            return records[0].num_pending_parents
 
-    async def set_pending(self, dispatch_id: str, task_group_id: int, val: int):
-        key = _pending_parents_key(dispatch_id, task_group_id)
-        await self._store.insert(key, val)
+    async def get_pending(self, dispatch_id: str, task_group_id: int):
+        return await run_in_executor(self._get_pending, dispatch_id, task_group_id)
+
+    def _set(self, dispatch_id: str, task_group_id: int, num_pending: int, sorted_nodes):
+        with self.db.session() as session:
+            TaskGroupState.create(
+                session=session,
+                insert_kwargs={
+                    "dispatch_id": dispatch_id,
+                    "task_group_id": task_group_id,
+                    "num_pending_parents": num_pending,
+                    "sorted_tasks": json.dumps(sorted_nodes),
+                },
+            )
+            session.commit()
+
+    async def set(self, dispatch_id: str, task_group_id: int, num_pending: int, sorted_nodes):
+        return await run_in_executor(
+            self._set, dispatch_id, task_group_id, num_pending, sorted_nodes
+        )
+
+    def _decrement(self, dispatch_id: str, task_group_id):
+        with self.db.session() as session:
+            TaskGroupState.incr_bulk(
+                session=session,
+                increments={"num_pending_parents": -1},
+                equality_filters={"dispatch_id": dispatch_id, "task_group_id": task_group_id},
+                membership_filters={},
+            )
+            records = TaskGroupState.get(
+                session,
+                fields=["num_pending_parents"],
+                equality_filters={"dispatch_id": dispatch_id, "task_group_id": task_group_id},
+                membership_filters={},
+            )
+            session.commit()
+            return records[0].num_pending_parents
 
     async def decrement(self, dispatch_id: str, task_group_id: int):
-        key = _pending_parents_key(dispatch_id, task_group_id)
-        return await self._store.increment(key, -1)
+        return await run_in_executor(self._decrement, dispatch_id, task_group_id)
 
     async def remove(self, dispatch_id: str, task_group_id: int):
-        key = _pending_parents_key(dispatch_id, task_group_id)
-        await self._store.remove(key)
-
-
-class _SortedTaskGroups:
-    def __init__(self, store: _KeyValueBase = _DictStore()):
-        self._store = store
+        pass
+
+    def _get_task_group(self, dispatch_id: str, task_group_id: int):
+        with self.db.session() as session:
+            records = TaskGroupState.get(
+                session=session,
+                fields=["sorted_tasks"],
+                equality_filters={"dispatch_id": dispatch_id, "task_group_id": task_group_id},
+                membership_filters={},
+            )
+            return json.loads(records[0].sorted_tasks)
 
     async def get_task_group(self, dispatch_id: str, task_group_id: int):
-        key = _task_groups_key(dispatch_id, task_group_id)
-        return await self._store.get(key)
+        return await run_in_executor(self._get_task_group, dispatch_id, task_group_id)
 
-    async def set_task_group(self, dispatch_id: str, task_group_id: int, sorted_nodes: list):
-        key = _task_groups_key(dispatch_id, task_group_id)
-        await self._store.insert(key, sorted_nodes)
+    def _remove(self, dispatch_id: str, task_group_id: int):
+        with self.db.session() as session:
+            TaskGroupState.delete_bulk(
+                session=session,
+                equality_filters={"dispatch_id": dispatch_id, "task_group_id": task_group_id},
+                membership_filters={},
+            )
+            session.commit()
 
     async def remove(self, dispatch_id: str, task_group_id: int):
-        key = _task_groups_key(dispatch_id, task_group_id)
-        await self._store.remove(key)
+        await run_in_executor(self._remove, dispatch_id, task_group_id)
+
+
+# Default to a SQLite DB in a named temporary file (tmpfs-backed only if the temp dir is)
+cache_db_file = tempfile.NamedTemporaryFile(
+    mode="w+b", prefix="covalent-dispatcher-cache-", suffix=".db"
+)
+cache_db_URL = os.environ.get("COVALENT_CACHE_DB_URL", f"sqlite+pysqlite:///{cache_db_file.name}")
+initialize_db = True
+
+# If we want to store dispatcher state in the main DB, let the alembic migrations
+# create the tables
+if cache_db_URL == workflow_db.db_URL:
+    initialize_db = False
 
+cache_db = DataStore(db_URL=cache_db_URL, initialize_db=initialize_db)
 
-_pending_parents = _PendingParentsCache()
-_unresolved_tasks = _UnresolvedTasksCache()
-_sorted_task_groups = _SortedTaskGroups()
+_task_group_cache = TaskGroupRunState(db=cache_db)
+_workflow_run_cache = _WorkflowRunState(db=cache_db)