scverse · Mr-Milk · Aug 8, 2025 · Aug 8, 2025 · Aug 8, 2025 · Aug 8, 2025
diff --git a/src/decoupler/op/_hallmark.py b/src/decoupler/op/_hallmark.py
@@ -1,3 +1,5 @@
+from concurrent.futures import Future, ThreadPoolExecutor
+
 import pandas as pd
 
 from decoupler._docs import docs
@@ -11,7 +13,8 @@ def hallmark(
     organism: str = "human",
     license: str = "academic",
     verbose: bool = False,
-) -> pd.DataFrame:
+    as_future: bool = False,
+) -> pd.DataFrame | Future:
     """
     Hallmark gene sets :cite:p:`msigdb`.
 
@@ -23,10 +26,13 @@ def hallmark(
     %(organism)s
     %(license)s
     %(verbose)s
+    as_future : bool
+        If True, returns a `Future` to allow asynchronous execution.
 
     Returns
     -------
-    Dataframe in long format containing the hallmark gene sets.
+    Dataframe in long format containing the hallmark gene sets
+    or a Future that resolves to it.
 
     Example
     -------
@@ -36,17 +42,34 @@ def hallmark(
 
         hm = dc.op.hallmark()
         hm
+
+        # Asynchronous
+        future = dc.op.hallmark(as_future=True)
+        hm = future.result()
     """
-    url = "https://static.omnipathdb.org/tables/msigdb-hallmark.tsv.gz"
-    hm = _download(url, verbose=verbose)
-    hm = _bytes_to_pandas(hm, sep="\t", compression="gzip")
-    hm = hm[["geneset", "genesymbol"]]
-    hm["geneset"] = hm["geneset"].str.replace("HALLMARK_", "")
-    hm["genesymbol"] = hm["genesymbol"].str.replace("COMPLEX:", "").str.split("_")
-    hm = hm.explode("genesymbol")
-    hm = _infer_dtypes(hm)
-    if organism != "human":
-        hm = translate(hm, columns=["genesymbol"], target_organism=organism, verbose=verbose)
-    hm = hm.rename(columns={"geneset": "source", "genesymbol": "target"})
-    hm = hm.drop_duplicates(["source", "target"]).reset_index(drop=True)
-    return hm
+
+    def _task():
+        """Download and tidy the hallmark table; runs inline or on a worker thread."""
+        url = "https://static.omnipathdb.org/tables/msigdb-hallmark.tsv.gz"
+        hm = _download(url, verbose=verbose)
+        hm = _bytes_to_pandas(hm, sep="\t", compression="gzip")
+        hm = hm[["geneset", "genesymbol"]]
+        hm["geneset"] = hm["geneset"].str.replace("HALLMARK_", "")
+        hm["genesymbol"] = hm["genesymbol"].str.replace("COMPLEX:", "").str.split("_")
+        hm = hm.explode("genesymbol")
+        hm = _infer_dtypes(hm)
+        if organism != "human":
+            hm = translate(hm, columns=["genesymbol"], target_organism=organism, verbose=verbose)
+        hm = hm.rename(columns={"geneset": "source", "genesymbol": "target"})
+        hm = hm.drop_duplicates(["source", "target"]).reset_index(drop=True)
+        return hm
+
+    if as_future:
+        # Not a ``with`` block: its exit calls ``shutdown(wait=True)``, which would
+        # block here until the task finished and make the call synchronous.
+        executor = ThreadPoolExecutor(max_workers=1)
+        future = executor.submit(_task)
+        # Non-blocking shutdown: the already-submitted task still runs to completion.
+        executor.shutdown(wait=False)
+        return future
+    return _task()
diff --git a/tests/op/test_hallmark.py b/tests/op/test_hallmark.py
@@ -9,3 +9,12 @@ def test_hallmark():
     cols = {"source", "target"}
     assert cols.issubset(hm.columns)
     assert not hm.duplicated(["source", "target"]).any()
+
+
+def test_hallmark_as_future():
+    # Resolve the future, then run the column and duplicate checks on its result.
+    table = dc.op.hallmark(as_future=True).result()
+    assert isinstance(table, pd.DataFrame)
+    required = {"source", "target"}
+    assert required.issubset(table.columns)
+    assert not table.duplicated(["source", "target"]).any()