From 97ef9b19628f0a9012f33cef1da9d629efc09742 Mon Sep 17 00:00:00 2001 From: Yimin Zheng Date: Fri, 8 Aug 2025 09:40:11 +0000 Subject: [PATCH 1/4] draft impl for returning future object --- src/decoupler/op/_hallmark.py | 44 ++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/src/decoupler/op/_hallmark.py b/src/decoupler/op/_hallmark.py index e88f6d6..f63926e 100644 --- a/src/decoupler/op/_hallmark.py +++ b/src/decoupler/op/_hallmark.py @@ -1,3 +1,5 @@ +from concurrent.futures import ThreadPoolExecutor + import pandas as pd from decoupler._docs import docs @@ -11,6 +13,7 @@ def hallmark( organism: str = "human", license: str = "academic", verbose: bool = False, + as_future: bool = False, ) -> pd.DataFrame: """ Hallmark gene sets :cite:p:`msigdb`. @@ -23,10 +26,13 @@ def hallmark( %(organism)s %(license)s %(verbose)s + as_future : bool + If True, returns a `Future` to allow asynchronous execution. Returns ------- - Dataframe in long format containing the hallmark gene sets. + Dataframe in long format containing the hallmark gene sets + or a Future that resolves to it. 
Example ------- @@ -36,17 +42,27 @@ def hallmark( hm = dc.op.hallmark() hm + + # Asynchronous + future = dc.op.hallmark(as_future=True) + hm = future.result() """ - url = "https://static.omnipathdb.org/tables/msigdb-hallmark.tsv.gz" - hm = _download(url, verbose=verbose) - hm = _bytes_to_pandas(hm, sep="\t", compression="gzip") - hm = hm[["geneset", "genesymbol"]] - hm["geneset"] = hm["geneset"].str.replace("HALLMARK_", "") - hm["genesymbol"] = hm["genesymbol"].str.replace("COMPLEX:", "").str.split("_") - hm = hm.explode("genesymbol") - hm = _infer_dtypes(hm) - if organism != "human": - hm = translate(hm, columns=["genesymbol"], target_organism=organism, verbose=verbose) - hm = hm.rename(columns={"geneset": "source", "genesymbol": "target"}) - hm = hm.drop_duplicates(["source", "target"]).reset_index(drop=True) - return hm + + def _task(): + url = "https://static.omnipathdb.org/tables/msigdb-hallmark.tsv.gz" + hm = _download(url, verbose=verbose) + hm = _bytes_to_pandas(hm, sep="\t", compression="gzip") + hm = hm[["geneset", "genesymbol"]] + hm["geneset"] = hm["geneset"].str.replace("HALLMARK_", "") + hm["genesymbol"] = hm["genesymbol"].str.replace("COMPLEX:", "").str.split("_") + hm = hm.explode("genesymbol") + hm = _infer_dtypes(hm) + if organism != "human": + hm = translate(hm, columns=["genesymbol"], target_organism=organism, verbose=verbose) + hm = hm.rename(columns={"geneset": "source", "genesymbol": "target"}) + hm = hm.drop_duplicates(["source", "target"]).reset_index(drop=True) + + if as_future: + with ThreadPoolExecutor(max_workers=1) as executor: + return executor.submit(_task) + return _task() From 10de8a8bf61a21f2238ba43c8b8e1fe4fd4d74fc Mon Sep 17 00:00:00 2001 From: Yimin Zheng Date: Fri, 8 Aug 2025 09:40:24 +0000 Subject: [PATCH 2/4] add test --- tests/op/test_hallmark.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/op/test_hallmark.py b/tests/op/test_hallmark.py index 7ec2ed1..166ba2d 100644 --- a/tests/op/test_hallmark.py +++ 
b/tests/op/test_hallmark.py @@ -9,3 +9,12 @@ def test_hallmark(): cols = {"source", "target"} assert cols.issubset(hm.columns) assert not hm.duplicated(["source", "target"]).any() + + +def test_hallmark_as_future(): + future = dc.op.hallmark(as_future=True) + hm = future.result() + assert isinstance(hm, pd.DataFrame) + cols = {"source", "target"} + assert cols.issubset(hm.columns) + assert not hm.duplicated(["source", "target"]).any() From e2c2da10de5aabfd12ea241c5c6f7867da4040d9 Mon Sep 17 00:00:00 2001 From: Yimin Zheng Date: Fri, 8 Aug 2025 09:47:12 +0000 Subject: [PATCH 3/4] add return --- src/decoupler/op/_hallmark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/decoupler/op/_hallmark.py b/src/decoupler/op/_hallmark.py index f63926e..f707ad6 100644 --- a/src/decoupler/op/_hallmark.py +++ b/src/decoupler/op/_hallmark.py @@ -61,6 +61,7 @@ def _task(): hm = translate(hm, columns=["genesymbol"], target_organism=organism, verbose=verbose) hm = hm.rename(columns={"geneset": "source", "genesymbol": "target"}) hm = hm.drop_duplicates(["source", "target"]).reset_index(drop=True) + return hm if as_future: with ThreadPoolExecutor(max_workers=1) as executor: From 848e46eefd6412b0cb6c79c590d20fbe08b8b4b3 Mon Sep 17 00:00:00 2001 From: Yimin Zheng Date: Fri, 8 Aug 2025 09:49:07 +0000 Subject: [PATCH 4/4] Correct return type --- src/decoupler/op/_hallmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/decoupler/op/_hallmark.py b/src/decoupler/op/_hallmark.py index f707ad6..b71541d 100644 --- a/src/decoupler/op/_hallmark.py +++ b/src/decoupler/op/_hallmark.py @@ -1,4 +1,4 @@ -from concurrent.futures import ThreadPoolExecutor +from concurrent.futures import Future, ThreadPoolExecutor import pandas as pd @@ -14,7 +14,7 @@ def hallmark( license: str = "academic", verbose: bool = False, as_future: bool = False, -) -> pd.DataFrame: +) -> pd.DataFrame | Future: """ Hallmark gene sets :cite:p:`msigdb`.