Commit 19397fc

handle empty compute and determ token for map
1 parent e353fb2 commit 19397fc

File tree

4 files changed: +55, -31 lines changed

distributed/client.py

Lines changed: 28 additions & 21 deletions
@@ -848,6 +848,12 @@ class _MapExpr(Expr):
     ]
     _defaults = {"_cached_keys": None}
 
+    @property
+    def deterministic_token(self):
+        if not self.pure:
+            self._determ_token = uuid.uuid4().hex
+        return super().deterministic_token
+
     @property
     def keys(self) -> Iterable[Key]:
         if self._cached_keys is not None:
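
The new property regenerates the deterministic token whenever the map is impure, so repeated `Client.map(..., pure=False)` calls produce fresh task keys instead of deduplicating against each other. A minimal sketch of the intended behavior (assumes a running cluster; `inc` is a stand-in function, not part of this commit):

    from dask.distributed import Client

    client = Client()

    def inc(x):
        return x + 1

    # With pure=False, identical map() calls must not collapse onto the
    # same keys: each call should generate a fresh token.
    futs_a = client.map(inc, range(3), pure=False)
    futs_b = client.map(inc, range(3), pure=False)
    assert {f.key for f in futs_a}.isdisjoint(f.key for f in futs_b)
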
@@ -3629,27 +3635,28 @@ def compute(
         metadata = SpanMetadata(
             collections=[get_collections_metadata(v) for v in variables]
         )
-
-        expr = collections_to_expr(variables, optimize_graph, **kwargs)
-        from dask._expr import FinalizeCompute
-
-        expr = FinalizeCompute(expr)
-
-        expr = expr.optimize()
-        names = list(flatten(expr.__dask_keys__()))
-
-        futures_dict = self._graph_to_futures(
-            expr,
-            names,
-            workers=workers,
-            allow_other_workers=allow_other_workers,
-            resources=resources,
-            retries=retries,
-            user_priority=priority,
-            fifo_timeout=fifo_timeout,
-            actors=actors,
-            span_metadata=metadata,
-        )
+        futures_dict = {}
+        if variables:
+            expr = collections_to_expr(variables, optimize_graph, **kwargs)
+            from dask._expr import FinalizeCompute
+
+            expr = FinalizeCompute(expr)
+
+            expr = expr.optimize()
+            names = list(flatten(expr.__dask_keys__()))
+
+            futures_dict = self._graph_to_futures(
+                expr,
+                names,
+                workers=workers,
+                allow_other_workers=allow_other_workers,
+                resources=resources,
+                retries=retries,
+                user_priority=priority,
+                fifo_timeout=fifo_timeout,
+                actors=actors,
+                span_metadata=metadata,
+            )
 
         i = 0
         futures = []
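
With the `if variables:` guard, `Client.compute` no longer builds and submits an expression graph when nothing in the input is a dask collection; plain values simply pass through. A hedged usage sketch mirroring the new test below (assumes a running cluster):

    from dask import delayed
    from dask.distributed import Client

    client = Client()

    def inc(x):
        return x + 1

    # A bare value comes back unchanged; no graph is submitted for it.
    assert client.compute(1) == 1

    # Mixed input: the plain 1 passes through while delayed(inc)(1) is
    # computed; gather resolves the pair to concrete values.
    assert client.gather(client.compute((1, delayed(inc)(1)))) == [1, 2]
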

distributed/tests/test_client.py

Lines changed: 7 additions & 0 deletions
@@ -270,6 +270,13 @@ async def test_custom_key_with_batches(c, s, a, b):
     await wait(futs)
 
 
+@gen_cluster(client=True)
+async def test_compute_no_collection_or_future(c, s, *workers):
+    assert c.compute(1) == 1
+
+    assert await c.gather(c.compute((1, delayed(inc)(1)))) == [1, 2]
+
+
 @gen_cluster(client=True)
 async def test_compute_retries(c, s, a, b):
     args = [ZeroDivisionError("one"), ZeroDivisionError("two"), 3]

distributed/tests/test_scheduler.py

Lines changed: 10 additions & 5 deletions
@@ -2823,11 +2823,16 @@ async def test_default_task_duration_splits(c, s, a, b):
     npart = 10
     df = dd.from_pandas(pd.DataFrame({"A": range(100), "B": 1}), npartitions=npart)
     with dask.config.set({"dataframe.shuffle.method": "tasks"}):
-        graph = df.shuffle(
-            "A",
-            # If we don't have enough partitions, we'll fall back to a simple shuffle
-            max_branch=npart - 1,
-        ).sum()
+        graph = (
+            df.shuffle(
+                "A",
+                # If we don't have enough partitions, we'll fall back to a
+                # simple shuffle
+                max_branch=npart - 1,
+            )
+            # Block optimizer from killing the shuffle
+            .map_partitions(lambda x: len(x)).sum()
+        )
     fut = c.compute(graph)
     await wait(fut)
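
Context for this test change: a bare `df.shuffle("A").sum()` can be optimized so aggressively that the shuffle disappears (a global sum does not depend on row order), leaving no split tasks to measure. An opaque `map_partitions` call blocks that rewrite. A hedged illustration, assuming dask.dataframe's expression optimizer (the same pattern is applied in test_steal.py below):

    import pandas as pd
    import dask
    import dask.dataframe as dd

    df = dd.from_pandas(pd.DataFrame({"A": range(100), "B": 1}), npartitions=10)

    with dask.config.set({"dataframe.shuffle.method": "tasks"}):
        # Order-independent reduction: the optimizer may elide the shuffle.
        elided = df.shuffle("A").sum()

        # The lambda is opaque to the optimizer, so the shuffle survives.
        kept = df.shuffle("A").map_partitions(lambda x: len(x)).sum()
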

distributed/tests/test_steal.py

Lines changed: 10 additions & 5 deletions
@@ -1082,11 +1082,16 @@ async def test_blocklist_shuffle_split(c, s, a, b):
     npart = 10
     df = dd.from_pandas(pd.DataFrame({"A": range(100), "B": 1}), npartitions=npart)
     with dask.config.set({"dataframe.shuffle.method": "tasks"}):
-        graph = df.shuffle(
-            "A",
-            # If we don't have enough partitions, we'll fall back to a simple shuffle
-            max_branch=npart - 1,
-        ).sum()
+        graph = (
+            df.shuffle(
+                "A",
+                # If we don't have enough partitions, we'll fall back to a
+                # simple shuffle
+                max_branch=npart - 1,
+            )
+            # Block optimizer from killing the shuffle
+            .map_partitions(lambda x: len(x)).sum()
+        )
     res = c.compute(graph)
 
     while not s.tasks:
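
Same rationale as the test_scheduler.py change above: the opaque `map_partitions` keeps the shuffle in the optimized graph, so the blocklisted split tasks actually reach the scheduler. A hypothetical way a test could assert that (not part of this commit; assumes `key_split` is importable from dask.base and `s` is the scheduler fixture):

    from dask.base import key_split

    # After submitting the graph, shuffle-related task prefixes should
    # be present among the scheduler's tasks.
    prefixes = {key_split(key) for key in s.tasks}
    assert any("shuffle" in prefix for prefix in prefixes)
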
