Key the cached GDN prefill workspace by device index (per CodeRabbit review comment)

jiahanc · jiahanc · commit ed417aadc974 · 2026-04-07T18:54:02.000-07:00
Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com>
diff --git a/flashinfer/gdn_kernels/blackwell/gdn_prefill.py b/flashinfer/gdn_kernels/blackwell/gdn_prefill.py
@@ -221,11 +221,10 @@ def chunk_gated_delta_rule_sm100(
     workspace_size = GatedDeltaNetChunkedKernel.get_workspace_size(
         num_sm, B, HQ, HV, True
     )
-    if "workspace" not in cache or cache["workspace"].size(0) < workspace_size:
-        cache["workspace"] = torch.empty(
-            workspace_size, dtype=torch.int8, device=q.device
-        )
-    workspace = cache["workspace"]
+    ws_key = f"workspace_{q.device.index}"
+    if ws_key not in cache or cache[ws_key].size(0) < workspace_size:
+        cache[ws_key] = torch.empty(workspace_size, dtype=torch.int8, device=q.device)
+    workspace = cache[ws_key]
 
     stream = cuda.CUstream(torch.cuda.current_stream().cuda_stream)
     compiled(