Skip to content

Commit d5e968e

Browse files
committed
tmp maybe_inplace
1 parent 7cf5683 commit d5e968e

File tree

4 files changed

+79
-2
lines changed

4 files changed

+79
-2
lines changed

vllm/compilation/passes/ir/inplace_raising.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ class VllmIRInplaceRaisingPass(VllmInductorPass):
2828
The maybe_inplace overloads have the same signature as the default overload
2929
so the pass simply replaces the called overload.
3030
That makes the graph properly functional.
31+
32+
This pass operates pre-AOTAutograd,
33+
so it must handle non-normalized and non-functional IR.
3134
"""
3235

3336
def __init__(self, vllm_config: VllmConfig) -> None:
@@ -56,12 +59,40 @@ def __call__(self, graph: fx.Graph) -> None:
5659
# must have maybe_inplace overload and allow_inplace
5760
assert ir_op.allow_inplace and ir_op.maybe_inplace is not None
5861

62+
# Check that activation inputs are not used after this op
63+
for arg_idx in ir_op.activation_indices:
64+
arg = node.args[arg_idx]
65+
assert isinstance(arg, fx.Node), "Activation inputs must be fx.Node"
66+
for user in arg.users:
67+
if user is not node:
68+
# TODO only check topologically?
69+
logger.warning(
70+
"Node %s (input to %s) has another use", arg, node
71+
)
72+
# TODO raise error, this is undefined behavior, which should not be allowed.
73+
# Users can just use the default overload if they want to keep activation inputs untouched.
74+
75+
if arg.op == "placeholder":
76+
# This node represents a graph input, and maybe_inplace might modify it,
77+
# meaning the user does not care about it.
78+
# Mark it dirty so downstream passes know it can be modified without affecting correctness.
79+
# TODO should we store this in node.meta instead?
80+
arg.meta["custom"] = {
81+
"is_consumed": True,
82+
**arg.meta.get("custom", {}),
83+
}
84+
logger.debug(
85+
"vLLM IR op %s has an activation input that is a graph input",
86+
ir_op.name,
87+
)
88+
5989
# Same signature, just replace the overload that's called.
6090
node.target = ir_op.torch_op
91+
node.meta["custom"] = {"maybe_inplace": True, **node.meta.get("custom", {})}
6192
self.raised_ops[ir_op.name] += 1
6293

6394
count = sum(self.raised_ops.values())
6495
ops = ",".join(self.raised_ops.keys())
6596
logger.debug(
66-
"VllmIRLoweringPass raised %d vLLM IR nodes for op(s) %s", count, ops
97+
"VllmIRInplaceRaisingPass raised %d vLLM IR nodes for op(s) %s", count, ops
6798
)

vllm/compilation/passes/ir/lowering_pass.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from collections import defaultdict
44
from collections.abc import Iterable
55

6+
import torch
67
from torch import fx
78
from torch._inductor.pattern_matcher import (
89
CallFunctionVarArgs,
@@ -17,6 +18,7 @@
1718
from vllm.logger import init_logger
1819
from vllm.logging_utils import lazy
1920

21+
from ..fx_utils import is_func
2022
from ..vllm_inductor_pass import VllmInductorPass
2123

2224
logger = init_logger(__name__)
@@ -138,3 +140,35 @@ def print_count(counts: dict[str, int]) -> str:
138140
if failed_nodes or failed_ops:
139141
logger.warning("Failed to lower vLLM IR ops: %s", ",".join(failed_ops))
140142
logger.warning("Full node list: %s", failed_nodes)
143+
144+
145+
class CloneCleanupPass(VllmInductorPass):
    """
    Remove ``aten.clone`` nodes that are no longer needed after vLLM IR
    lowering.

    NOTE(review): the actual graph mutation is currently disabled behind
    ``_REMOVAL_ENABLED`` (the original code used a bare ``continue  # TODO``
    that left the removal statements unreachable). Until the gate is flipped,
    this pass only logs removal candidates and annotated nodes; the final
    debug summary will therefore report 0 removals.
    """

    # Explicit gate replacing the stray `continue  # TODO`: flipping this to
    # True enables the removal path below. TODO: enable once it is confirmed
    # that every clone candidate is safe to elide.
    _REMOVAL_ENABLED = False

    def __init__(self, vllm_config: VllmConfig) -> None:
        super().__init__(vllm_config)

    @VllmInductorPass.time_and_log
    def __call__(self, graph: fx.Graph) -> None:
        count = 0
        # Iterate over a snapshot: erasing nodes while iterating the live
        # graph.nodes view is unsafe once removal is enabled.
        for node in list(graph.nodes):
            # Debug aid: surface nodes that upstream passes annotated via
            # node.meta["custom"] (e.g. the inplace-raising pass).
            if "custom" in node.meta:
                logger.info(
                    "Node %s with meta['custom']=%s, users: %s",
                    node,
                    node.meta["custom"],
                    list(node.users),
                )

            if not is_func(node, torch.ops.aten.clone.default):
                continue

            logger.info("Node %s is a clone node, removing it", node)
            if not self._REMOVAL_ENABLED:
                continue  # TODO: enable removal (see _REMOVAL_ENABLED)
            # Rewire every user of the clone to the clone's input, then
            # erase the now-unused clone node.
            node.replace_all_uses_with(node.args[0])
            graph.erase_node(node)
            count += 1

        logger.debug("CloneCleanupPass removed %d clone nodes", count)

vllm/compilation/passes/pass_manager.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from vllm.platforms import current_platform
1515
from vllm.utils.system_utils import set_env_var
1616

17-
from .ir.lowering_pass import VllmIRLoweringPass
17+
from .ir.lowering_pass import CloneCleanupPass, VllmIRLoweringPass
1818
from .vllm_inductor_pass import VllmInductorPass
1919

2020
if rocm_aiter_ops.is_enabled():
@@ -109,6 +109,8 @@ def __call__(self, graph: fx.Graph) -> None:
109109
# DCE handles mutating ops correctly as well.
110110
self.ir_lowering(graph)
111111
VllmInductorPass.dump_prefix += 1
112+
self.clone_cleanup(graph)
113+
VllmInductorPass.dump_prefix += 1
112114

113115
# clean up after lowering again
114116
self.post_cleanup(graph)
@@ -161,6 +163,7 @@ def configure(self, config: VllmConfig) -> None:
161163
self.passes += [QKNormRoPEFusionPass(config)]
162164

163165
self.ir_lowering = VllmIRLoweringPass(config)
166+
self.clone_cleanup = CloneCleanupPass(config)
164167
self.post_cleanup = PostCleanupPass(config)
165168
self.fix_functionalization = FixFunctionalizationPass(config)
166169

@@ -182,6 +185,7 @@ def uuid(self) -> str:
182185

183186
passes.append(self.post_cleanup.uuid())
184187
passes.append(self.ir_lowering.uuid())
188+
passes.append(self.clone_cleanup.uuid())
185189
passes.append(self.post_cleanup.uuid())
186190
passes.append(self.fix_functionalization.uuid())
187191

vllm/compilation/passes/utility/noop_elimination.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,14 @@ def __call__(self, graph: torch.fx.Graph) -> None:
6969
count = 0
7070
# Remove no-op reshapes/views:
7171
for node in graph.nodes:
72+
if "custom" in node.meta:
73+
logger.info(
74+
"Node %s with meta['custom']=%s, users: %s",
75+
node,
76+
node.meta["custom"],
77+
list(node.users),
78+
)
79+
7280
if is_func(node, torch.ops.aten.reshape.default):
7381
# Case 1: rewrite reshape chains to reshapes on the base tensor
7482
input = node.args[0]

0 commit comments

Comments
 (0)