vllm-project
diff --git a/‎vllm_ascend/pool/__init__.py‎ b/‎vllm_ascend/pool/__init__.py‎
diff --git a/‎vllm_ascend/pool/medatata.py‎
Lines changed: 0 additions & 11 deletions b/‎vllm_ascend/pool/medatata.py‎
Lines changed: 0 additions & 11 deletions
diff --git a/‎vllm_ascend/worker/npu_input_batch.py‎
Lines changed: 10 additions & 1 deletion b/‎vllm_ascend/worker/npu_input_batch.py‎
Lines changed: 10 additions & 1 deletion
@@ -26,10 +26,19 @@
                                              LogitsProcessors)
 from vllm.v1.worker.gpu_input_batch import InputBatch
 
-from vllm_ascend.pool.medatata import PoolingStates
 from vllm_ascend.worker.block_table import MultiGroupBlockTable
 
 
+class PoolingStates:
+    # Holder for hidden-state tensors cached for pooling. NOTE: remove this class once support for vLLM v0.12.0 is dropped.
+    def __init__(self):
+        # Cache of hidden-state tensors, used for chunked prefill with ALL pooling; starts out empty.
+        self.hidden_states_cache: list[torch.Tensor] = []
+
+    def clean(self):  # Discard every cached hidden-state tensor.
+        self.hidden_states_cache.clear()  # in-place clear: the same list object is kept
+
+
 class NPUInputBatch(InputBatch):
 
     def __init__(