vllm-project
diff --git a/.github/workflows/scripts/test_config.yaml b/.github/workflows/scripts/test_config.yaml
Lines changed: 0 additions & 1 deletion
diff --git a/docs/source/user_guide/feature_guide/index.md b/docs/source/user_guide/feature_guide/index.md
Lines changed: 0 additions & 1 deletion
diff --git a/docs/source/user_guide/feature_guide/layerwise_kv_pool.md b/docs/source/user_guide/feature_guide/layerwise_kv_pool.md
Lines changed: 0 additions & 241 deletions
diff --git a/tests/ut/attention/a2/test_mla_v1.py b/tests/ut/attention/a2/test_mla_v1.py
Lines changed: 1 addition & 0 deletions
diff --git a/tests/ut/conftest.py b/tests/ut/conftest.py
Lines changed: 0 additions & 20 deletions
diff --git a/tests/ut/distributed/ascend_store/_mock_deps.py b/tests/ut/distributed/ascend_store/_mock_deps.py
Lines changed: 0 additions & 29 deletions
@@ -211,7 +211,6 @@
   optional: false
   source_file_dependencies:
     - vllm_ascend/distributed
-    - vllm_ascend/memcache_comm_fence.py
   tests:
     - tests/ut/distributed
     - tests/e2e/pull_request/two_card/test_data_parallel.py
 
@@ -18,7 +18,6 @@ rfork
 dynamic_batch
 epd_disaggregation
 kv_pool
-layerwise_kv_pool
 kv_cache_cpu_offload
 external_dp
 large_scale_ep
 
@@ -2115,6 +2115,7 @@ def test_mla_preprocess(self, mock_get_weight_prefetch_method, mock_maybe_all_ga
         self.impl._q_proj_and_k_up_proj = MagicMock()
         self.impl._q_proj_and_k_up_proj.return_value = [MagicMock(), MagicMock()]
         self.impl.num_kv_heads = self.impl.num_heads
+        self.impl.is_kv_producer = False
 
         decode_res, prefill_res = self.impl._mla_preprocess(
             "mock_layer", hidden_states, kv_cache, attn_metadata, need_gather_q_kv=False
 
@@ -133,23 +133,3 @@
 def _clear_enable_sp_before_test():
     clear_enable_sp()
     yield
-
-
-@pytest.fixture(autouse=True)
-def _mock_ascend_store_deps(request):
-    # ascend_store code imports vllm_ascend helpers (AttentionComputeStartGate,
-    # get/reset_attention_compute_start_gate, ...) which _mock_deps.py no longer
-    # mocks globally (mutating the real modules leaked into other UTs). Mock them
-    # per-test, scoped to the ascend_store tests only.
-    if "distributed/ascend_store/" not in request.node.nodeid:
-        yield
-        return
-    from unittest.mock import patch
-
-    _pfx = "vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store"
-    with (
-        patch(f"{_pfx}.pool_worker.get_attention_compute_start_gate"),
-        patch(f"{_pfx}.pool_worker.reset_attention_compute_start_gate"),
-        patch(f"{_pfx}.config_data.AttentionComputeStartGate", type("AttentionComputeStartGate", (), {})),
-    ):
-        yield
@@ -83,7 +83,6 @@
     "vllm.v1.attention",
     "vllm.v1.attention.backend",
     "vllm.v1.core",
-    "vllm.v1.core.block_pool",
     "vllm.v1.core.kv_cache_manager",
     "vllm.v1.core.kv_cache_utils",
     "vllm.v1.core.sched",
@@ -107,7 +106,6 @@
 _base_mod.KVConnectorRole = MagicMock()  # type: ignore[attr-defined]
 _base_mod.KVConnectorRole.SCHEDULER = "SCHEDULER"
 _base_mod.KVConnectorRole.WORKER = "WORKER"
-_base_mod.SupportsHMA = type("SupportsHMA", (), {})  # type: ignore[attr-defined]
 
 _events_mod = sys.modules["vllm.distributed.kv_events"]
 _events_mod.KVCacheEvent = type("KVCacheEvent", (), {})  # type: ignore[attr-defined]
@@ -207,35 +205,8 @@ def _make_pkg(name, path=""):
 )
 sys.modules["vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.backend"] = _backend_pkg
 
-# Mirror the real backend/__init__.py entry points. The scheduler/worker resolve
-# the backend class dynamically via ``importlib.import_module(path)``; tests that
-# exercise those paths patch ``<module>.importlib`` locally (see
-# test_pool_scheduler.py / test_pool_worker.py) so the backend resolves to a
-# MagicMock. Do NOT register the backends in sys.modules or globally wrap
-# import_module here: test_backend.py imports the real backend classes and also
-# relies on ``mock.patch`` (which itself calls importlib.import_module) resolving
-# those real modules.
-_backend_module_paths = {
-    "mooncake": "vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.backend.mooncake_backend",
-    "memcache": "vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.backend.memcache_backend",
-    "yuanrong": "vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.backend.yuanrong_backend",
-}
-_backend_pkg.backend_map = {  # type: ignore[attr-defined]
-    "mooncake": {"name": "MooncakeBackend", "path": _backend_module_paths["mooncake"]},
-    "memcache": {"name": "MemcacheBackend", "path": _backend_module_paths["memcache"]},
-    "yuanrong": {"name": "YuanrongBackend", "path": _backend_module_paths["yuanrong"]},
-}
-
 if "vllm_ascend.utils" not in sys.modules or not hasattr(sys.modules["vllm_ascend.utils"], "AscendDeviceType"):
     _ascend_utils = MagicMock()
     _ascend_utils.AscendDeviceType = MagicMock()
     _ascend_utils.get_ascend_device_type = MagicMock()
     sys.modules["vllm_ascend.utils"] = _ascend_utils
-
-# NOTE: vllm_ascend.{ascend_config, memcache_comm_fence} and their helpers
-# (get_ascend_config, AttentionComputeStartGate, ...) are intentionally NOT
-# mocked here. Doing so by mutating these real modules leaks into every other
-# UT in the same pytest session (breaking test_ascend_config / test_platform,
-# which collect after ascend_store and bind the polluted symbols at import).
-# These helpers are mocked per-test, scoped to the ascend_store tests only,
-# via the autouse fixture in tests/ut/conftest.py.