Skip to content

Commit 894038d

Browse files
committed
Revert "[KV Cache][Feature] Support Layerwise KV Pooling (#10077)"
This reverts commit 5e39074. Signed-off-by: F.Liu <1661888967@qq.com>
1 parent ab065ff commit 894038d

29 files changed

Lines changed: 972 additions & 5461 deletions

.github/workflows/scripts/test_config.yaml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -211,7 +211,6 @@
211211
optional: false
212212
source_file_dependencies:
213213
- vllm_ascend/distributed
214-
- vllm_ascend/memcache_comm_fence.py
215214
tests:
216215
- tests/ut/distributed
217216
- tests/e2e/pull_request/two_card/test_data_parallel.py

docs/source/user_guide/feature_guide/index.md

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@ rfork
1818
dynamic_batch
1919
epd_disaggregation
2020
kv_pool
21-
layerwise_kv_pool
2221
kv_cache_cpu_offload
2322
external_dp
2423
large_scale_ep

docs/source/user_guide/feature_guide/layerwise_kv_pool.md

Lines changed: 0 additions & 241 deletions
This file was deleted.

tests/ut/attention/a2/test_mla_v1.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2115,6 +2115,7 @@ def test_mla_preprocess(self, mock_get_weight_prefetch_method, mock_maybe_all_ga
21152115
self.impl._q_proj_and_k_up_proj = MagicMock()
21162116
self.impl._q_proj_and_k_up_proj.return_value = [MagicMock(), MagicMock()]
21172117
self.impl.num_kv_heads = self.impl.num_heads
2118+
self.impl.is_kv_producer = False
21182119

21192120
decode_res, prefill_res = self.impl._mla_preprocess(
21202121
"mock_layer", hidden_states, kv_cache, attn_metadata, need_gather_q_kv=False

tests/ut/conftest.py

Lines changed: 0 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -133,23 +133,3 @@
133133
def _clear_enable_sp_before_test():
134134
clear_enable_sp()
135135
yield
136-
137-
138-
@pytest.fixture(autouse=True)
139-
def _mock_ascend_store_deps(request):
140-
# ascend_store code imports vllm_ascend helpers (AttentionComputeStartGate,
141-
# get/reset_attention_compute_start_gate, ...) which _mock_deps.py no longer
142-
# mocks globally (mutating the real modules leaked into other UTs). Mock them
143-
# per-test, scoped to the ascend_store tests only.
144-
if "distributed/ascend_store/" not in request.node.nodeid:
145-
yield
146-
return
147-
from unittest.mock import patch
148-
149-
_pfx = "vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store"
150-
with (
151-
patch(f"{_pfx}.pool_worker.get_attention_compute_start_gate"),
152-
patch(f"{_pfx}.pool_worker.reset_attention_compute_start_gate"),
153-
patch(f"{_pfx}.config_data.AttentionComputeStartGate", type("AttentionComputeStartGate", (), {})),
154-
):
155-
yield

tests/ut/distributed/ascend_store/_mock_deps.py

Lines changed: 0 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,6 @@
8383
"vllm.v1.attention",
8484
"vllm.v1.attention.backend",
8585
"vllm.v1.core",
86-
"vllm.v1.core.block_pool",
8786
"vllm.v1.core.kv_cache_manager",
8887
"vllm.v1.core.kv_cache_utils",
8988
"vllm.v1.core.sched",
@@ -107,7 +106,6 @@
107106
_base_mod.KVConnectorRole = MagicMock() # type: ignore[attr-defined]
108107
_base_mod.KVConnectorRole.SCHEDULER = "SCHEDULER"
109108
_base_mod.KVConnectorRole.WORKER = "WORKER"
110-
_base_mod.SupportsHMA = type("SupportsHMA", (), {}) # type: ignore[attr-defined]
111109

112110
_events_mod = sys.modules["vllm.distributed.kv_events"]
113111
_events_mod.KVCacheEvent = type("KVCacheEvent", (), {}) # type: ignore[attr-defined]
@@ -207,35 +205,8 @@ def _make_pkg(name, path=""):
207205
)
208206
sys.modules["vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.backend"] = _backend_pkg
209207

210-
# Mirror the real backend/__init__.py entry points. The scheduler/worker resolve
211-
# the backend class dynamically via ``importlib.import_module(path)``; tests that
212-
# exercise those paths patch ``<module>.importlib`` locally (see
213-
# test_pool_scheduler.py / test_pool_worker.py) so the backend resolves to a
214-
# MagicMock. Do NOT register the backends in sys.modules or globally wrap
215-
# import_module here: test_backend.py imports the real backend classes and also
216-
# relies on ``mock.patch`` (which itself calls importlib.import_module) resolving
217-
# those real modules.
218-
_backend_module_paths = {
219-
"mooncake": "vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.backend.mooncake_backend",
220-
"memcache": "vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.backend.memcache_backend",
221-
"yuanrong": "vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.backend.yuanrong_backend",
222-
}
223-
_backend_pkg.backend_map = { # type: ignore[attr-defined]
224-
"mooncake": {"name": "MooncakeBackend", "path": _backend_module_paths["mooncake"]},
225-
"memcache": {"name": "MemcacheBackend", "path": _backend_module_paths["memcache"]},
226-
"yuanrong": {"name": "YuanrongBackend", "path": _backend_module_paths["yuanrong"]},
227-
}
228-
229208
if "vllm_ascend.utils" not in sys.modules or not hasattr(sys.modules["vllm_ascend.utils"], "AscendDeviceType"):
230209
_ascend_utils = MagicMock()
231210
_ascend_utils.AscendDeviceType = MagicMock()
232211
_ascend_utils.get_ascend_device_type = MagicMock()
233212
sys.modules["vllm_ascend.utils"] = _ascend_utils
234-
235-
# NOTE: vllm_ascend.{ascend_config, memcache_comm_fence} and their helpers
236-
# (get_ascend_config, AttentionComputeStartGate, ...) are intentionally NOT
237-
# mocked here. Doing so by mutating these real modules leaks into every other
238-
# UT in the same pytest session (breaking test_ascend_config / test_platform,
239-
# which collect after ascend_store and bind the polluted symbols at import).
240-
# These helpers are mocked per-test, scoped to the ascend_store tests only,
241-
# via the autouse fixture in tests/ut/conftest.py.

0 commit comments

Comments
 (0)