29 changes: 23 additions & 6 deletions docker/Dockerfile
@@ -66,31 +66,48 @@ ENV MAX_JOBS=${max_jobs}
 ARG nvcc_threads=8
 ENV NVCC_THREADS=$nvcc_threads
 
+RUN rm -rf /root/.cache/pip/*
+
 RUN --mount=type=cache,target=/root/.cache/pip \
     python3 -m pip install -r requirements-build.txt
 
 WORKDIR /workspace
 
+RUN git clone -b v0.1.3 https://github.com/bijouvj/LMCache.git
 RUN git clone https://github.com/LMCache/torchac_cuda
 
+WORKDIR /workspace/LMCache
+RUN --mount=type=cache,target=/root/.cache/ccache \
+    --mount=type=cache,target=/root/.cache/pip \
+    python3 setup.py bdist_wheel --dist-dir=dist_lmc_kvikio
+
 WORKDIR /workspace/torchac_cuda
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/pip \
-    python3 setup.py bdist_wheel --dist-dir=dist_torchac_cuda
+    python3 setup.py bdist_wheel --dist-dir=/workspace/LMCache/dist_lmc_kvikio
 
 WORKDIR /workspace
 
 #################### vLLM installation IMAGE ####################
 # Install torchac_cuda wheel into the vLLM image
 FROM vllm/vllm-openai:v0.6.2 AS vllm-openai
-RUN --mount=type=bind,from=build,src=/workspace/torchac_cuda/dist_torchac_cuda,target=/vllm-workspace/dist_torchac_cuda \
+RUN --mount=type=bind,from=build,src=/workspace/LMCache/dist_lmc_kvikio,target=/vllm-workspace/dist_lmc_kvikio \
     --mount=type=cache,target=/root/.cache/pip \
-    pip install dist_torchac_cuda/*.whl --verbose
+    pip install dist_lmc_kvikio/*.whl --verbose
 
 #################### LMCache test SERVER ####################
 # LMCache server setup using the vllm-install stage as base
 FROM vllm-openai AS vllm-lmcache
 
-ARG LMCACHE_VERSION=0.1.3
-RUN pip install lmcache lmcache_vllm
+WORKDIR /workspace
+RUN git clone -b v0.6.2.2 https://github.com/LMCache/lmcache-vllm.git
+WORKDIR /workspace/lmcache-vllm
+RUN --mount=type=cache,target=/root/.cache/ccache \
+    --mount=type=cache,target=/root/.cache/pip \
+    python3 setup.py bdist_wheel --dist-dir=dist_lmcache_vllm
+RUN pip install dist_lmcache_vllm/*.whl --verbose
+
+RUN pip install kvikio-cu12
+RUN python3 -m pip install --upgrade setuptools
 
 ENTRYPOINT ["lmcache_vllm", "serve"]
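The stages now hand wheels off through /workspace/LMCache/dist_lmc_kvikio: the build stage drops both the LMCache and torchac_cuda wheels there, the runtime stage installs them from a bind mount, and lmcache_vllm is built and installed inside the final stage itself. A quick smoke test for the resulting image, as a sketch: assuming the final stage is built with something like `docker build --target vllm-lmcache -t vllm-lmcache -f docker/Dockerfile .`, and assuming the distribution names match the wheels above, the following can be run inside the container.

# Hypothetical check, run inside the vllm-lmcache container; the
# distribution names are assumptions based on the wheels installed above.
from importlib.metadata import version

for dist in ("lmcache", "lmcache_vllm", "torchac_cuda", "kvikio-cu12"):
    print(dist, version(dist))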
16 changes: 13 additions & 3 deletions lmcache_vllm/__init__.py
@@ -10,8 +10,8 @@
 from lmcache.logging import init_logger
 logger = init_logger(__name__)
 
-EXPECTED_VLLM_VERSIONS = ["0.6.1.dev238+ge2c6e0a82"]
-__version__ = "0.6.2.3"
+EXPECTED_VLLM_VERSIONS = ["0.6.1.dev238+ge2c6e0a82", "0.8.4"]
+__version__ = "0.8.4.1"
 
 
 def check_library_version(library_name, required_versions):
@@ -23,6 +23,11 @@ def check_library_version(library_name, required_versions):
             if lib.__version__ in required_versions:
                 return True
             else:
+                # In case the version starts with one of the required versions but has an extra suffix
+                for req_ver in required_versions:
+                    if lib.__version__.startswith(req_ver):
+                        logger.info(f"vLLM version {lib.__version__} matches required version {req_ver}")
+                        return True
                 logger.error(f"Version mismatch: {lib.__version__} found, {required_versions} required.")
                 return False
         else:
@@ -35,7 +40,12 @@ def check_library_version(library_name, required_versions):
 def initialize_environment():
     # Check vllm and its version
     logger.info(f"Initializing lmcache_vllm version {__version__}, supporting vllm versions: {EXPECTED_VLLM_VERSIONS}")
-    assert check_library_version("vllm", EXPECTED_VLLM_VERSIONS), f"vllm {EXPECTED_VLLM_VERSIONS} not found"
+
+    # Check whether vLLM is installed and compatible
+    vllm_check = check_library_version("vllm", EXPECTED_VLLM_VERSIONS)
+    if not vllm_check:
+        logger.warning(f"vLLM version not in {EXPECTED_VLLM_VERSIONS}. LMCache may not work correctly.")
+
     is_experimental = os.getenv("LMCACHE_USE_EXPERIMENTAL")
     if is_experimental == 'True':
         InitLMCacheExperimentalEnvironment()
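Taken together, the three hunks relax the version gate: an exact match still passes first, a version that merely extends one of the required versions (for example a post-release build of 0.8.4) is now logged and accepted, and a full mismatch is downgraded from a hard assert to a warning so startup can proceed. A self-contained sketch of the relaxed check; the surrounding import and exception handling are not visible in the hunks, so they are assumptions here:

import importlib
import logging

logger = logging.getLogger(__name__)

def check_library_version(library_name, required_versions):
    """Return True if the installed version matches, or extends, a required one."""
    try:
        lib = importlib.import_module(library_name)
    except ImportError:
        logger.error(f"{library_name} is not installed.")
        return False
    version = getattr(lib, "__version__", None)
    if version is None:
        logger.error(f"{library_name} has no __version__ attribute.")
        return False
    if version in required_versions:
        return True
    # Prefix match: accepts suffixed builds such as "0.8.4.post1" for "0.8.4".
    for req_ver in required_versions:
        if version.startswith(req_ver):
            logger.info(f"{library_name} version {version} matches required version {req_ver}")
            return True
    logger.error(f"Version mismatch: {version} found, {required_versions} required.")
    return False

Note that startswith-based matching is loose: "0.8.4" would also accept "0.8.40". Comparing parsed release segments (e.g. via packaging.version) would be stricter.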
20 changes: 15 additions & 5 deletions lmcache_vllm/experimental/vllm_adapter.py
@@ -20,11 +20,8 @@
 from vllm.utils import get_kv_cache_torch_dtype
 
 from lmcache.logging import init_logger
-from lmcache.experimental.cache_engine import LMCacheEngine, LMCacheEngineBuilder
-from lmcache.experimental.gpu_connector import VLLMPagedMemGPUConnector
-from lmcache.experimental.config import LMCacheEngineConfig
-from lmcache.config import LMCacheEngineMetadata
-
+from lmcache.cache_engine import LMCacheEngine, LMCacheEngineBuilder
+from lmcache.config import LMCacheEngineConfig, LMCacheEngineMetadata
 from lmcache.utils import _lmcache_nvtx_annotate
 from lmcache_vllm.lmcache_utils import ENGINE_NAME, lmcache_get_config
 from lmcache_vllm.blend_adapter import remove_request_id_indices
@@ -738,3 +735,16 @@ def build_partial_prefill_input(
     )
 
     return rebuilt_model_input
+
+# Define a simple VLLMPagedMemGPUConnector class to replace the missing one
+class VLLMPagedMemGPUConnector:
+    """Simple connector for vLLM paged GPU memory.
+    This is a placeholder implementation to support lmcache.
+    """
+    def __init__(self, hidden_dim_size, num_layer):
+        self.hidden_dim_size = hidden_dim_size
+        self.num_layer = num_layer
+        logger.info(f"Initialized VLLMPagedMemGPUConnector with hidden_dim_size={hidden_dim_size}, num_layer={num_layer}")
+
+    def __str__(self):
+        return f"VLLMPagedMemGPUConnector(hidden_dim_size={self.hidden_dim_size}, num_layer={self.num_layer})"