Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/experimental/offline_inference_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def main():
block_size=args.block_size,
enable_chunked_prefill=True,
max_num_batched_tokens=128,
gpu_memory_utilization=1,
gpu_memory_utilization=0.9,
enable_expert_parallel=args.enable_expert_parallel,
)

Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ ignore = [
"UP007",
]

[tool.pytest.ini_options]
markers = [
"cpu_test: mark test as CPU-only test",
]

[tool.mypy]
ignore_missing_imports = true
check_untyped_defs = true
Expand Down
21 changes: 0 additions & 21 deletions tests/torch_compile/common/test_logger.py

This file was deleted.

77 changes: 0 additions & 77 deletions tests/torch_compile/common/test_platform.py

This file was deleted.

78 changes: 0 additions & 78 deletions tests/torch_compile/common/test_rbln_envs.py

This file was deleted.

11 changes: 8 additions & 3 deletions tests/torch_compile/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@
# limitations under the License.

import pytest
from vllm.config import (CacheConfig, ModelConfig, ParallelConfig,
SchedulerConfig, VllmConfig)
from vllm.config import (
CacheConfig,
ModelConfig,
ParallelConfig,
SchedulerConfig,
VllmConfig,
)
from vllm.plugins import load_general_plugins


Expand All @@ -29,7 +34,7 @@ def initialize_environment():

@pytest.fixture
def vllm_config():
scheduler_config = SchedulerConfig()
scheduler_config = SchedulerConfig.default_factory()
model_config = ModelConfig(model="facebook/opt-125m")
cache_config = CacheConfig(
block_size=1024,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@

from vllm.v1.request import RequestStatus

from tests.torch_compile.v1.core.utils import (create_requests,
create_runner_output,
create_scheduler)
from tests.torch_compile.integrations.v1.core.utils import (
create_requests,
create_runner_output,
create_scheduler,
)


def test_schedule():
Expand Down Expand Up @@ -50,8 +52,9 @@ def test_schedule():
output = scheduler.schedule()
assert output.scheduled_cached_reqs.num_reqs == len(requests)
assert len(output.num_scheduled_tokens) == len(requests)
assert all(num_tokens == 1
for num_tokens in output.num_scheduled_tokens.values())
assert all(
num_tokens == 1 for num_tokens in output.num_scheduled_tokens.values()
)
assert len(output.finished_req_ids) == 0


Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# limitations under the License.

import pytest
from vllm.lora.models import LoRAModel
from vllm.lora.lora_model import LoRAModel
from vllm.lora.peft_helper import PEFTHelper
from vllm.model_executor.models.baichuan import BaiChuanBaseForCausalLM
from vllm.model_executor.models.utils import WeightsMapper
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
"""
Script to test add_lora, remove_lora, pin_lora, list_loras functions.
"""

import pytest

from vllm.engine.arg_utils import EngineArgs
from vllm.engine.llm_engine import LLMEngine
from vllm.lora.request import LoRARequest
Expand All @@ -27,11 +30,15 @@


def make_lora_request(lora_id: int):
return LoRARequest(lora_name=f"{lora_id}",
lora_int_id=lora_id,
lora_path=LORA_MODULE_PATH)
return LoRARequest(
lora_name=f"{lora_id}", lora_int_id=lora_id, lora_path=LORA_MODULE_PATH
)


@pytest.mark.skip(
reason="Integration test: requires RBLN device and conflicts with "
"session-scoped RBLN_DEVICES initialization"
)
def test_lora_functions_sync(monkeypatch):
monkeypatch.setenv("RBLN_PROFILER", "0")
monkeypatch.setenv("RBLN_KERNEL_MODEL", "triton")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# limitations under the License.

import pytest
from vllm.lora.models import LoRAModel
from vllm.lora.lora_model import LoRAModel
from vllm.lora.peft_helper import PEFTHelper
from vllm.lora.utils import get_adapter_absolute_path
from vllm.model_executor.models.llama import LlamaForCausalLM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,13 @@
from torch import nn
from vllm.config import ModelConfig, VllmConfig
from vllm.config.lora import LoRAConfig
from vllm.lora.layers import (ColumnParallelLinearWithLoRA,
from vllm.lora.layers import (ColumnParallelLinearWithLoRA, LoRAMapping,
MergedColumnParallelLinearWithLoRA,
RowParallelLinearWithLoRA)
from vllm.lora.lora_weights import LoRALayerWeights, PackedLoRALayerWeights
from vllm.lora.models import (LoRAMapping, LoRAModel, LoRAModelManager,
LRUCacheLoRAModelManager)
from vllm.lora.lora_model import LoRAModel
from vllm.lora.model_manager import (LoRAModelManager,
LRUCacheLoRAModelManager)
from vllm.lora.peft_helper import PEFTHelper
from vllm.lora.request import LoRARequest
from vllm.lora.worker_manager import (LRUCacheWorkerLoRAManager,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
set_current_vllm_config)
from vllm.config.load import LoadConfig
from vllm.config.lora import LoRAConfig
from vllm.lora.models import LoRAMapping
from vllm.lora.layers import LoRAMapping
from vllm.lora.request import LoRARequest

from vllm_rbln.v1.worker.rbln_worker import RBLNWorker as Worker
Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@
import pytest
import torch

pytestmark = pytest.mark.cpu_test


@pytest.fixture
def attn_metadata_mock():
from vllm_rbln.v1.attention.backends.flash_attention import (
RBLNFlashAttentionMetadata)
RBLNFlashAttentionMetadata,
)

attn_metadata_mock = MagicMock(spec=RBLNFlashAttentionMetadata)
attn_metadata_mock.num_actual_tokens = 16
Expand All @@ -31,13 +34,18 @@ def attn_metadata_mock():
def test_forward_context(vllm_config, attn_metadata_mock: MagicMock):
# forward_context
from vllm.forward_context import get_forward_context, set_forward_context

with set_forward_context(
attn_metadata_mock,
vllm_config,
num_tokens_across_dp=torch.tensor([0, 1]),
attn_metadata_mock,
vllm_config,
num_tokens_across_dp=torch.tensor([0, 1]),
num_padded_tokens=1,
):
# assert dp_metadata class name is RBLNDPMetadata
assert (get_forward_context().dp_metadata.__class__.__name__ ==
"RBLNDPMetadata"
), f"Expected 'dp_metadata' class name is RBLNDPMetadata, \
assert (
get_forward_context().dp_metadata.__class__.__name__
== "RBLNDPMetadata"
), (
f"Expected 'dp_metadata' class name is RBLNDPMetadata, \
got {get_forward_context().dp_metadata.__class__.__name__}"
)
Loading