forked from waybarrios/vllm-mlx
-
Notifications
You must be signed in to change notification settings - Fork 56
Expand file tree
/
Copy pathtest_optimizations.py
More file actions
110 lines (78 loc) · 3.05 KB
/
test_optimizations.py
File metadata and controls
110 lines (78 loc) · 3.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# SPDX-License-Identifier: Apache-2.0
"""
Tests for vllm-mlx hardware detection, optimization status reporting, and
memory bandwidth benchmarking.

Usage:
    pytest tests/test_optimizations.py -v
"""
import pytest
class TestHardwareDetection:
    """Checks for the hardware-detection helpers in ``vllm_mlx.optimizations``."""

    def test_detect_hardware(self):
        """``detect_hardware()`` returns an object with populated fields."""
        from vllm_mlx.optimizations import detect_hardware

        profile = detect_hardware()

        assert profile is not None
        assert profile.chip_name is not None
        # Every numeric field must be strictly positive.
        for field in ("total_memory_gb", "memory_bandwidth_gbs", "gpu_cores"):
            assert getattr(profile, field) > 0

    def test_get_system_memory(self):
        """``get_system_memory_gb()`` reports a positive amount below 1 TB."""
        from vllm_mlx.optimizations import get_system_memory_gb

        total_gb = get_system_memory_gb()

        # Upper bound is a sanity check: less than 1TB.
        assert 0 < total_gb < 1024

    def test_hardware_profiles_exist(self):
        """``HARDWARE_PROFILES`` is non-empty and contains the expected chips."""
        from vllm_mlx.optimizations import HARDWARE_PROFILES

        assert len(HARDWARE_PROFILES) >= 1
        for chip in ("M1", "M4 Max"):
            assert chip in HARDWARE_PROFILES
class TestOptimizationStatus:
    """Checks for the report returned by ``get_optimization_status()``."""

    def test_get_optimization_status(self):
        """The status report exposes the expected top-level and hardware keys."""
        from vllm_mlx.optimizations import get_optimization_status

        report = get_optimization_status()

        # Top-level sections of the report.
        for section in ("hardware", "mlx_memory", "mlx_lm_features"):
            assert section in report

        # Keys expected inside the "hardware" section.
        hardware = report["hardware"]
        for key in ("chip", "device_name"):
            assert key in hardware
class TestMemoryBandwidth:
    """Checks for the memory bandwidth benchmark helper."""

    @pytest.mark.slow
    def test_memory_bandwidth_benchmark(self):
        """The benchmark reports a result for each expected buffer size.

        The results are also printed so they can be inspected when pytest is
        run with output capture disabled.
        """
        from vllm_mlx.optimizations import benchmark_memory_bandwidth

        measurements = benchmark_memory_bandwidth()

        for label in ("1MB", "4MB", "16MB"):
            assert label in measurements

        rule = "=" * 50
        print(f"\n{rule}")
        print("Memory Bandwidth Benchmark")
        print(rule)
        for label, rate in measurements.items():
            print(f"{label}: {rate}")
        print(rule)
def run_quick_test():
    """Print a short hardware-detection report to stdout.

    Calls ``detect_hardware()`` and ``get_optimization_status()`` and prints
    the detected chip, memory, bandwidth and GPU core count, followed by the
    entries of the ``"mlx_lm_features"`` section of the status report.
    """
    from vllm_mlx.optimizations import detect_hardware, get_optimization_status

    banner = "=" * 60
    print(banner, "Quick Hardware Detection Test", banner, sep="\n")

    detected = detect_hardware()
    print("\nHardware Detection:")
    print(f" Chip: {detected.chip_name}")
    print(f" Memory: {detected.total_memory_gb:.1f} GB")
    print(f" Bandwidth: {detected.memory_bandwidth_gbs} GB/s")
    print(f" GPU Cores: {detected.gpu_cores}")

    report = get_optimization_status()
    print("\nMLX-LM Features (built-in):")
    for name, enabled in report["mlx_lm_features"].items():
        print(f" {name}: {enabled}")

    print("\n" + banner, "Done!", banner, sep="\n")
if __name__ == "__main__":
    # Running the file directly skips pytest and just prints the quick
    # hardware report.
    run_quick_test()