Skip to content

Commit b6ca843

Browse files
feat: replace fixed 80% RAM reserve with bounded-reserve formula (#103)
* feat: replace fixed 80% RAM reserve with bounded-reserve formula * fix: guard estimate_usable_ram against negative return
1 parent 194db20 commit b6ca843

5 files changed

Lines changed: 59 additions & 5 deletions

File tree

src/whichllm/cli.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,9 @@ def _auto_min_params_for_profile(hardware: HardwareInfo, profile: str) -> float
127127
return None
128128
if not hardware.gpus:
129129
return 2.0 # CPU-only: tiny is the only practical choice
130-
usable_ram = int(hardware.ram_bytes * 0.80)
130+
from whichllm.hardware.memory import estimate_usable_ram
131+
132+
usable_ram = estimate_usable_ram(hardware.ram_bytes)
131133
best_vram_gb = max(
132134
(usable_ram if g.shared_memory and g.vram_bytes == 0 else g.vram_bytes)
133135
for g in hardware.gpus

src/whichllm/engine/compatibility.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from whichllm.engine.quantization import estimate_weight_bytes
99
from whichllm.engine.types import CompatibilityResult
1010
from whichllm.engine.vram import estimate_vram
11+
from whichllm.hardware.memory import estimate_usable_ram
1112
from whichllm.hardware.types import GPUInfo, HardwareInfo
1213
from whichllm.models.types import GGUFVariant, ModelInfo
1314

@@ -56,8 +57,7 @@ def check_compatibility(
5657

5758
vram_required = estimate_vram(model, variant, context_length)
5859

59-
# Reserve 20% of RAM for OS and other processes
60-
usable_ram = int(hardware.ram_bytes * 0.80)
60+
usable_ram = estimate_usable_ram(hardware.ram_bytes)
6161

6262
# Determine best GPU
6363
best_gpu: GPUInfo | None = None

src/whichllm/hardware/memory.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,17 @@ def detect_ram_bytes() -> int:
1313
return psutil.virtual_memory().total
1414

1515

16+
def estimate_usable_ram(total: int) -> int:
    """Return how many bytes of RAM can be budgeted for loading a model.

    A slice of memory is held back for the OS and background processes.
    The slice is 15% of ``total``, but never less than 4 GiB and never
    more than 32 GiB. Whatever remains is returned, floored at zero so
    that machines with no more RAM than the minimum reserve report 0
    instead of a negative budget.

    Args:
        total: Total system RAM in bytes.

    Returns:
        Usable RAM in bytes (always >= 0).
    """
    gib = 1024**3
    smallest_reserve = 4 * gib
    largest_reserve = 32 * gib

    # Proportional reserve, truncated to whole bytes.
    held_back = int(total * 0.15)
    if held_back < smallest_reserve:
        held_back = smallest_reserve
    elif held_back > largest_reserve:
        held_back = largest_reserve

    remaining = total - held_back
    return remaining if remaining > 0 else 0
25+
26+
1627
def detect_disk_free_bytes(path: str | None = None) -> int:
1728
"""Get free disk space in bytes at the given path.
1829

tests/test_compatibility.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Tests for compatibility checking."""
22

33
from whichllm.engine.compatibility import check_compatibility
4+
from whichllm.hardware.memory import estimate_usable_ram
45
from whichllm.hardware.types import GPUInfo, HardwareInfo
56
from whichllm.models.types import GGUFVariant, ModelInfo
67

@@ -92,7 +93,7 @@ def test_shared_memory_amd_apu_uses_system_memory_pool():
9293

9394
assert result.can_run is True
9495
assert result.fit_type == "full_gpu"
95-
assert result.vram_available_bytes == int(hw.ram_bytes * 0.80)
96+
assert result.vram_available_bytes == estimate_usable_ram(hw.ram_bytes)
9697
assert not any("offload" in w.lower() for w in result.warnings)
9798
assert not any("cpu only" in w.lower() for w in result.warnings)
9899

@@ -121,7 +122,7 @@ def test_windows_shared_memory_amd_apu_does_not_emit_rocm_warning():
121122

122123
assert result.can_run is True
123124
assert result.fit_type == "full_gpu"
124-
assert result.vram_available_bytes == int(hw.ram_bytes * 0.80)
125+
assert result.vram_available_bytes == estimate_usable_ram(hw.ram_bytes)
125126
assert not any("rocm" in w.lower() for w in result.warnings)
126127
assert not any("offload" in w.lower() for w in result.warnings)
127128

tests/test_memory.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""Tests for hardware.memory — estimate_usable_ram bounded-reserve formula."""
2+
3+
import pytest
4+
5+
from whichllm.hardware.memory import estimate_usable_ram
6+
7+
_GiB = 1024**3
8+
9+
10+
def _expected_usable(total: int) -> int:
    """Independently compute the value estimate_usable_ram should return.

    Mirrors the bounded-reserve formula: reserve 15% of ``total``, clamped
    to the range [4 GiB, 32 GiB], and subtract it from ``total``.

    Args:
        total: Total system RAM in bytes.

    Returns:
        Expected usable RAM in bytes, never negative.
    """
    reserve = int(total * 0.15)
    reserve = max(4 * _GiB, min(reserve, 32 * _GiB))
    # estimate_usable_ram floors its result at zero; without the same floor
    # this helper would go negative for totals below the 4 GiB minimum
    # reserve and disagree with the function it is meant to model.
    return max(0, total - reserve)
14+
15+
16+
@pytest.mark.parametrize(
    "total_gb",
    [
        pytest.param(16, id="16GB"),
        pytest.param(32, id="32GB"),
        pytest.param(64, id="64GB"),
        pytest.param(128, id="128GB"),
        pytest.param(1024, id="1TB"),
    ],
)
def test_estimate_usable_ram(total_gb):
    """estimate_usable_ram matches the reference formula across RAM sizes."""
    ram_bytes = total_gb * _GiB
    actual = estimate_usable_ram(ram_bytes)
    assert actual == _expected_usable(ram_bytes)
24+
25+
26+
def test_16gb_hits_min_reserve():
    """With 16 GiB, 15% is under 4 GiB, so the 4 GiB minimum reserve applies."""
    ram_bytes = 16 * _GiB
    min_reserve = 4 * _GiB
    assert estimate_usable_ram(ram_bytes) == ram_bytes - min_reserve
29+
30+
31+
def test_1tb_hits_max_reserve():
    """With 1 TiB, 15% exceeds 32 GiB, so the 32 GiB maximum reserve applies."""
    ram_bytes = 1024 * _GiB
    max_reserve = 32 * _GiB
    assert estimate_usable_ram(ram_bytes) == ram_bytes - max_reserve
34+
35+
36+
def test_midrange_uses_percentage():
    """With 64 GiB, the 15% reserve lies between the bounds and is used as-is."""
    ram_bytes = 64 * _GiB
    pct_reserve = int(ram_bytes * 0.15)
    # Sanity check: this size must actually fall strictly inside the clamp range.
    assert 4 * _GiB < pct_reserve < 32 * _GiB
    assert estimate_usable_ram(ram_bytes) == ram_bytes - pct_reserve

0 commit comments

Comments
 (0)