@@ -4,7 +4,7 @@
 from inference.core import logger
 from inference.core.entities.requests.inference import InferenceRequest
 from inference.core.entities.responses.inference import InferenceResponse
-from inference.core.env import DISK_CACHE_CLEANUP
+from inference.core.env import DISK_CACHE_CLEANUP, MEMORY_FREE_THRESHOLD
 from inference.core.managers.base import Model, ModelManager
 from inference.core.managers.decorators.base import ModelManagerDecorator
 from inference.core.managers.entities import ModelDescription
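The new import, MEMORY_FREE_THRESHOLD, gates the memory-pressure check introduced below. A minimal sketch of how such an env-driven setting is typically defined (hypothetical; the actual variable parsing and default in inference.core.env may differ):

import os

# Hypothetical sketch only; the real parsing and default in
# inference.core.env may differ. A falsy value (0.0) disables the check
# entirely, because the eviction loop below guards on
# `MEMORY_FREE_THRESHOLD and self.memory_pressure_detected()`.
MEMORY_FREE_THRESHOLD = float(os.getenv("MEMORY_FREE_THRESHOLD", "0.0"))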
@@ -43,7 +43,9 @@ def add_model(
             return None

         logger.debug(f"Current capacity of ModelManager: {len(self)}/{self.max_size}")
-        while len(self) >= self.max_size:
+        while len(self) >= self.max_size or (
+            MEMORY_FREE_THRESHOLD and self.memory_pressure_detected()
+        ):
             to_remove_model_id = self._key_queue.popleft()
             super().remove(
                 to_remove_model_id, delete_from_disk=DISK_CACHE_CLEANUP
@@ -141,3 +143,23 @@ def _resolve_queue_id(
         self, model_id: str, model_id_alias: Optional[str] = None
     ) -> str:
         return model_id if model_id_alias is None else model_id_alias
+
+    def memory_pressure_detected(self) -> bool:
+        return_boolean = False
+        try:
+            import torch
+
+            if torch.cuda.is_available():
+                free_memory, total_memory = torch.cuda.mem_get_info()
+                return_boolean = (
+                    float(free_memory / total_memory) < MEMORY_FREE_THRESHOLD
+                )
+                logger.debug(
+                    f"Free memory: {free_memory}, Total memory: {total_memory}, threshold: {MEMORY_FREE_THRESHOLD}, return_boolean: {return_boolean}"
+                )
+            # TODO: Add memory calculation for other non-CUDA devices
+        except Exception as e:
+            logger.error(
+                f"Failed to check CUDA memory pressure: {e}, returning {return_boolean}"
+            )
+        return return_boolean
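torch.cuda.mem_get_info() returns (free, total) in bytes for the current CUDA device, so pressure is reported once the free fraction drops below the threshold. Importing torch inside the method keeps the manager importable without it, and the broad except fails open (returns False) so a probe failure never blocks model loading. A standalone probe of the same signal, assuming a CUDA-enabled PyTorch build:

import torch

# Prints the same free-memory fraction the new method compares against
# MEMORY_FREE_THRESHOLD; mem_get_info() values are in bytes.
if torch.cuda.is_available():
    free_memory, total_memory = torch.cuda.mem_get_info()
    free_fraction = free_memory / total_memory
    print(
        f"free: {free_fraction:.1%} of {total_memory / 1024**3:.1f} GiB "
        f"(pressure reported if below the threshold, e.g. 0.1)"
    )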