Memory pressure safety valve #1103
```diff
@@ -7,6 +7,8 @@
 from inference.core.managers.base import Model, ModelManager
 from inference.core.managers.decorators.base import ModelManagerDecorator
 from inference.core.managers.entities import ModelDescription
+from inference.core.env import MEMORY_FREE_THRESHOLD
+import torch


 class WithFixedSizeCache(ModelManagerDecorator):
```
```diff
@@ -42,7 +44,7 @@ def add_model(
             return None

         logger.debug(f"Current capacity of ModelManager: {len(self)}/{self.max_size}")
-        while len(self) >= self.max_size:
+        while (len(self) >= self.max_size or self.memory_pressure_detected()):
             to_remove_model_id = self._key_queue.popleft()
             logger.debug(
                 f"Reached maximum capacity of ModelManager. Unloading model {to_remove_model_id}"
```
```diff
@@ -141,3 +143,15 @@ def _resolve_queue_id(
         self, model_id: str, model_id_alias: Optional[str] = None
     ) -> str:
         return model_id if model_id_alias is None else model_id_alias
+
+    def memory_pressure_detected(self) -> bool:
+        return_boolean = False
+        try:
+            if torch.cuda.is_available():
+                free_memory, total_memory = torch.cuda.mem_get_info()
+                return_boolean = float(free_memory / total_memory) < MEMORY_FREE_THRESHOLD
+                logger.debug(f"Free memory: {free_memory}, Total memory: {total_memory}, threshold: {MEMORY_FREE_THRESHOLD}, return_boolean: {return_boolean}")
+            # TODO: Add memory calculation for other non-CUDA devices
+        except Exception as e:
+            logger.error(f"Failed to check CUDA memory pressure: {e}, returning {return_boolean}")
+        return return_boolean
```

Review comments on `memory_pressure_detected`:

- I would feature flag this and import torch locally here.
- Not sure how this would work in a multi-GPU environment, but we can probably wait until someone actually uses it like that (it seems we are probing the default device here).
- Fixed this.
- The value of `MEMORY_FREE_THRESHOLD` is 0 by default, so memory pressure is not checked by default.
- ok
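As the thread notes, the pressure check is opt-in: the default threshold of 0 means the condition never fires. A hypothetical sketch of how such an environment-driven threshold could be defined (the actual `inference.core.env` code may differ):

```python
import os

# Fraction of GPU memory that must remain free before eviction kicks in.
# 0.0 (the default) disables the memory-pressure check entirely.
MEMORY_FREE_THRESHOLD = float(os.getenv("MEMORY_FREE_THRESHOLD", "0.0"))
```

Under this reading, starting the server with `MEMORY_FREE_THRESHOLD=0.1` would ask the cache to start unloading models once less than 10% of GPU memory is free.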
Review comments on the `import torch` line:

- torch is not always installed, so it shouldn't be imported directly.
- Fixed, moved it inside the function.
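A minimal sketch of what the post-review version could look like, with the check feature-flagged on `MEMORY_FREE_THRESHOLD` and `torch` imported lazily inside the function. This is an illustration of the suggested fix, not necessarily the exact code that landed in later commits; it is written as a standalone helper with its own logger for self-containment, whereas in the PR it is a method on `WithFixedSizeCache` that uses the module's existing logger:

```python
import logging

from inference.core.env import MEMORY_FREE_THRESHOLD

logger = logging.getLogger(__name__)


def memory_pressure_detected() -> bool:
    return_boolean = False
    # Feature flag: MEMORY_FREE_THRESHOLD defaults to 0, which skips the probe
    # (and the torch import) entirely unless the operator opts in.
    if MEMORY_FREE_THRESHOLD <= 0:
        return return_boolean
    try:
        import torch  # lazy import: torch is not installed in every deployment

        if torch.cuda.is_available():
            free_memory, total_memory = torch.cuda.mem_get_info()
            return_boolean = (free_memory / total_memory) < MEMORY_FREE_THRESHOLD
            logger.debug(
                f"Free memory: {free_memory}, Total memory: {total_memory}, "
                f"threshold: {MEMORY_FREE_THRESHOLD}, return_boolean: {return_boolean}"
            )
        # TODO: Add memory calculation for other non-CUDA devices
    except Exception as e:
        logger.error(
            f"Failed to check CUDA memory pressure: {e}, returning {return_boolean}"
        )
    return return_boolean
```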