22#include " mlx/backend/metal/allocator.h"
33#include " mlx/backend/metal/metal.h"
44#include " mlx/backend/metal/metal_impl.h"
5+ #include " mlx/backend/metal/resident.h"
56
67#include < mach/vm_page_size.h>
78#include < unistd.h>
@@ -140,6 +141,7 @@ void BufferCache::remove_from_list(BufferCache::BufferHolder* to_remove) {
140141
141142MetalAllocator::MetalAllocator ()
142143 : device_(device(mlx::core::Device::gpu).mtl_device()),
144+ residency_set_ (device_),
143145 buffer_cache_(device_) {
144146 auto memsize = std::get<size_t >(device_info ()[" memory_size" ]);
145147 block_limit_ =
@@ -148,6 +150,8 @@ MetalAllocator::MetalAllocator()
148150 static_cast <size_t >(0.95 * device_->recommendedMaxWorkingSetSize ()),
149151 block_limit_);
150152 max_pool_size_ = block_limit_;
153+ device (mlx::core::Device::gpu)
154+ .set_residency_set (residency_set_.mtl_residency_set ());
151155}
152156
153157size_t MetalAllocator::set_cache_limit (size_t limit) {
@@ -164,6 +168,12 @@ size_t MetalAllocator::set_memory_limit(size_t limit, bool relaxed) {
164168 return limit;
165169};
166170
171+ size_t MetalAllocator::set_wired_limit (size_t limit) {
172+ std::swap (limit, wired_limit_);
173+ residency_set_.resize (wired_limit_);
174+ return limit;
175+ };
176+
167177Buffer MetalAllocator::malloc (size_t size, bool allow_swap /* = false */ ) {
168178 // Metal doesn't like empty buffers
169179 if (size == 0 ) {
@@ -220,6 +230,8 @@ Buffer MetalAllocator::malloc(size_t size, bool allow_swap /* = false */) {
220230 buffer_cache_.release_cached_buffers (get_cache_memory () - max_pool_size_);
221231 }
222232
233+ residency_set_.insert (buf);
234+
223235 return Buffer{static_cast <void *>(buf)};
224236}
225237
@@ -231,6 +243,7 @@ void MetalAllocator::clear_cache() {
231243void MetalAllocator::free (Buffer buffer) {
232244 auto buf = static_cast <MTL::Buffer*>(buffer.ptr ());
233245 std::unique_lock lk (mutex_);
246+ residency_set_.erase (buf);
234247 active_memory_ -= buf->length ();
235248 if (get_cache_memory () < max_pool_size_) {
236249 buffer_cache_.recycle_to_cache (buf);
@@ -246,15 +259,9 @@ size_t MetalAllocator::size(Buffer buffer) const {
246259}
247260
248261MetalAllocator& allocator () {
249- // By creating the |allocator_| on heap, the destructor of MetalAllocator will
250- // not be called on exit and all the buffers will be leaked. This is necessary
251- // because releasing buffers can take more than 30sec when the program holds a
252- // lot of RAM (for example inferencing a LLM), and it would feel frozen to
253- // users when exiting.
254- // TODO(zcbenz): Consider using the `base::NoDestructor` class from Chromium
255- // when applying this pattern to more places, or when introducing sanitizers
256- // to MLX.
257- // https://source.chromium.org/chromium/chromium/src/+/main:base/no_destructor.h
262+ // By creating the |allocator_| on heap, the destructor of MetalAllocator
263+ // will not be called on exit and buffers in the cache will be leaked. This
264+ // can save some time at program exit.
258265 static MetalAllocator* allocator_ = new MetalAllocator;
259266 return *allocator_;
260267}
@@ -265,6 +272,15 @@ size_t set_cache_limit(size_t limit) {
265272size_t set_memory_limit (size_t limit, bool relaxed /* = true */ ) {
266273 return allocator ().set_memory_limit (limit, relaxed);
267274}
275+ size_t set_wired_limit (size_t limit) {
276+ if (limit >
277+ std::get<size_t >(device_info ()[" max_recommended_working_set_size" ])) {
278+ throw std::invalid_argument (
279+ " [metal::set_wired_limit] Setting a wired limit larger than "
280+ " the maximum working set size is not allowed." );
281+ }
282+ return allocator ().set_wired_limit (limit);
283+ }
268284size_t get_active_memory () {
269285 return allocator ().get_active_memory ();
270286}
0 commit comments