Skip to content

Commit 8a85bc4

Browse files
authored
fix: fixes async warmup deadlock (#41995)
issue: #41993. Also updated cachinglayer metrics. Signed-off-by: Buqian Zheng <[email protected]>
1 parent f20e085 commit 8a85bc4

File tree

6 files changed

+36
-45
lines changed

6 files changed

+36
-45
lines changed

internal/core/src/cachinglayer/CacheSlot.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,10 @@ class CacheSlot final : public std::enable_shared_from_this<CacheSlot<CellT>> {
9090
SemiInlineGet(PinCells(std::move(cids)));
9191
break;
9292
case CacheWarmupPolicy::CacheWarmupPolicy_Async:
93+
// PinCells submits tasks to middle priority thread pool, thus here we submit to
94+
// low priority thread pool to avoid deadlock.
9395
auto& pool = milvus::ThreadPools::GetThreadPool(
94-
milvus::ThreadPoolPriority::MIDDLE);
96+
milvus::ThreadPoolPriority::LOW);
9597
pool.Submit([this, cids = std::move(cids)]() mutable {
9698
SemiInlineGet(PinCells(std::move(cids)));
9799
});

internal/core/src/cachinglayer/Utils.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -304,19 +304,24 @@ cache_op_result_count_miss(StorageType storage_type) {
304304
}
305305

306306
inline prometheus::Counter&
307-
cache_eviction_count(StorageType storage_type) {
307+
cache_cell_eviction_count(StorageType storage_type) {
308308
switch (storage_type) {
309309
case StorageType::MEMORY:
310-
return monitor::internal_cache_eviction_count_memory;
310+
return monitor::internal_cache_cell_eviction_count_memory;
311311
case StorageType::DISK:
312-
return monitor::internal_cache_eviction_count_disk;
312+
return monitor::internal_cache_cell_eviction_count_disk;
313313
case StorageType::MIXED:
314-
return monitor::internal_cache_eviction_count_mixed;
314+
return monitor::internal_cache_cell_eviction_count_mixed;
315315
default:
316316
PanicInfo(ErrorCode::UnexpectedError, "Unknown StorageType");
317317
}
318318
}
319319

320+
inline prometheus::Counter&
321+
cache_eviction_event_count() {
322+
return monitor::internal_cache_eviction_event_count_all;
323+
}
324+
320325
inline prometheus::Histogram&
321326
cache_item_lifetime_seconds(StorageType storage_type) {
322327
switch (storage_type) {

internal/core/src/cachinglayer/lrucache/DList.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,9 +160,10 @@ DList::tryEvict(const ResourceUsage& expected_eviction,
160160
usageInfo(actively_pinned));
161161
}
162162

163+
internal::cache_eviction_event_count().Increment();
163164
for (auto* list_node : to_evict) {
164165
auto size = list_node->size();
165-
internal::cache_eviction_count(size.storage_type()).Increment();
166+
internal::cache_cell_eviction_count(size.storage_type()).Increment();
166167
popItem(list_node);
167168
list_node->clear_data();
168169
used_memory_ -= size;

internal/core/src/monitor/prometheus_client.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -454,18 +454,25 @@ DEFINE_PROMETHEUS_GAUGE(internal_cache_capacity_bytes_mixed,
454454
cacheMixedLabel);
455455

456456
// Eviction count and resource size
457-
DEFINE_PROMETHEUS_COUNTER_FAMILY(internal_cache_eviction_count,
458-
"[cpp]cache eviction count");
459-
DEFINE_PROMETHEUS_COUNTER(internal_cache_eviction_count_memory,
460-
internal_cache_eviction_count,
457+
DEFINE_PROMETHEUS_COUNTER_FAMILY(internal_cache_cell_eviction_count,
458+
"[cpp]cache cell eviction count");
459+
DEFINE_PROMETHEUS_COUNTER(internal_cache_cell_eviction_count_memory,
460+
internal_cache_cell_eviction_count,
461461
cacheMemoryLabel);
462-
DEFINE_PROMETHEUS_COUNTER(internal_cache_eviction_count_disk,
463-
internal_cache_eviction_count,
462+
DEFINE_PROMETHEUS_COUNTER(internal_cache_cell_eviction_count_disk,
463+
internal_cache_cell_eviction_count,
464464
cacheDiskLabel);
465-
DEFINE_PROMETHEUS_COUNTER(internal_cache_eviction_count_mixed,
466-
internal_cache_eviction_count,
465+
DEFINE_PROMETHEUS_COUNTER(internal_cache_cell_eviction_count_mixed,
466+
internal_cache_cell_eviction_count,
467467
cacheMixedLabel);
468468

469+
// Eviction event count
470+
DEFINE_PROMETHEUS_COUNTER_FAMILY(internal_cache_eviction_event_count,
471+
"[cpp]cache eviction event count");
472+
DEFINE_PROMETHEUS_COUNTER(internal_cache_eviction_event_count_all,
473+
internal_cache_eviction_event_count,
474+
{});
475+
469476
DEFINE_PROMETHEUS_COUNTER_FAMILY(internal_cache_evicted_bytes,
470477
"[cpp]total bytes evicted from cache");
471478
DEFINE_PROMETHEUS_COUNTER(internal_cache_evicted_bytes_memory,

internal/core/src/monitor/prometheus_client.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -197,15 +197,18 @@ DECLARE_PROMETHEUS_GAUGE_FAMILY(internal_cache_capacity_bytes);
197197
DECLARE_PROMETHEUS_GAUGE(internal_cache_capacity_bytes_memory);
198198
DECLARE_PROMETHEUS_GAUGE(internal_cache_capacity_bytes_disk);
199199

200-
DECLARE_PROMETHEUS_COUNTER_FAMILY(internal_cache_eviction_count);
201-
DECLARE_PROMETHEUS_COUNTER(internal_cache_eviction_count_memory);
202-
DECLARE_PROMETHEUS_COUNTER(internal_cache_eviction_count_disk);
203-
DECLARE_PROMETHEUS_COUNTER(internal_cache_eviction_count_mixed);
200+
DECLARE_PROMETHEUS_COUNTER_FAMILY(internal_cache_cell_eviction_count);
201+
DECLARE_PROMETHEUS_COUNTER(internal_cache_cell_eviction_count_memory);
202+
DECLARE_PROMETHEUS_COUNTER(internal_cache_cell_eviction_count_disk);
203+
DECLARE_PROMETHEUS_COUNTER(internal_cache_cell_eviction_count_mixed);
204204

205205
DECLARE_PROMETHEUS_COUNTER_FAMILY(internal_cache_evicted_bytes);
206206
DECLARE_PROMETHEUS_COUNTER(internal_cache_evicted_bytes_memory);
207207
DECLARE_PROMETHEUS_COUNTER(internal_cache_evicted_bytes_disk);
208208

209+
DECLARE_PROMETHEUS_COUNTER_FAMILY(internal_cache_eviction_event_count);
210+
DECLARE_PROMETHEUS_COUNTER(internal_cache_eviction_event_count_all);
211+
209212
DECLARE_PROMETHEUS_HISTOGRAM_FAMILY(internal_cache_item_lifetime_seconds);
210213
DECLARE_PROMETHEUS_HISTOGRAM(internal_cache_item_lifetime_seconds_memory);
211214
DECLARE_PROMETHEUS_HISTOGRAM(internal_cache_item_lifetime_seconds_disk);

internal/core/src/storage/ThreadPool.h

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -131,31 +131,4 @@ class ThreadPool {
131131
std::string name_;
132132
};
133133

134-
class Worker {
135-
private:
136-
int id_;
137-
ThreadPool* pool_;
138-
139-
public:
140-
Worker(ThreadPool* pool, const int id) : pool_(pool), id_(id) {
141-
}
142-
143-
void
144-
operator()() {
145-
std::function<void()> func;
146-
bool dequeue;
147-
while (!pool_->shutdown_) {
148-
std::unique_lock<std::mutex> lock(pool_->mutex_);
149-
if (pool_->work_queue_.empty()) {
150-
pool_->condition_lock_.wait(lock);
151-
}
152-
dequeue = pool_->work_queue_.dequeue(func);
153-
lock.unlock();
154-
if (dequeue) {
155-
func();
156-
}
157-
}
158-
}
159-
};
160-
161134
} // namespace milvus

0 commit comments

Comments
 (0)