Skip to content

Commit 914575e

Browse files
committed
address comments
1 parent e4bc029 commit 914575e

9 files changed

+170
-244
lines changed

conanfile.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
class HomeObjectConan(ConanFile):
1111
name = "homeobject"
12-
version = "2.3.19"
12+
version = "2.3.20"
1313

1414
homepage = "https://github.com/eBay/HomeObject"
1515
description = "Blob Store built on HomeReplication"

src/lib/homestore_backend/gc_manager.cpp

+109-36
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,91 @@ namespace homeobject {
77
/* GCManager */
88

99
GCManager::GCManager(std::shared_ptr< HeapChunkSelector > chunk_selector, HSHomeObject* homeobject) :
10-
m_chunk_selector{chunk_selector}, m_hs_home_object{homeobject} {}
10+
m_chunk_selector{chunk_selector}, m_hs_home_object{homeobject} {
11+
homestore::HomeStore::instance()->meta_service().register_handler(
12+
GCManager::_gc_actor_meta_name,
13+
[this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) {
14+
on_gc_actor_meta_blk_found(std::move(buf), voidptr_cast(mblk));
15+
},
16+
nullptr, true);
17+
18+
homestore::HomeStore::instance()->meta_service().register_handler(
19+
GCManager::_gc_reserved_chunk_meta_name,
20+
[this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) {
21+
on_reserved_chunk_meta_blk_found(std::move(buf), voidptr_cast(mblk));
22+
},
23+
[this](bool success) {
24+
RELEASE_ASSERT(success, "Failed to recover all reserved chunk!!!");
25+
// we need to guarantee that pg meta blk is recovered before we start recovering reserved chunks
26+
m_chunk_selector->build_pdev_available_chunk_heap();
27+
},
28+
true);
29+
30+
homestore::HomeStore::instance()->meta_service().register_handler(
31+
GCManager::_gc_task_meta_name,
32+
[this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) {
33+
on_gc_task_meta_blk_found(std::move(buf), voidptr_cast(mblk));
34+
},
35+
nullptr, true);
36+
}
37+
38+
void GCManager::on_gc_task_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie) {
39+
homestore::superblk< GCManager::gc_task_superblk > gc_task_sb(GCManager::_gc_task_meta_name);
40+
const auto pending_gc_task = gc_task_sb.load(buf, meta_cookie);
41+
42+
// if a gc_task_super blk is found, we can make sure that all the valid data in move_from_chunk has been copied to
43+
// move_to_chunk, and all the blob -> (new pba) have been written to the gc index table. Now, what we need to do is
44+
// just updating blob indexes in pg index table according to the blob indexes in gc index table.
45+
46+
// pg_index_table: [pg_id, shard_id, blob_id] -> old pba
47+
// gc_index_table: [move_to_chunk_id, pg_id, shard_id, blob_id] -> new pba
48+
49+
// we need to find all keys with the prefix of move_to_chunk_id in gc index table, and update the corresponding
50+
// keys(same pg_id + shard_id + blob_id) in pg index table with the new pba.
51+
auto pdev_id = m_chunk_selector->get_extend_vchunk(pending_gc_task->move_from_chunk)->get_pdev_id();
52+
auto gc_actor = get_pdev_gc_actor(pdev_id);
53+
RELEASE_ASSERT(gc_actor, "can not get gc actor for pdev {}!", pdev_id);
54+
gc_actor->handle_recovered_gc_task(pending_gc_task);
55+
56+
// delete gc task meta blk, so that it will not be recovered again if restart
57+
gc_task_sb.destroy();
58+
}
59+
60+
void GCManager::on_gc_actor_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie) {
61+
homestore::superblk< GCManager::gc_actor_superblk > gc_actor_sb(GCManager::_gc_actor_meta_name);
62+
gc_actor_sb.load(buf, meta_cookie);
63+
auto pdev_id = gc_actor_sb->pdev_id;
64+
auto index_table_uuid = gc_actor_sb->index_table_uuid;
65+
66+
auto gc_index_table = m_hs_home_object->get_gc_index_table(boost::uuids::to_string(index_table_uuid));
67+
68+
RELEASE_ASSERT(gc_index_table, "can not get gc index table for pdev {} with uuid {}!", pdev_id,
69+
boost::uuids::to_string(index_table_uuid));
70+
71+
// create a gc actor for this pdev if not exists
72+
auto gc_actor = try_create_pdev_gc_actor(pdev_id, gc_index_table);
73+
RELEASE_ASSERT(gc_actor, "can not get gc actor for pdev {}!", pdev_id);
74+
}
75+
76+
void GCManager::on_reserved_chunk_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie) {
77+
homestore::superblk< GCManager::gc_reserved_chunk_superblk > reserved_chunk_sb(
78+
GCManager::_gc_reserved_chunk_meta_name);
79+
reserved_chunk_sb.load(buf, meta_cookie);
80+
auto chunk_id = reserved_chunk_sb->chunk_id;
81+
auto pdev_id = m_chunk_selector->get_extend_vchunk(chunk_id)->get_pdev_id();
82+
auto gc_actor = get_pdev_gc_actor(pdev_id);
83+
RELEASE_ASSERT(gc_actor, "can not get gc actor for pdev {}!", pdev_id);
84+
gc_actor->add_reserved_chunk(chunk_id);
85+
// mark a reserved chunk as gc state, so that it will not be selected as a gc candidate
86+
m_chunk_selector->try_mark_chunk_to_gc_state(chunk_id, true /* force */);
87+
}
1188

1289
GCManager::~GCManager() { stop(); }
1390

1491
void GCManager::start() {
1592
for (const auto& [pdev_id, gc_actor] : m_pdev_gc_actors) {
1693
gc_actor->start();
17-
LOGERROR("start gc actor for pdev={}", pdev_id);
94+
LOGINFO("start gc actor for pdev={}", pdev_id);
1895
}
1996

2097
m_gc_timer_hdl = iomanager.schedule_global_timer(
@@ -72,12 +149,22 @@ std::shared_ptr< GCManager::pdev_gc_actor > GCManager::get_pdev_gc_actor(uint32_
72149
return it->second;
73150
}
74151

75-
bool GCManager::is_eligible_for_gc(std::shared_ptr< HeapChunkSelector::ExtendedVChunk > chunk) {
152+
bool GCManager::is_eligible_for_gc(chunk_id_t chunk_id) {
153+
auto chunk = m_chunk_selector->get_extend_vchunk(chunk_id);
154+
76155
// 1 if the chunk state is inuse, it is occupied by a open shard, so it can not be selected and we don't need gc it.
77156
// 2 if the chunk state is gc, it means this chunk is being gc, or this is a reserved chunk, so we don't need gc it.
78-
if (chunk->m_state != ChunkState::AVAILABLE) return false;
157+
if (chunk->m_state != ChunkState::AVAILABLE) {
158+
LOGINFO("chunk_id={} state is {}, not eligible for gc", chunk_id, chunk->m_state)
159+
return false;
160+
}
79161
// it does not belong to any pg, so we don't need to gc it.
80-
if (!chunk->m_pg_id.has_value()) return false;
162+
if (!chunk->m_pg_id.has_value()) {
163+
LOGINFO("chunk_id={} belongs to no pg, not eligible for gc", chunk_id)
164+
return false;
165+
}
166+
167+
LOGINFO("chunk_id={} is eligible for gc, belongs to pg {}", chunk_id, chunk->m_pg_id.value());
81168

82169
auto defrag_blk_num = chunk->get_defrag_nblks();
83170
auto total_blk_num = chunk->get_total_blks();
@@ -88,21 +175,21 @@ bool GCManager::is_eligible_for_gc(std::shared_ptr< HeapChunkSelector::ExtendedV
88175
}
89176

90177
void GCManager::scan_chunks_for_gc() {
91-
// in every iteration, we will select at most 2 * RESERVED_CHUNK_NUM_PER_PDEV gc tasks
92-
auto max_task_num = 2 * (RESERVED_CHUNK_NUM_PER_PDEV - RESERVED_CHUNK_NUM_DEDICATED_FOR_EGC);
93-
94-
for (const auto& [_, chunk] : m_chunk_selector->get_all_chunks()) {
95-
if (is_eligible_for_gc(chunk)) {
96-
auto pdev_id = chunk->get_pdev_id();
97-
auto it = m_pdev_gc_actors.find(pdev_id);
98-
if (it != m_pdev_gc_actors.end()) {
99-
auto& actor = it->second;
100-
auto chunk_id = chunk->get_chunk_id();
178+
for (const auto& [pdev_id, chunks] : m_chunk_selector->get_pdev_chunks()) {
179+
// in every iteration, we will select at most 2 * RESERVED_CHUNK_NUM_PER_PDEV gc tasks
180+
auto max_task_num = 2 * (RESERVED_CHUNK_NUM_PER_PDEV - RESERVED_CHUNK_NUM_DEDICATED_FOR_EGC);
181+
auto it = m_pdev_gc_actors.find(pdev_id);
182+
RELEASE_ASSERT(it != m_pdev_gc_actors.end(), "can not find gc actor for pdev_id {} when scanning chunks for gc",
183+
pdev_id);
184+
auto& actor = it->second;
185+
186+
for (const auto& chunk_id : chunks) {
187+
if (is_eligible_for_gc(chunk_id)) {
101188
auto future = actor->add_gc_task(static_cast< uint8_t >(task_priority::normal), chunk_id);
102189
if (future.isReady()) {
103190
// future is not expected to be ready immediately. if it is ready here, it probably means failing to
104191
// add gc task. then we try to add one more.
105-
LOGWARN("failed to add gc task for chunk_id={} on pdev_id={}, return value= {}", chunk_id, pdev_id,
192+
LOGWARN("failed to add gc task for chunk_id={} on pdev_id={}, return value={}", chunk_id, pdev_id,
106193
future.value());
107194
} else if (0 == --max_task_num) {
108195
LOGINFO("reached max gc task limit for pdev_id={}, stopping further gc task submissions", pdev_id);
@@ -146,7 +233,7 @@ void GCManager::pdev_gc_actor::start() {
146233
void GCManager::pdev_gc_actor::stop() {
147234
bool stopped = false;
148235
if (!m_is_stopped.compare_exchange_strong(stopped, true, std::memory_order_release, std::memory_order_relaxed)) {
149-
LOGERROR("pdev gc actor for pdev_id={} is already stopped, no need to stop again!", m_pdev_id);
236+
LOGWARN("pdev gc actor for pdev_id={} is already stopped, no need to stop again!", m_pdev_id);
150237
return;
151238
}
152239
m_gc_executor->stop();
@@ -168,12 +255,12 @@ folly::SemiFuture< bool > GCManager::pdev_gc_actor::add_gc_task(uint8_t priority
168255

169256
if (sisl_unlikely(priority == static_cast< uint8_t >(task_priority::emergent))) {
170257
m_egc_executor->add([this, priority, move_from_chunk, promise = std::move(promise)]() mutable {
171-
LOGERROR("start gc task : move_from_chunk_id={}, priority={}", move_from_chunk, priority);
258+
LOGINFO("start emergent gc task : move_from_chunk_id={}, priority={}", move_from_chunk, priority);
172259
process_gc_task(move_from_chunk, priority, std::move(promise));
173260
});
174261
} else {
175262
m_gc_executor->add([this, priority, move_from_chunk, promise = std::move(promise)]() mutable {
176-
LOGERROR("start gc task : move_from_chunk_id={}, priority={}", move_from_chunk, priority);
263+
LOGINFO("start gc task : move_from_chunk_id={}, priority={}", move_from_chunk, priority);
177264
process_gc_task(move_from_chunk, priority, std::move(promise));
178265
});
179266
}
@@ -256,8 +343,8 @@ bool GCManager::pdev_gc_actor::copy_valid_data(chunk_id_t move_from_chunk, chunk
256343

257344
void GCManager::pdev_gc_actor::purge_reserved_chunk(chunk_id_t chunk) {
258345
auto vchunk = m_chunk_selector->get_extend_vchunk(chunk);
259-
RELEASE_ASSERT(!vchunk->m_pg_id.has_value(), "chunk_id={} is expected to a reserved chunk, and not belong to a pg",
260-
chunk);
346+
RELEASE_ASSERT(!vchunk->m_pg_id.has_value(),
347+
"chunk_id={} is expected to be a reserved chunk, and not belong to a pg", chunk);
261348
vchunk->reset(); // reset the chunk to make sure it is empty
262349

263350
// clear all the entries of this chunk in the gc index table
@@ -277,22 +364,8 @@ void GCManager::pdev_gc_actor::purge_reserved_chunk(chunk_id_t chunk) {
277364

278365
void GCManager::pdev_gc_actor::process_gc_task(chunk_id_t move_from_chunk, uint8_t priority,
279366
folly::Promise< bool > task) {
280-
auto move_from_vchunk = m_chunk_selector->get_extend_vchunk(move_from_chunk);
281-
282367
LOGINFO("start process gc task for move_from_chunk={} with priority={} ", move_from_chunk, priority);
283368

284-
// we need to select the move_from_chunk out of the per pg chunk heap in chunk selector if it is a gc task with
285-
// normal priority. for the task with emergent priority, it is already selected since it is now used for an open
286-
// shard.
287-
288-
if (!GCManager::is_eligible_for_gc(move_from_vchunk)) {
289-
LOGWARN("move_from_chunk={} is not eligible for gc, so we can not process the gc task", move_from_chunk);
290-
task.setValue(false);
291-
return;
292-
}
293-
294-
LOGINFO("move_from_chunk={} belongs to pg {} ", move_from_chunk, move_from_vchunk->m_pg_id.value());
295-
296369
// mark chunk as gc state, so that it can be selected for creating shard
297370
auto succeed = m_chunk_selector->try_mark_chunk_to_gc_state(
298371
move_from_chunk, priority == static_cast< uint8_t >(task_priority::emergent) /* force */);
@@ -353,7 +426,7 @@ void GCManager::pdev_gc_actor::process_gc_task(chunk_id_t move_from_chunk, uint8
353426

354427
GCManager::pdev_gc_actor::~pdev_gc_actor() {
355428
stop();
356-
LOGINFO("pdev gc actor for pdev_id={} is destroyed", m_pdev_id);
429+
LOGINFO("gc actor for pdev_id={} is destroyed", m_pdev_id);
357430
}
358431

359432
/* RateLimiter */

src/lib/homestore_backend/gc_manager.hpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -173,16 +173,20 @@ class GCManager {
173173
*/
174174
std::shared_ptr< pdev_gc_actor > try_create_pdev_gc_actor(uint32_t pdev_id,
175175
std::shared_ptr< GCBlobIndexTable > index_table);
176-
std::shared_ptr< pdev_gc_actor > get_pdev_gc_actor(uint32_t pdev_id);
177176

178-
static bool is_eligible_for_gc(std::shared_ptr< HeapChunkSelector::ExtendedVChunk > chunk);
177+
bool is_eligible_for_gc(chunk_id_t chunk_id);
179178

180179
void start();
181180
void stop();
182181

183182
private:
184183
void scan_chunks_for_gc();
185184

185+
void on_gc_task_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie);
186+
void on_gc_actor_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie);
187+
void on_reserved_chunk_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie);
188+
std::shared_ptr< pdev_gc_actor > get_pdev_gc_actor(uint32_t pdev_id);
189+
186190
private:
187191
std::shared_ptr< HeapChunkSelector > m_chunk_selector;
188192
folly::ConcurrentHashMap< uint32_t, std::shared_ptr< pdev_gc_actor > > m_pdev_gc_actors;

src/lib/homestore_backend/heap_chunk_selector.cpp

+11-78
Original file line numberDiff line numberDiff line change
@@ -310,11 +310,11 @@ bool HeapChunkSelector::recover_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_
310310
return true;
311311
}
312312

313-
void HeapChunkSelector::recover_per_dev_chunk_heap() {
313+
void HeapChunkSelector::build_pdev_available_chunk_heap() {
314314
std::unique_lock lock_guard(m_chunk_selector_mtx);
315315
for (auto [p_chunk_id, chunk] : m_chunks) {
316-
// if selected for pg, not add to pdev.
317-
bool add_to_heap = !chunk->m_pg_id.has_value();
316+
// if selected for pg, or it is marked as GC state(reserved chunk), not add to pdev.
317+
bool add_to_heap = !chunk->m_pg_id.has_value() && chunk->m_state != ChunkState::GC;
318318
add_chunk_internal(p_chunk_id, add_to_heap);
319319
}
320320
}
@@ -438,85 +438,18 @@ uint64_t HeapChunkSelector::total_blks(uint32_t dev_id) const {
438438
return it->second->m_total_blks;
439439
}
440440

441-
std::list< uint32_t > HeapChunkSelector::get_pdev_ids() const {
442-
std::list< uint32_t > pdev_ids;
443-
for (const auto& [pdev_id, _] : m_per_dev_heap) {
444-
pdev_ids.emplace_back(pdev_id);
441+
std::unordered_map< uint32_t, std::vector< homestore::chunk_num_t > > HeapChunkSelector::get_pdev_chunks() const {
442+
std::unordered_map< uint32_t, std::vector< homestore::chunk_num_t > > pdev_chunks;
443+
for (const auto& [_, EXvchunk] : m_chunks) {
444+
auto pdev_id = EXvchunk->get_pdev_id();
445+
pdev_chunks.try_emplace(pdev_id, std::vector< homestore::chunk_num_t >());
446+
pdev_chunks[pdev_id].emplace_back(EXvchunk->get_chunk_id());
445447
}
446-
return pdev_ids;
447-
}
448-
449-
std::shared_ptr< HeapChunkSelector::ExtendedVChunk > HeapChunkSelector::select_empty_chunk_from_pdev(uint32_t pdev_id) {
450-
std::unique_lock lock_guard(m_chunk_selector_mtx);
451-
auto it = m_per_dev_heap.find(pdev_id);
452-
if (it == m_per_dev_heap.end()) {
453-
LOGWARNMOD(homeobject, "No pdev heap found for pdev {}", pdev_id);
454-
return nullptr;
455-
}
456-
auto pdev_heap = it->second;
457-
std::unique_lock lock(pdev_heap->mtx);
458-
if (pdev_heap->m_heap.empty()) {
459-
LOGWARNMOD(homeobject, "No available chunk found for pdev {}", pdev_id);
460-
return nullptr;
461-
}
462-
auto chunk = pdev_heap->m_heap.top();
463-
pdev_heap->m_heap.pop();
464-
pdev_heap->available_blk_count -= chunk->available_blks();
465-
RELEASE_ASSERT(chunk->available_blks() == chunk->get_total_blks(),
466-
"the selected chunk should be empty, chunk_id={}, pdev_id={}", chunk->get_chunk_id(), pdev_id);
467-
RELEASE_ASSERT(!chunk->m_pg_id.has_value(), "chunk {} is selected from pdev heap, but it has a pg_id {}!",
468-
chunk->get_chunk_id(), chunk->m_pg_id.value());
469-
470-
return chunk;
471-
}
472-
473-
std::shared_ptr< HeapChunkSelector::ExtendedVChunk >
474-
HeapChunkSelector::select_specific_chunk_from_pdev(uint32_t pdev_id, const chunk_num_t chunk_id) {
475-
std::unique_lock lock_guard(m_chunk_selector_mtx);
476-
auto it = m_per_dev_heap.find(pdev_id);
477-
if (it == m_per_dev_heap.end()) {
478-
LOGWARNMOD(homeobject, "No pdev heap found for pdev {}", pdev_id);
479-
return nullptr;
480-
}
481-
auto pdev_heap = it->second;
482-
std::shared_ptr< HeapChunkSelector::ExtendedVChunk > EVchunk;
483-
std::list< std::shared_ptr< HeapChunkSelector::ExtendedVChunk > > EVchunks;
484-
485-
std::unique_lock lock(pdev_heap->mtx);
486-
for (; pdev_heap->m_heap.empty();) {
487-
EVchunk = pdev_heap->m_heap.top();
488-
pdev_heap->m_heap.pop();
489-
if (EVchunk->get_chunk_id() == chunk_id) {
490-
break;
491-
} else {
492-
EVchunks.emplace_back(EVchunk);
493-
}
494-
}
495-
496-
for (const auto& chunk : EVchunks) {
497-
pdev_heap->m_heap.emplace(chunk);
498-
}
499-
500-
if (!EVchunk) {
501-
LOGWARNMOD(homeobject, "No available chunk found for pdev {}, chunk_id {}", pdev_id, chunk_id);
502-
return nullptr;
503-
}
504-
505-
pdev_heap->available_blk_count -= EVchunk->available_blks();
506-
507-
RELEASE_ASSERT(!EVchunk->m_pg_id.has_value(), "chunk {} is selected from pdev heap, but it has a pg_id {}!",
508-
EVchunk->get_chunk_id(), EVchunk->m_pg_id.value());
509-
510-
return EVchunk;
511-
}
512-
513-
const std::unordered_map< homestore::chunk_num_t, homestore::cshared< HeapChunkSelector::ExtendedVChunk > >&
514-
HeapChunkSelector::get_all_chunks() const {
515-
return m_chunks;
448+
return pdev_chunks;
516449
}
517450

518451
homestore::cshared< HeapChunkSelector::ExtendedVChunk >
519-
HeapChunkSelector::get_extend_vchunk(const chunk_num_t chunk_id) const {
452+
HeapChunkSelector::get_extend_vchunk(const homestore::chunk_num_t chunk_id) const {
520453
auto it = m_chunks.find(chunk_id);
521454
if (it != m_chunks.end()) { return it->second; }
522455
return nullptr;

src/lib/homestore_backend/heap_chunk_selector.h

+3-11
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include <queue>
1212
#include <vector>
13+
#include <unordered_set>
1314
#include <mutex>
1415
#include <functional>
1516
#include <atomic>
@@ -128,7 +129,7 @@ class HeapChunkSelector : public homestore::ChunkSelector {
128129
bool recover_pg_chunks(pg_id_t pg_id, std::vector< chunk_num_t >&& p_chunk_ids);
129130

130131
// this should be called after all pg meta blk recovered
131-
void recover_per_dev_chunk_heap();
132+
void build_pdev_available_chunk_heap();
132133

133134
// this should be called after ShardManager is initialized and get all the open shards
134135
bool recover_pg_chunks_states(pg_id_t pg_id, const std::unordered_set< chunk_num_t >& excluding_v_chunk_ids);
@@ -193,16 +194,7 @@ class HeapChunkSelector : public homestore::ChunkSelector {
193194
/**
194195
* @brief Returns all the pdev ids that are managed by this chunk selector.
195196
*/
196-
std::list< uint32_t > get_pdev_ids() const;
197-
198-
/**
199-
* @brief select an available chunk from the given pdev.
200-
*/
201-
std::shared_ptr< ExtendedVChunk > select_empty_chunk_from_pdev(uint32_t pdev_id);
202-
203-
std::shared_ptr< ExtendedVChunk > select_specific_chunk_from_pdev(uint32_t pdev_id, const chunk_num_t chunk_id);
204-
205-
const std::unordered_map< chunk_num_t, homestore::cshared< ExtendedVChunk > >& get_all_chunks() const;
197+
std::unordered_map< uint32_t, std::vector< chunk_num_t > > get_pdev_chunks() const;
206198

207199
homestore::cshared< ExtendedVChunk > get_extend_vchunk(const chunk_num_t chunk_id) const;
208200

0 commit comments

Comments
 (0)