Skip to content

Commit 5bd342b

Browse files
committed
[Fix](recycler) Further fix for apache#47475 (apache#47486)
Related PR: apache#47475 在pr#47475中,我们修复了潜在的少删数据的问题,是通过在delete rowset data函数中添加删除逻辑,删除recycle tablet中漏删的文件,但是那个pr忽略了其中存在的一个判断条件,导致在recycle tablet中漏删的文件被跳过了,没有实际删除。 在此pr中,tmp rowset不再依赖于上述判断条件,尽可能删除每一个rowset数据,包括倒排索引v2的数据,但是普通的rowset不会跳过这个判断条件,因为普通rowset数量过大,如果不跳过,可能会影响删除效率。
1 parent 3c594a5 commit 5bd342b

File tree

3 files changed

+25
-12
lines changed

3 files changed

+25
-12
lines changed

cloud/src/recycler/recycler.cpp

+15-9
Original file line numberDiff line numberDiff line change
@@ -1464,15 +1464,18 @@ int InstanceRecycler::delete_rowset_data(const doris::RowsetMetaCloudPB& rs_meta
14641464
return accessor->delete_files(file_paths);
14651465
}
14661466

1467-
int InstanceRecycler::delete_rowset_data(const std::vector<doris::RowsetMetaCloudPB>& rowsets) {
1467+
int InstanceRecycler::delete_rowset_data(const std::vector<doris::RowsetMetaCloudPB>& rowsets,
1468+
RowsetRecyclingState type) {
14681469
int ret = 0;
14691470
// resource_id -> file_paths
14701471
std::map<std::string, std::vector<std::string>> resource_file_paths;
14711472
// (resource_id, tablet_id, rowset_id)
14721473
std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
14731474

14741475
for (const auto& rs : rowsets) {
1475-
{
1476+
// we have to treat tmp rowset as "orphans" that may not related to any existing tablets
1477+
// due to aborted schema change.
1478+
if (type == RowsetRecyclingState::FORMAL_ROWSET) {
14761479
std::lock_guard lock(recycled_tablets_mtx_);
14771480
if (recycled_tablets_.count(rs.tablet_id())) {
14781481
continue; // Rowset data has already been deleted
@@ -1499,7 +1502,7 @@ int InstanceRecycler::delete_rowset_data(const std::vector<doris::RowsetMetaClou
14991502
std::vector<std::pair<int64_t, std::string>> index_ids;
15001503
// default format as v1.
15011504
InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
1502-
1505+
int inverted_index_get_ret = 0;
15031506
if (rs.has_tablet_schema()) {
15041507
for (const auto& index : rs.tablet_schema().index()) {
15051508
if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
@@ -1519,12 +1522,12 @@ int InstanceRecycler::delete_rowset_data(const std::vector<doris::RowsetMetaClou
15191522
continue;
15201523
}
15211524
InvertedIndexInfo index_info;
1522-
int get_ret =
1525+
inverted_index_get_ret =
15231526
inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
1524-
if (get_ret == 0) {
1527+
if (inverted_index_get_ret == 0) {
15251528
index_format = index_info.first;
15261529
index_ids = index_info.second;
1527-
} else if (get_ret == 1) {
1530+
} else if (inverted_index_get_ret == 1) {
15281531
// 1. Schema kv not found means tablet has been recycled
15291532
// Maybe some tablet recycle failed by some bugs
15301533
// We need to delete again to double check
@@ -1562,7 +1565,10 @@ int InstanceRecycler::delete_rowset_data(const std::vector<doris::RowsetMetaClou
15621565
file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
15631566
index_id.first, index_id.second));
15641567
}
1565-
} else if (!index_ids.empty()) {
1568+
} else if (!index_ids.empty() || inverted_index_get_ret == 1) {
1569+
// try to recycle inverted index v2 when get_ret == 1
1570+
// we treat schema not found as if it has a v2 format inverted index
1571+
// to reduce chance of data leakage
15661572
file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
15671573
}
15681574
}
@@ -2028,7 +2034,7 @@ int InstanceRecycler::recycle_rowsets() {
20282034
rowsets_to_delete.swap(rowsets);
20292035
worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
20302036
rowsets_to_delete = std::move(rowsets_to_delete)]() {
2031-
if (delete_rowset_data(rowsets_to_delete) != 0) {
2037+
if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET) != 0) {
20322038
LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
20332039
return;
20342040
}
@@ -2225,7 +2231,7 @@ int InstanceRecycler::recycle_tmp_rowsets() {
22252231
tmp_rowset_keys.clear();
22262232
tmp_rowsets.clear();
22272233
});
2228-
if (delete_rowset_data(tmp_rowsets) != 0) {
2234+
if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET) != 0) {
22292235
LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
22302236
return -1;
22312237
}

cloud/src/recycler/recycler.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,11 @@ class Recycler {
110110
std::shared_ptr<TxnLazyCommitter> txn_lazy_committer_;
111111
};
112112

113+
enum class RowsetRecyclingState {
114+
FORMAL_ROWSET,
115+
TMP_ROWSET,
116+
};
117+
113118
class InstanceRecycler {
114119
public:
115120
explicit InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
@@ -222,7 +227,8 @@ class InstanceRecycler {
222227
const std::string& rowset_id);
223228

224229
// return 0 for success otherwise error
225-
int delete_rowset_data(const std::vector<doris::RowsetMetaCloudPB>& rowsets);
230+
int delete_rowset_data(const std::vector<doris::RowsetMetaCloudPB>& rowsets,
231+
RowsetRecyclingState type);
226232

227233
/**
228234
* Get stage storage info from instance and init StorageVaultAccessor

cloud/test/recycler_test.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -1129,6 +1129,7 @@ TEST(RecyclerTest, recycle_indexes) {
11291129
j & 1);
11301130
auto tmp_rowset = create_rowset("recycle_tmp_rowsets", tablet_id, index_id, 5,
11311131
schemas[j % 5], txn_id_base + j);
1132+
tmp_rowset.set_resource_id("recycle_indexes");
11321133
create_tmp_rowset(txn_kv.get(), accessor.get(), tmp_rowset, j & 1);
11331134
}
11341135
for (int j = 0; j < 10; ++j) {
@@ -3132,7 +3133,7 @@ TEST(RecyclerTest, delete_rowset_data) {
31323133

31333134
rowset_pbs.emplace_back(std::move(rowset));
31343135
}
3135-
ASSERT_EQ(0, recycler.delete_rowset_data(rowset_pbs));
3136+
ASSERT_EQ(0, recycler.delete_rowset_data(rowset_pbs, RowsetRecyclingState::FORMAL_ROWSET));
31363137
std::unique_ptr<ListIterator> list_iter;
31373138
ASSERT_EQ(0, accessor->list_all(&list_iter));
31383139
ASSERT_FALSE(list_iter->has_next());
@@ -3237,7 +3238,7 @@ TEST(RecyclerTest, delete_rowset_data_without_inverted_index_storage_format) {
32373238

32383239
rowset_pbs.emplace_back(std::move(rowset));
32393240
}
3240-
ASSERT_EQ(0, recycler.delete_rowset_data(rowset_pbs));
3241+
ASSERT_EQ(0, recycler.delete_rowset_data(rowset_pbs, RowsetRecyclingState::FORMAL_ROWSET));
32413242
std::unique_ptr<ListIterator> list_iter;
32423243
ASSERT_EQ(0, accessor->list_all(&list_iter));
32433244
ASSERT_FALSE(list_iter->has_next());

0 commit comments

Comments
 (0)