@@ -1464,15 +1464,18 @@ int InstanceRecycler::delete_rowset_data(const doris::RowsetMetaCloudPB& rs_meta
1464
1464
return accessor->delete_files (file_paths);
1465
1465
}
1466
1466
1467
- int InstanceRecycler::delete_rowset_data (const std::vector<doris::RowsetMetaCloudPB>& rowsets) {
1467
+ int InstanceRecycler::delete_rowset_data (const std::vector<doris::RowsetMetaCloudPB>& rowsets,
1468
+ RowsetRecyclingState type) {
1468
1469
int ret = 0 ;
1469
1470
// resource_id -> file_paths
1470
1471
std::map<std::string, std::vector<std::string>> resource_file_paths;
1471
1472
// (resource_id, tablet_id, rowset_id)
1472
1473
std::vector<std::tuple<std::string, int64_t , std::string>> rowsets_delete_by_prefix;
1473
1474
1474
1475
for (const auto & rs : rowsets) {
1475
- {
1476
+ // we have to treat tmp rowsets as "orphans" that may not be related to any existing tablets
1477
+ // due to aborted schema change.
1478
+ if (type == RowsetRecyclingState::FORMAL_ROWSET) {
1476
1479
std::lock_guard lock (recycled_tablets_mtx_);
1477
1480
if (recycled_tablets_.count (rs.tablet_id ())) {
1478
1481
continue ; // Rowset data has already been deleted
@@ -1499,7 +1502,7 @@ int InstanceRecycler::delete_rowset_data(const std::vector<doris::RowsetMetaClou
1499
1502
std::vector<std::pair<int64_t , std::string>> index_ids;
1500
1503
// default format as v1.
1501
1504
InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
1502
-
1505
+ int inverted_index_get_ret = 0 ;
1503
1506
if (rs.has_tablet_schema ()) {
1504
1507
for (const auto & index : rs.tablet_schema ().index ()) {
1505
1508
if (index .has_index_type () && index .index_type () == IndexType::INVERTED) {
@@ -1519,12 +1522,12 @@ int InstanceRecycler::delete_rowset_data(const std::vector<doris::RowsetMetaClou
1519
1522
continue ;
1520
1523
}
1521
1524
InvertedIndexInfo index_info;
1522
- int get_ret =
1525
+ inverted_index_get_ret =
1523
1526
inverted_index_id_cache_->get (rs.index_id (), rs.schema_version (), index_info);
1524
- if (get_ret == 0 ) {
1527
+ if (inverted_index_get_ret == 0 ) {
1525
1528
index_format = index_info.first ;
1526
1529
index_ids = index_info.second ;
1527
- } else if (get_ret == 1 ) {
1530
+ } else if (inverted_index_get_ret == 1 ) {
1528
1531
// 1. Schema kv not found means tablet has been recycled
1529
1532
// Maybe some tablet recycle failed by some bugs
1530
1533
// We need to delete again to double check
@@ -1562,7 +1565,10 @@ int InstanceRecycler::delete_rowset_data(const std::vector<doris::RowsetMetaClou
1562
1565
file_paths.push_back (inverted_index_path_v1 (tablet_id, rowset_id, i,
1563
1566
index_id.first , index_id.second ));
1564
1567
}
1565
- } else if (!index_ids.empty ()) {
1568
+ } else if (!index_ids.empty () || inverted_index_get_ret == 1 ) {
1569
+ // try to recycle inverted index v2 when inverted_index_get_ret == 1
1570
+ // we treat schema not found as if it has a v2 format inverted index
1571
+ // to reduce chance of data leakage
1566
1572
file_paths.push_back (inverted_index_path_v2 (tablet_id, rowset_id, i));
1567
1573
}
1568
1574
}
@@ -2028,7 +2034,7 @@ int InstanceRecycler::recycle_rowsets() {
2028
2034
rowsets_to_delete.swap (rowsets);
2029
2035
worker_pool->submit ([&, rowset_keys_to_delete = std::move (rowset_keys_to_delete),
2030
2036
rowsets_to_delete = std::move (rowsets_to_delete)]() {
2031
- if (delete_rowset_data (rowsets_to_delete) != 0 ) {
2037
+ if (delete_rowset_data (rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET ) != 0 ) {
2032
2038
LOG (WARNING) << " failed to delete rowset data, instance_id=" << instance_id_;
2033
2039
return ;
2034
2040
}
@@ -2225,7 +2231,7 @@ int InstanceRecycler::recycle_tmp_rowsets() {
2225
2231
tmp_rowset_keys.clear ();
2226
2232
tmp_rowsets.clear ();
2227
2233
});
2228
- if (delete_rowset_data (tmp_rowsets) != 0 ) {
2234
+ if (delete_rowset_data (tmp_rowsets, RowsetRecyclingState::TMP_ROWSET ) != 0 ) {
2229
2235
LOG (WARNING) << " failed to delete tmp rowset data, instance_id=" << instance_id_;
2230
2236
return -1 ;
2231
2237
}
0 commit comments