From 9b8da4486103ca3449c4a94f286c2f08cee81ff7 Mon Sep 17 00:00:00 2001 From: Gavin Chou Date: Sat, 8 Mar 2025 20:09:40 +0800 Subject: [PATCH] [opt](meta) Align and complete pb_convert * Fix some missing fields for pb_convert * add UT to prevent future missing fields --- be/src/cloud/pb_convert.cpp | 56 +++- be/src/olap/tablet_meta.cpp | 2 +- be/src/olap/tablet_schema.cpp | 8 +- be/src/olap/tablet_schema.h | 8 +- be/test/olap/pb_convert_test.cpp | 443 +++++++++++++++++++++++++++++++ gensrc/proto/olap_file.proto | 17 +- 6 files changed, 510 insertions(+), 24 deletions(-) create mode 100644 be/test/olap/pb_convert_test.cpp diff --git a/be/src/cloud/pb_convert.cpp b/be/src/cloud/pb_convert.cpp index 521729b44f6448..b1ddc9af33c2f6 100644 --- a/be/src/cloud/pb_convert.cpp +++ b/be/src/cloud/pb_convert.cpp @@ -82,7 +82,7 @@ void doris_rowset_meta_to_cloud(RowsetMetaCloudPB* out, const RowsetMetaPB& in) out->set_schema_version(in.schema_version()); } out->set_enable_segments_file_size(in.enable_segments_file_size()); - out->set_has_variant_type_in_schema(in.has_has_variant_type_in_schema()); + out->set_has_variant_type_in_schema(in.has_variant_type_in_schema()); out->set_enable_inverted_index_file_info(in.enable_inverted_index_file_info()); out->set_compaction_level(in.compaction_level()); out->mutable_inverted_index_file_info()->CopyFrom(in.inverted_index_file_info()); @@ -239,6 +239,7 @@ void cloud_rowset_meta_to_doris(RowsetMetaPB* out, const RowsetMetaCloudPB& in, out->set_schema_version(in.schema_version()); } out->set_enable_segments_file_size(in.enable_segments_file_size()); + out->set_has_variant_type_in_schema(in.has_variant_type_in_schema()); out->set_enable_inverted_index_file_info(in.enable_inverted_index_file_info()); out->set_compaction_level(in.compaction_level()); out->mutable_inverted_index_file_info()->CopyFrom(in.inverted_index_file_info()); @@ -295,6 +296,7 @@ void cloud_rowset_meta_to_doris(RowsetMetaPB* out, RowsetMetaCloudPB&& in, 
out->set_schema_version(in.schema_version()); } out->set_enable_segments_file_size(in.enable_segments_file_size()); + out->set_has_variant_type_in_schema(in.has_variant_type_in_schema()); out->set_enable_inverted_index_file_info(in.enable_inverted_index_file_info()); out->set_compaction_level(in.compaction_level()); out->mutable_inverted_index_file_info()->Swap(in.mutable_inverted_index_file_info()); @@ -340,9 +342,11 @@ void doris_tablet_schema_to_cloud(TabletSchemaCloudPB* out, const TabletSchemaPB out->set_is_dynamic_schema(in.is_dynamic_schema()); out->mutable_row_store_column_unique_ids()->CopyFrom(in.row_store_column_unique_ids()); out->set_inverted_index_storage_format(in.inverted_index_storage_format()); - out->set_enable_variant_flatten_nested(in.variant_enable_flatten_nested()); + out->set_enable_variant_flatten_nested(in.enable_variant_flatten_nested()); out->set_skip_bitmap_col_idx(in.skip_bitmap_col_idx()); out->set_storage_page_size(in.storage_page_size()); + out->set_is_in_memory(in.is_in_memory()); + out->set_row_store_page_size(in.row_store_page_size()); } void doris_tablet_schema_to_cloud(TabletSchemaCloudPB* out, TabletSchemaPB&& in) { @@ -369,9 +373,11 @@ void doris_tablet_schema_to_cloud(TabletSchemaCloudPB* out, TabletSchemaPB&& in) out->set_is_dynamic_schema(in.is_dynamic_schema()); out->mutable_row_store_column_unique_ids()->Swap(in.mutable_row_store_column_unique_ids()); out->set_inverted_index_storage_format(in.inverted_index_storage_format()); - out->set_enable_variant_flatten_nested(in.variant_enable_flatten_nested()); + out->set_enable_variant_flatten_nested(in.enable_variant_flatten_nested()); out->set_skip_bitmap_col_idx(in.skip_bitmap_col_idx()); out->set_storage_page_size(in.storage_page_size()); + out->set_is_in_memory(in.is_in_memory()); + out->set_row_store_page_size(in.row_store_page_size()); } TabletSchemaPB cloud_tablet_schema_to_doris(const TabletSchemaCloudPB& in) { @@ -411,9 +417,11 @@ void 
cloud_tablet_schema_to_doris(TabletSchemaPB* out, const TabletSchemaCloudPB out->set_is_dynamic_schema(in.is_dynamic_schema()); out->mutable_row_store_column_unique_ids()->CopyFrom(in.row_store_column_unique_ids()); out->set_inverted_index_storage_format(in.inverted_index_storage_format()); - out->set_variant_enable_flatten_nested(in.enable_variant_flatten_nested()); + out->set_enable_variant_flatten_nested(in.enable_variant_flatten_nested()); out->set_skip_bitmap_col_idx(in.skip_bitmap_col_idx()); out->set_storage_page_size(in.storage_page_size()); + out->set_is_in_memory(in.is_in_memory()); + out->set_row_store_page_size(in.row_store_page_size()); } void cloud_tablet_schema_to_doris(TabletSchemaPB* out, TabletSchemaCloudPB&& in) { @@ -441,9 +449,11 @@ void cloud_tablet_schema_to_doris(TabletSchemaPB* out, TabletSchemaCloudPB&& in) out->set_is_dynamic_schema(in.is_dynamic_schema()); out->mutable_row_store_column_unique_ids()->Swap(in.mutable_row_store_column_unique_ids()); out->set_inverted_index_storage_format(in.inverted_index_storage_format()); - out->set_variant_enable_flatten_nested(in.enable_variant_flatten_nested()); + out->set_enable_variant_flatten_nested(in.enable_variant_flatten_nested()); out->set_skip_bitmap_col_idx(in.skip_bitmap_col_idx()); out->set_storage_page_size(in.storage_page_size()); + out->set_is_in_memory(in.is_in_memory()); + out->set_row_store_page_size(in.row_store_page_size()); } TabletMetaCloudPB doris_tablet_meta_to_cloud(const TabletMetaPB& in) { @@ -477,7 +487,13 @@ void doris_tablet_meta_to_cloud(TabletMetaCloudPB* out, const TabletMetaPB& in) doris_rowset_meta_to_cloud(out->add_rs_metas(), rs_meta); } } - // ATTN: inc_rs_metas are deprecated, ignored here. 
+ // ATTN: inc_rs_metas are deprecated, here is for conversion check + if (in.inc_rs_metas_size()) { + out->mutable_inc_rs_metas()->Reserve(in.inc_rs_metas_size()); + for (const auto& rs_meta : in.inc_rs_metas()) { + doris_rowset_meta_to_cloud(out->add_inc_rs_metas(), rs_meta); + } + } if (in.has_alter_task()) { out->mutable_alter_task()->CopyFrom(in.alter_task()); } @@ -541,7 +557,15 @@ void doris_tablet_meta_to_cloud(TabletMetaCloudPB* out, TabletMetaPB&& in) { doris_rowset_meta_to_cloud(out->add_rs_metas(), std::move(*in.mutable_rs_metas(i))); } } - // ATTN: inc_rs_metas are deprecated, ignored here. + // ATTN: inc_rs_metas are deprecated, here is for conversion check + if (in.inc_rs_metas_size()) { + size_t rs_metas_size = in.inc_rs_metas_size(); + out->mutable_inc_rs_metas()->Reserve(rs_metas_size); + for (size_t i = 0; i < rs_metas_size; ++i) { + doris_rowset_meta_to_cloud(out->add_inc_rs_metas(), + std::move(*in.mutable_inc_rs_metas(i))); + } + } if (in.has_alter_task()) { out->mutable_alter_task()->Swap(in.mutable_alter_task()); } @@ -618,7 +642,13 @@ void cloud_tablet_meta_to_doris(TabletMetaPB* out, const TabletMetaCloudPB& in) cloud_rowset_meta_to_doris(out->add_rs_metas(), rs_meta); } } - // ATTN: inc_rs_metas are deprecated, ignored here. + // ATTN: inc_rs_metas are deprecated, here is for conversion check + if (in.inc_rs_metas_size()) { + out->mutable_inc_rs_metas()->Reserve(in.inc_rs_metas_size()); + for (const auto& rs_meta : in.inc_rs_metas()) { + cloud_rowset_meta_to_doris(out->add_inc_rs_metas(), rs_meta); + } + } if (in.has_alter_task()) { out->mutable_alter_task()->CopyFrom(in.alter_task()); } @@ -682,7 +712,15 @@ void cloud_tablet_meta_to_doris(TabletMetaPB* out, TabletMetaCloudPB&& in) { cloud_rowset_meta_to_doris(out->add_rs_metas(), std::move(*in.mutable_rs_metas(i))); } } - // ATTN: inc_rs_metas are deprecated, ignored here. 
+ // ATTN: inc_rs_metas are deprecated, here is for conversion check + if (in.inc_rs_metas_size()) { + size_t rs_metas_size = in.inc_rs_metas_size(); + out->mutable_inc_rs_metas()->Reserve(rs_metas_size); + for (size_t i = 0; i < rs_metas_size; i++) { + cloud_rowset_meta_to_doris(out->add_inc_rs_metas(), + std::move(*in.mutable_inc_rs_metas(i))); + } + } if (in.has_alter_task()) { out->mutable_alter_task()->Swap(in.mutable_alter_task()); } diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 08d774ce67d247..bb03ceeef9b9f0 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -325,7 +325,7 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id } if (tablet_schema.__isset.variant_enable_flatten_nested) { - schema->set_variant_enable_flatten_nested(tablet_schema.variant_enable_flatten_nested); + schema->set_enable_variant_flatten_nested(tablet_schema.variant_enable_flatten_nested); } if (tablet_schema.__isset.enable_single_replica_compaction) { diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 716711589eaff7..a2c4f29ce0d8bc 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -1069,7 +1069,7 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac _row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(), schema.row_store_column_unique_ids().end()); - _variant_enable_flatten_nested = schema.variant_enable_flatten_nested(); + _enable_variant_flatten_nested = schema.enable_variant_flatten_nested(); _vl_field_mem_size += _row_store_column_unique_ids.capacity() * sizeof(int32_t); update_metadata_size(); } @@ -1139,7 +1139,7 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version _sort_col_num = ori_tablet_schema.sort_col_num(); _row_store_page_size = ori_tablet_schema.row_store_page_size(); _storage_page_size = ori_tablet_schema.storage_page_size(); - 
_variant_enable_flatten_nested = ori_tablet_schema.variant_flatten_nested(); + _enable_variant_flatten_nested = ori_tablet_schema.variant_flatten_nested(); // copy from table_schema_param _schema_version = version; @@ -1304,7 +1304,7 @@ void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const { tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format); tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign( _row_store_column_unique_ids.begin(), _row_store_column_unique_ids.end()); - tablet_schema_pb->set_variant_enable_flatten_nested(_variant_enable_flatten_nested); + tablet_schema_pb->set_enable_variant_flatten_nested(_enable_variant_flatten_nested); } size_t TabletSchema::row_size() const { @@ -1573,7 +1573,7 @@ bool operator==(const TabletSchema& a, const TabletSchema& b) { if (a._row_store_page_size != b._row_store_page_size) return false; if (a._storage_page_size != b._storage_page_size) return false; if (a._skip_write_index_on_load != b._skip_write_index_on_load) return false; - if (a._variant_enable_flatten_nested != b._variant_enable_flatten_nested) return false; + if (a._enable_variant_flatten_nested != b._enable_variant_flatten_nested) return false; return true; } diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 957b9adb2b9476..e5f5a4c348dbd5 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -371,10 +371,10 @@ class TabletSchema : public MetadataAdder { _disable_auto_compaction = disable_auto_compaction; } bool disable_auto_compaction() const { return _disable_auto_compaction; } - void set_variant_enable_flatten_nested(bool flatten_nested) { - _variant_enable_flatten_nested = flatten_nested; + void set_enable_variant_flatten_nested(bool flatten_nested) { + _enable_variant_flatten_nested = flatten_nested; } - bool variant_flatten_nested() const { return _variant_enable_flatten_nested; } + bool variant_flatten_nested() const { return 
_enable_variant_flatten_nested; } void set_enable_single_replica_compaction(bool enable_single_replica_compaction) { _enable_single_replica_compaction = enable_single_replica_compaction; } @@ -594,7 +594,7 @@ class TabletSchema : public MetadataAdder { // Contains column ids of which columns should be encoded into row store. // ATTN: For compability reason empty cids means all columns of tablet schema are encoded to row column std::vector _row_store_column_unique_ids; - bool _variant_enable_flatten_nested = false; + bool _enable_variant_flatten_nested = false; int64_t _vl_field_mem_size {0}; // variable length field }; diff --git a/be/test/olap/pb_convert_test.cpp b/be/test/olap/pb_convert_test.cpp new file mode 100644 index 00000000000000..9e03b023a4b74c --- /dev/null +++ b/be/test/olap/pb_convert_test.cpp @@ -0,0 +1,443 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "cloud/pb_convert.h" + +#include + +#include +#include +#include +#include + +#include "google/protobuf/message.h" +#include "gtest/gtest.h" + +namespace doris { + +using namespace doris::cloud; +using namespace google::protobuf; + +// RowsetMetaPB <=> RowsetMetaCloudPB +// TabletSchemaPB <=> TabletSchemaCloudPB +// TabletMetaPB <=> TabletMetaCloudPB + +// test if 2 PBs have the same declared fields: count and names. +// note that reserved fields are not considered +bool have_same_fields(const google::protobuf::Descriptor* desc1, + const google::protobuf::Descriptor* desc2) { + if (desc1->field_count() != desc2->field_count()) { + return false; + } + std::set fields1; + for (int i = 0; i < desc1->field_count(); ++i) { + fields1.insert(desc1->field(i)->name()); + } + std::set fields2; + for (int i = 0; i < desc2->field_count(); ++i) { + fields2.insert(desc2->field(i)->name()); + } + return fields1 == fields2; +} + +// traverse all fields of the given message, clear them and set them to a +// default value, after which all has_xxx() functions will return true +void set_all_fields_to_default(Message* message) { + const Descriptor* descriptor = message->GetDescriptor(); + const Reflection* reflection = message->GetReflection(); + + // set scalar value to the field + auto set_scalar_type = [](Message* m, const FieldDescriptor* f, const Reflection* r) { + switch (f->cpp_type()) { + case FieldDescriptor::CPPTYPE_INT32: + r->SetInt32(m, f, 0); + break; + case FieldDescriptor::CPPTYPE_INT64: + r->SetInt64(m, f, 0); + break; + case FieldDescriptor::CPPTYPE_UINT32: + r->SetUInt32(m, f, 0); + break; + case FieldDescriptor::CPPTYPE_UINT64: + r->SetUInt64(m, f, 0); + break; + case FieldDescriptor::CPPTYPE_DOUBLE: + r->SetDouble(m, f, 0.0); + break; + case FieldDescriptor::CPPTYPE_FLOAT: + r->SetFloat(m, f, 0.0f); + break; + case FieldDescriptor::CPPTYPE_BOOL: + r->SetBool(m, f, false); + break; + case FieldDescriptor::CPPTYPE_ENUM: + r->SetEnum(m, f, 
f->enum_type()->value(0)); + break; + case FieldDescriptor::CPPTYPE_STRING: { + const std::string empty_str; + r->SetString(m, f, empty_str); + break; + } + default: + EXPECT_TRUE(false) << "unexpected branch reached"; + break; + } + }; + + // add a scalar value to the repeated field + auto add_scalar_type = [](Message* m, const FieldDescriptor* f, const Reflection* r) { + switch (f->cpp_type()) { + case FieldDescriptor::CPPTYPE_INT32: + r->AddInt32(m, f, 0); + break; + case FieldDescriptor::CPPTYPE_INT64: + r->AddInt64(m, f, 0); + break; + case FieldDescriptor::CPPTYPE_UINT32: + r->AddUInt32(m, f, 0); + break; + case FieldDescriptor::CPPTYPE_UINT64: + r->AddUInt64(m, f, 0); + break; + case FieldDescriptor::CPPTYPE_DOUBLE: + r->AddDouble(m, f, 0.0); + break; + case FieldDescriptor::CPPTYPE_FLOAT: + r->AddFloat(m, f, 0.0f); + break; + case FieldDescriptor::CPPTYPE_BOOL: + r->AddBool(m, f, false); + break; + case FieldDescriptor::CPPTYPE_ENUM: + r->AddEnum(m, f, f->enum_type()->value(0)); + break; + case FieldDescriptor::CPPTYPE_STRING: { + const std::string empty_str; + r->AddString(m, f, empty_str); + break; + } + default: + EXPECT_TRUE(false) << "unexpected branch reached"; + break; + } + }; + + for (int i = 0; i < descriptor->field_count(); ++i) { + const FieldDescriptor* field = descriptor->field(i); + if (field->is_repeated()) { // add an element + reflection->ClearField(message, field); + if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { + [[maybe_unused]] Message* sub_message = reflection->AddMessage(message, field); + // the following has a memory issue; however, there is no need to set the value + // set_all_fields_to_default(sub_message); + } else { + add_scalar_type(message, field, reflection); + } + } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { + Message* sub_message = reflection->MutableMessage(message, field); + set_all_fields_to_default(sub_message); + } else { + set_scalar_type(message, field, reflection); + } + } +} + +// get 
all names of fields that are set, regardless of their values, in the msg +std::set get_set_fields(const google::protobuf::Message& msg) { + std::set set_fields; + const auto* descriptor = msg.GetDescriptor(); + const auto* reflection = msg.GetReflection(); + for (int i = 0; i < descriptor->field_count(); ++i) { + const auto* field = descriptor->field(i); + if (field->is_repeated()) { + if (reflection->FieldSize(msg, field) > 0) { + set_fields.insert(field->name()); + } + } else { + if (reflection->HasField(msg, field)) { + set_fields.insert(field->name()); + } + } + } + return set_fields; +} + +// clang-format off + +auto print = [](auto v) { std::stringstream s; for (auto& i : v) s << i << " "; return s.str(); }; +auto set_diff = [](auto a, auto b) { + std::set r; + std::set_difference(a.begin(), a.end(), b.begin(), b.end(), std::inserter(r, r.end())); + return r; +}; + +// ensure that PBs that need to be converted have identical fields, so that +// they can be inter-converted +TEST(PbConvert, ensure_identical_fields) { + EXPECT_EQ(RowsetMetaPB::GetDescriptor()->field_count(), RowsetMetaCloudPB::GetDescriptor()->field_count()); + EXPECT_EQ(TabletSchemaPB::GetDescriptor()->field_count(), TabletSchemaCloudPB::GetDescriptor()->field_count()); + EXPECT_EQ(TabletMetaPB::GetDescriptor()->field_count(), TabletMetaCloudPB::GetDescriptor()->field_count()); + + EXPECT_TRUE(have_same_fields(RowsetMetaPB::GetDescriptor(), RowsetMetaCloudPB::GetDescriptor())); + EXPECT_TRUE(have_same_fields(TabletMetaPB::GetDescriptor(), TabletMetaCloudPB::GetDescriptor())); + EXPECT_TRUE(have_same_fields(TabletSchemaPB::GetDescriptor(), TabletSchemaCloudPB::GetDescriptor())); +} + +TEST(PbConvert, ensure_all_fields_converted_correctly) { + // rowset meta + RowsetMetaPB rs; + set_all_fields_to_default(&rs); + auto rowset_meta_set_fields = get_set_fields(rs); + EXPECT_EQ(rowset_meta_set_fields.size(), RowsetMetaPB::GetDescriptor()->field_count()) << print(rowset_meta_set_fields); + + RowsetMetaCloudPB 
rs_cloud; + set_all_fields_to_default(&rs_cloud); + auto rowset_meta_cloud_set_fields = get_set_fields(rs_cloud); + EXPECT_EQ(rowset_meta_cloud_set_fields.size(), RowsetMetaCloudPB::GetDescriptor()->field_count()) << print(rowset_meta_cloud_set_fields); + EXPECT_EQ(rowset_meta_set_fields.size(), rowset_meta_cloud_set_fields.size()); + + RowsetMetaCloudPB rowset_meta_cloud_out; + doris_rowset_meta_to_cloud(&rowset_meta_cloud_out, rs); + auto rowset_meta_cloud_out_set_fields = get_set_fields(rowset_meta_cloud_out); + EXPECT_EQ(rowset_meta_set_fields.size(), rowset_meta_cloud_out_set_fields.size()) + << "doris_rowset_meta_to_cloud() missing output fields," + << "\n input_fields=" << print(rowset_meta_set_fields) + << "\n output_fields=" << print(rowset_meta_cloud_out_set_fields) + << "\n diff=" << print(set_diff(rowset_meta_set_fields, rowset_meta_cloud_out_set_fields)); + + RowsetMetaPB rowset_meta_out; + cloud_rowset_meta_to_doris(&rowset_meta_out, rs_cloud); + auto rowset_meta_out_set_fields = get_set_fields(rowset_meta_out); + EXPECT_EQ(rowset_meta_cloud_set_fields.size(), rowset_meta_out_set_fields.size()) + << "cloud_rowset_meta_to_doris() missing output fields," + << "\n input_fields=" << print(rowset_meta_cloud_set_fields) + << "\n output_fields=" << print(rowset_meta_out_set_fields) + << "\n diff=" << print(set_diff(rowset_meta_cloud_set_fields, rowset_meta_out_set_fields)); + + // tablet schema + TabletSchemaPB tablet_schema; + set_all_fields_to_default(&tablet_schema); + auto tablet_schema_set_fields = get_set_fields(tablet_schema); + EXPECT_EQ(tablet_schema_set_fields.size(), TabletSchemaPB::GetDescriptor()->field_count()) << print(tablet_schema_set_fields); + + TabletSchemaCloudPB tablet_schema_cloud; + set_all_fields_to_default(&tablet_schema_cloud); + auto tablet_schema_cloud_set_fields = get_set_fields(tablet_schema_cloud); + EXPECT_EQ(tablet_schema_cloud_set_fields.size(), TabletSchemaCloudPB::GetDescriptor()->field_count()) << 
print(tablet_schema_cloud_set_fields); + EXPECT_EQ(tablet_schema_set_fields.size(), tablet_schema_cloud_set_fields.size()); + + TabletSchemaCloudPB tablet_schema_cloud_out; + doris_tablet_schema_to_cloud(&tablet_schema_cloud_out, tablet_schema); + auto tablet_schema_cloud_out_set_fields = get_set_fields(tablet_schema_cloud_out); + EXPECT_EQ(tablet_schema_set_fields.size(), tablet_schema_cloud_out_set_fields.size()) + << "doris_tablet_schema_to_cloud() missing output fields," + << "\n input_fields=" << print(tablet_schema_set_fields) + << "\n output_fields=" << print(tablet_schema_cloud_out_set_fields) + << "\n diff=" << print(set_diff(tablet_schema_set_fields, tablet_schema_cloud_out_set_fields)); + + TabletSchemaPB tablet_schema_out; + cloud_tablet_schema_to_doris(&tablet_schema_out, tablet_schema_cloud); + auto tablet_schema_out_set_fields = get_set_fields(tablet_schema_out); + EXPECT_EQ(tablet_schema_cloud_set_fields.size(), tablet_schema_out_set_fields.size()) + << "cloud_tablet_schema_to_doris() missing output fields," + << "\n input_fields=" << print(tablet_schema_cloud_set_fields) + << "\n output_fields=" << print(tablet_schema_out_set_fields) + << "\n diff=" << print(set_diff(tablet_schema_cloud_set_fields, tablet_schema_out_set_fields)); + + // tablet meta + TabletMetaPB tablet_meta; + set_all_fields_to_default(&tablet_meta); + auto tablet_meta_set_fields = get_set_fields(tablet_meta); + EXPECT_EQ(tablet_meta_set_fields.size(), TabletMetaPB::GetDescriptor()->field_count()) << print(tablet_meta_set_fields); + TabletMetaCloudPB tablet_meta_cloud; + set_all_fields_to_default(&tablet_meta_cloud); + auto tablet_meta_cloud_set_fields = get_set_fields(tablet_meta_cloud); + EXPECT_EQ(tablet_meta_cloud_set_fields.size(), TabletMetaCloudPB::GetDescriptor()->field_count()) << print(tablet_meta_cloud_set_fields); + EXPECT_EQ(tablet_meta_set_fields.size(), tablet_meta_cloud_set_fields.size()); + + TabletMetaCloudPB tablet_meta_cloud_out; + 
doris_tablet_meta_to_cloud(&tablet_meta_cloud_out, tablet_meta); + auto tablet_meta_cloud_out_set_fields = get_set_fields(tablet_meta_cloud_out); + EXPECT_EQ(tablet_meta_set_fields.size(), tablet_meta_cloud_out_set_fields.size()) + << "doris_tablet_meta_to_cloud() missing output fields," + << "\n input_fields=" << print(tablet_meta_set_fields) + << "\n output_fields=" << print(tablet_meta_cloud_out_set_fields) + << "\n diff=" << print(set_diff(tablet_meta_set_fields, tablet_meta_cloud_out_set_fields)); + + TabletMetaPB tablet_meta_out; + cloud_tablet_meta_to_doris(&tablet_meta_out, tablet_meta_cloud); + auto tablet_meta_out_set_fields = get_set_fields(tablet_meta_out); + EXPECT_EQ(tablet_meta_cloud_set_fields.size(), tablet_meta_out_set_fields.size()) + << "cloud_tablet_meta_to_doris() missing output fields," + << "\n input_fields=" << print(tablet_meta_cloud_set_fields) + << "\n output_fields=" << print(tablet_meta_out_set_fields) + << "\n diff=" << print(set_diff(tablet_meta_cloud_set_fields, tablet_meta_out_set_fields)); +} + +TEST(PbConvert, test_rvalue_overloads) { + // rowset meta + RowsetMetaPB rs; + set_all_fields_to_default(&rs); + auto rs_set_fields = get_set_fields(rs); + + RowsetMetaCloudPB rs_cloud; + set_all_fields_to_default(&rs_cloud); + auto rs_cloud_set_fields = get_set_fields(rs_cloud); + EXPECT_EQ(rs_set_fields.size(), rs_cloud_set_fields.size()); + + RowsetMetaCloudPB rs_cloud_out; + rs_cloud_out = doris_rowset_meta_to_cloud(std::move(rs)); + auto rs_cloud_out_set_fields = get_set_fields(rs_cloud_out); + EXPECT_EQ(rs_set_fields.size(), rs_cloud_out_set_fields.size()) + << "doris_rowset_meta_to_cloud() missing output fields," + << "\n input_fields=" << print(rs_set_fields) + << "\n output_fields=" << print(rs_cloud_out_set_fields) + << "\n diff=" << print(set_diff(rs_set_fields, rs_cloud_out_set_fields)); + + RowsetMetaPB rs_out; + rs_out = cloud_rowset_meta_to_doris(std::move(rs_cloud)); + auto rs_out_set_fields = get_set_fields(rs_out); + 
EXPECT_EQ(rs_cloud_set_fields.size(), rs_out_set_fields.size()) + << "cloud_rowset_meta_to_doris() missing output fields," + << "\n input_fields=" << print(rs_cloud_set_fields) + << "\n output_fields=" << print(rs_out_set_fields) + << "\n diff=" << print(set_diff(rs_cloud_set_fields, rs_out_set_fields)); + + // tablet schema + TabletSchemaPB tablet_schema; + set_all_fields_to_default(&tablet_schema); + auto tablet_schema_set_fields = get_set_fields(tablet_schema); + + TabletSchemaCloudPB tablet_schema_cloud; + set_all_fields_to_default(&tablet_schema_cloud); + auto tablet_schema_cloud_set_fields = get_set_fields(tablet_schema_cloud); + EXPECT_EQ(tablet_schema_set_fields.size(), tablet_schema_cloud_set_fields.size()); + + TabletSchemaCloudPB tablet_schema_cloud_out; + tablet_schema_cloud_out = doris_tablet_schema_to_cloud(std::move(tablet_schema)); + auto tablet_schema_cloud_out_set_fields = get_set_fields(tablet_schema_cloud_out); + EXPECT_EQ(tablet_schema_set_fields.size(), tablet_schema_cloud_out_set_fields.size()) + << "doris_tablet_schema_to_cloud() missing output fields," + << "\n input_fields=" << print(tablet_schema_set_fields) + << "\n output_fields=" << print(tablet_schema_cloud_out_set_fields) + << "\n diff=" << print(set_diff(tablet_schema_set_fields, tablet_schema_cloud_out_set_fields)); + + TabletSchemaPB tablet_schema_out; + tablet_schema_out = cloud_tablet_schema_to_doris(std::move(tablet_schema_cloud)); + auto tablet_schema_out_set_fields = get_set_fields(tablet_schema_out); + EXPECT_EQ(tablet_schema_cloud_set_fields.size(), tablet_schema_out_set_fields.size()) + << "cloud_tablet_schema_to_doris() missing output fields," + << "\n input_fields=" << print(tablet_schema_cloud_set_fields) + << "\n output_fields=" << print(tablet_schema_out_set_fields) + << "\n diff=" << print(set_diff(tablet_schema_cloud_set_fields, tablet_schema_out_set_fields)); + + // tablet meta + TabletMetaPB tablet_meta; + set_all_fields_to_default(&tablet_meta); + auto 
tablet_meta_set_fields = get_set_fields(tablet_meta); + + TabletMetaCloudPB tablet_meta_cloud; + set_all_fields_to_default(&tablet_meta_cloud); + auto tablet_meta_cloud_set_fields = get_set_fields(tablet_meta_cloud); + EXPECT_EQ(tablet_meta_set_fields.size(), tablet_meta_cloud_set_fields.size()); + + TabletMetaCloudPB tablet_meta_cloud_out; + tablet_meta_cloud_out = doris_tablet_meta_to_cloud(std::move(tablet_meta)); + auto tablet_meta_cloud_out_set_fields = get_set_fields(tablet_meta_cloud_out); + EXPECT_EQ(tablet_meta_set_fields.size(), tablet_meta_cloud_out_set_fields.size()) + << "doris_tablet_meta_to_cloud() missing output fields," + << "\n input_fields=" << print(tablet_meta_set_fields) + << "\n output_fields=" << print(tablet_meta_cloud_out_set_fields) + << "\n diff=" << print(set_diff(tablet_meta_set_fields, tablet_meta_cloud_out_set_fields)); + + TabletMetaPB tablet_meta_out; + tablet_meta_out = cloud_tablet_meta_to_doris(std::move(tablet_meta_cloud)); + auto tablet_meta_out_set_fields = get_set_fields(tablet_meta_out); + EXPECT_EQ(tablet_meta_cloud_set_fields.size(), tablet_meta_out_set_fields.size()) + << "cloud_tablet_meta_to_doris() missing output fields," + << "\n input_fields=" << print(tablet_meta_cloud_set_fields) + << "\n output_fields=" << print(tablet_meta_out_set_fields) + << "\n diff=" << print(set_diff(tablet_meta_cloud_set_fields, tablet_meta_out_set_fields)); +} + +TEST(PbConvert, test_return_value_overloads) { + // rowset meta + RowsetMetaPB rs; + set_all_fields_to_default(&rs); + auto rs_set_fields = get_set_fields(rs); + EXPECT_EQ(rs_set_fields.size(), RowsetMetaPB::GetDescriptor()->field_count()); + + RowsetMetaCloudPB rs_cloud; + set_all_fields_to_default(&rs_cloud); + auto rs_cloud_set_fields = get_set_fields(rs_cloud); + EXPECT_EQ(rs_cloud_set_fields.size(), RowsetMetaCloudPB::GetDescriptor()->field_count()); + EXPECT_EQ(rs_set_fields.size(), rs_cloud_set_fields.size()); + + RowsetMetaCloudPB rs_cloud_out; + rs_cloud_out = 
doris_rowset_meta_to_cloud(std::move(rs)); + auto rs_cloud_out_set_fields = get_set_fields(rs_cloud_out); + EXPECT_EQ(rs_set_fields.size(), rs_cloud_out_set_fields.size()) + << "doris_rowset_meta_to_cloud() missing output fields," + << "\n input_fields=" << print(rs_set_fields) + << "\n output_fields=" << print(rs_cloud_out_set_fields) + << "\n diff=" << print(set_diff(rs_set_fields, rs_cloud_out_set_fields)); + + RowsetMetaPB rs_out; + rs_out = cloud_rowset_meta_to_doris(std::move(rs_cloud)); + auto rs_out_set_fields = get_set_fields(rs_out); + EXPECT_EQ(rs_cloud_set_fields.size(), rs_out_set_fields.size()) + << "cloud_rowset_meta_to_doris() missing output fields," + << "\n input_fields=" << print(rs_cloud_set_fields) + << "\n output_fields=" << print(rs_out_set_fields) + << "\n diff=" << print(set_diff(rs_cloud_set_fields, rs_out_set_fields)); + + // tablet schema + TabletSchemaPB tablet_schema; + set_all_fields_to_default(&tablet_schema); + auto tablet_schema_set_fields = get_set_fields(tablet_schema); + EXPECT_EQ(tablet_schema_set_fields.size(), TabletSchemaPB::GetDescriptor()->field_count()); + + TabletSchemaCloudPB tablet_schema_cloud; + set_all_fields_to_default(&tablet_schema_cloud); + auto tablet_schema_cloud_set_fields = get_set_fields(tablet_schema_cloud); + EXPECT_EQ(tablet_schema_cloud_set_fields.size(), TabletSchemaCloudPB::GetDescriptor()->field_count()); + EXPECT_EQ(tablet_schema_set_fields.size(), tablet_schema_cloud_set_fields.size()); + + TabletSchemaCloudPB tablet_schema_cloud_out; + tablet_schema_cloud_out = doris_tablet_schema_to_cloud(std::move(tablet_schema)); + auto tablet_schema_cloud_out_set_fields = get_set_fields(tablet_schema_cloud_out); + EXPECT_EQ(tablet_schema_set_fields.size(), tablet_schema_cloud_out_set_fields.size()) + << "doris_tablet_schema_to_cloud() missing output fields," + << "\n input_fields=" << print(tablet_schema_set_fields) + << "\n output_fields=" << print(tablet_schema_cloud_out_set_fields) + << "\n diff=" << 
print(set_diff(tablet_schema_set_fields, tablet_schema_cloud_out_set_fields)); + + TabletSchemaPB tablet_schema_out; + tablet_schema_out = cloud_tablet_schema_to_doris(std::move(tablet_schema_cloud)); + auto tablet_schema_out_set_fields = get_set_fields(tablet_schema_out); + EXPECT_EQ(tablet_schema_cloud_set_fields.size(), tablet_schema_out_set_fields.size()) + << "cloud_tablet_schema_to_doris() missing output fields," + << "\n input_fields=" << print(tablet_schema_cloud_set_fields) + << "\n output_fields=" << print(tablet_schema_out_set_fields) + << "\n diff=" << print(set_diff(tablet_schema_cloud_set_fields, tablet_schema_out_set_fields)); +} + +// clang-format on + +} // namespace doris + +// vim: et tw=80 ts=4 sw=4 cc=80: diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 3bd76279b45fda..d831eb085c7f8c 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -137,6 +137,7 @@ message RowsetMetaPB { optional bool enable_inverted_index_file_info = 1006; repeated InvertedIndexFileInfo inverted_index_file_info = 1007; + optional SchemaDictKeyList schema_dict_key_list = 1008; // align to cloud rowset } message SchemaDictKeyList { @@ -383,6 +384,7 @@ message TabletSchemaPB { optional CompressKind compress_kind = 5; // OLAPHeaderMessage.compress_kind optional double bf_fpp = 6; // OLAPHeaderMessage.bf_fpp optional uint32 next_column_unique_id = 7; // OLAPHeaderMessage.next_column_unique_id + // FIXME(gavin): deprecate and remove in the future optional bool is_in_memory = 8 [default=false]; optional int32 delete_sign_idx = 9 [default = -1]; optional int32 sequence_col_idx = 10 [default= -1]; @@ -395,8 +397,8 @@ message TabletSchemaPB { optional int32 version_col_idx = 17 [default = -1]; optional bool store_row_column = 18 [default=false]; // store tuplerow oriented column optional bool is_dynamic_schema = 19 [default=false]; // deprecated - optional bool is_partial_update = 20 [default=false]; // deprecated - repeated 
string partial_update_input_columns = 21; // deprecated + reserved 20; // deprecated is_partial_update + reserved 21; // deprecated partial_update_input_columns optional bool enable_single_replica_compaction = 22 [default=false]; optional bool skip_write_index_on_load = 23 [default=false]; repeated int32 cluster_key_uids = 24; @@ -405,7 +407,7 @@ message TabletSchemaPB { repeated int32 row_store_column_unique_ids = 26; optional int64 row_store_page_size = 27 [default=16384]; - optional bool variant_enable_flatten_nested = 28 [default=false]; + optional bool enable_variant_flatten_nested = 28 [default=false]; optional int32 skip_bitmap_col_idx = 29 [default = -1]; optional int64 storage_page_size = 30 [default=65536]; } @@ -437,12 +439,15 @@ message TabletSchemaCloudPB { // column unique ids for row store columns repeated int32 row_store_column_unique_ids = 26; optional int64 row_store_page_size = 27 [default=16384]; - optional bool enable_mow_light_delete = 28 [default=false]; + reserved 28; // deprecated enable_mow_light_delete optional bool enable_variant_flatten_nested = 29 [default=false]; optional int32 skip_bitmap_col_idx = 30 [default = -1]; optional int64 storage_page_size = 31 [default=65536]; optional bool is_dynamic_schema = 100 [default=false]; + + // FIXME(gavin): deprecate and remove in the future + optional bool is_in_memory = 200 [default=false]; // unused, just keep align to TabletSchemaPB } enum TabletStatePB { @@ -578,8 +583,8 @@ message TabletMetaCloudPB { optional int64 time_series_compaction_goal_size_mbytes = 32 [default = 1024]; optional int64 time_series_compaction_file_count_threshold = 33 [default = 2000]; optional int64 time_series_compaction_time_threshold_seconds = 34 [default = 3600]; - optional int64 group_commit_interval_ms = 35 [default = 10000]; - optional int64 group_commit_data_bytes = 36 [default = 134217728]; + reserved 35; // deprecated group_commit_interval_ms + reserved 36; // deprecated group_commit_data_bytes optional 
int64 time_series_compaction_empty_rowsets_threshold = 37 [default = 5]; optional int64 time_series_compaction_level_threshold = 38 [default = 1];