From b0d2ecbf526e5ee6fe413d762b99e67bb4c56e08 Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Sun, 18 Feb 2024 10:37:34 +0800 Subject: [PATCH] [Improve](Tablet Schema) Use deterministic way to serialize protobuf (#101) (#30906) --- be/src/olap/rowset/rowset_meta.cpp | 3 ++- be/src/olap/tablet_schema.cpp | 17 +++++++++++++++-- be/src/olap/tablet_schema.h | 11 +++++++---- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/be/src/olap/rowset/rowset_meta.cpp b/be/src/olap/rowset/rowset_meta.cpp index 7f4798f97e..d8ef2e7b5d 100644 --- a/be/src/olap/rowset/rowset_meta.cpp +++ b/be/src/olap/rowset/rowset_meta.cpp @@ -130,7 +130,8 @@ void RowsetMeta::set_tablet_schema(const TabletSchemaPB& tablet_schema) { if (_handle) { TabletSchemaCache::instance()->release(_handle); } - auto pair = TabletSchemaCache::instance()->insert(tablet_schema.SerializeAsString()); + auto pair = TabletSchemaCache::instance()->insert( + TabletSchema::deterministic_string_serialize(tablet_schema)); _handle = pair.first; _schema = pair.second; } diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 058299cb86..3892c762c0 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -20,6 +20,9 @@ #include #include #include +#include +#include +#include #include #include @@ -27,6 +30,7 @@ #include // IWYU pragma: keep #include #include +#include #include "common/compiler_util.h" // IWYU pragma: keep #include "common/consts.h" @@ -780,7 +784,7 @@ void TabletIndex::to_schema_pb(TabletIndexPB* index) const { index->add_col_unique_id(col_unique_id); } index->set_index_type(_index_type); - for (auto& kv : _properties) { + for (const auto& kv : _properties) { (*index->mutable_properties())[kv.first] = kv.second; } index->set_index_suffix_name(_escaped_index_suffix_path); @@ -944,7 +948,7 @@ void TabletSchema::copy_from(const TabletSchema& tablet_schema) { std::string TabletSchema::to_key() const { TabletSchemaPB pb; 
to_schema_pb(&pb); - return pb.SerializeAsString(); + return TabletSchema::deterministic_string_serialize(pb); } void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version, @@ -1411,4 +1415,13 @@ bool operator!=(const TabletSchema& a, const TabletSchema& b) { return !(a == b); } +std::string TabletSchema::deterministic_string_serialize(const TabletSchemaPB& schema_pb) { + std::string output; + google::protobuf::io::StringOutputStream string_output_stream(&output); + google::protobuf::io::CodedOutputStream output_stream(&string_output_stream); + output_stream.SetSerializationDeterministic(true); + schema_pb.SerializeToCodedStream(&output_stream); + return output; +} + } // namespace doris diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 5e735b22a9..d4a1919101 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -253,6 +253,9 @@ public: // void create_from_pb(const TabletSchemaPB& schema, TabletSchema* tablet_schema). TabletSchema() = default; void init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns = false); + // Note: serialize the protobuf deterministically, because map fields + // may otherwise be serialized in a non-deterministic order by default. + static std::string deterministic_string_serialize(const TabletSchemaPB& schema_pb); void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const; void append_column(TabletColumn column, ColumnType col_type = ColumnType::NORMAL); void append_index(TabletIndex index); @@ -377,17 +380,17 @@ public: // Dump [(name, type, is_nullable), ...] 
string dump_structure() const { string str = "["; - for (auto p : _field_name_to_index) { + for (auto p : _cols) { if (str.size() > 1) { str += ", "; } str += "("; - str += p.first; + str += p.name(); str += ", "; - str += TabletColumn::get_string_by_field_type(_cols[p.second].type()); + str += TabletColumn::get_string_by_field_type(p.type()); str += ", "; str += "is_nullable:"; - str += (_cols[p.second].is_nullable() ? "true" : "false"); + str += (p.is_nullable() ? "true" : "false"); str += ")"; } str += "]";