[Improve](Tablet Schema) Use deterministic way to serialize protobuf (#101) (#30906)

This commit is contained in:
lihangyu
2024-02-18 10:37:34 +08:00
committed by yiguolei
parent f9e16e08eb
commit b0d2ecbf52
3 changed files with 24 additions and 7 deletions

View File

@ -130,7 +130,8 @@ void RowsetMeta::set_tablet_schema(const TabletSchemaPB& tablet_schema) {
if (_handle) {
TabletSchemaCache::instance()->release(_handle);
}
auto pair = TabletSchemaCache::instance()->insert(tablet_schema.SerializeAsString());
auto pair = TabletSchemaCache::instance()->insert(
TabletSchema::deterministic_string_serialize(tablet_schema));
_handle = pair.first;
_schema = pair.second;
}

View File

@ -20,6 +20,9 @@
#include <gen_cpp/Descriptors_types.h>
#include <gen_cpp/olap_file.pb.h>
#include <glog/logging.h>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include <algorithm>
#include <cctype>
@ -27,6 +30,7 @@
#include <cmath> // IWYU pragma: keep
#include <memory>
#include <ostream>
#include <vector>
#include "common/compiler_util.h" // IWYU pragma: keep
#include "common/consts.h"
@ -780,7 +784,7 @@ void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
index->add_col_unique_id(col_unique_id);
}
index->set_index_type(_index_type);
for (auto& kv : _properties) {
for (const auto& kv : _properties) {
(*index->mutable_properties())[kv.first] = kv.second;
}
index->set_index_suffix_name(_escaped_index_suffix_path);
@ -944,7 +948,7 @@ void TabletSchema::copy_from(const TabletSchema& tablet_schema) {
std::string TabletSchema::to_key() const {
TabletSchemaPB pb;
to_schema_pb(&pb);
return pb.SerializeAsString();
return TabletSchema::deterministic_string_serialize(pb);
}
void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version,
@ -1411,4 +1415,13 @@ bool operator!=(const TabletSchema& a, const TabletSchema& b) {
return !(a == b);
}
// Serializes `schema_pb` into a byte string with deterministic serialization
// enabled on the coded output stream, and returns those bytes.
//
// Both streams live in an inner scope on purpose: CodedOutputStream buffers
// its writes and only hands unused buffer space back to the underlying
// StringOutputStream when it is destroyed. Destroying the streams before
// `output` is read guarantees the string holds exactly the serialized bytes,
// instead of depending on the compiler applying NRVO to `output`.
std::string TabletSchema::deterministic_string_serialize(const TabletSchemaPB& schema_pb) {
    std::string output;
    {
        google::protobuf::io::StringOutputStream string_output_stream(&output);
        google::protobuf::io::CodedOutputStream output_stream(&string_output_stream);
        output_stream.SetSerializationDeterministic(true);
        schema_pb.SerializeToCodedStream(&output_stream);
    } // streams are destroyed here, finalizing the contents of `output`
    return output;
}
} // namespace doris

View File

@ -253,6 +253,9 @@ public:
// void create_from_pb(const TabletSchemaPB& schema, TabletSchema* tablet_schema).
TabletSchema() = default;
void init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns = false);
// Notice: Serialize the protobuf in a deterministic way, since serializing
// map fields in protobuf may produce non-deterministic output by default.
static std::string deterministic_string_serialize(const TabletSchemaPB& schema_pb);
void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const;
void append_column(TabletColumn column, ColumnType col_type = ColumnType::NORMAL);
void append_index(TabletIndex index);
@ -377,17 +380,17 @@ public:
// Dump [(name, type, is_nullable), ...]
string dump_structure() const {
string str = "[";
for (auto p : _field_name_to_index) {
for (auto p : _cols) {
if (str.size() > 1) {
str += ", ";
}
str += "(";
str += p.first;
str += p.name();
str += ", ";
str += TabletColumn::get_string_by_field_type(_cols[p.second].type());
str += TabletColumn::get_string_by_field_type(p.type());
str += ", ";
str += "is_nullable:";
str += (_cols[p.second].is_nullable() ? "true" : "false");
str += (p.is_nullable() ? "true" : "false");
str += ")";
}
str += "]";