branch-2.1 cherry-pick [Fix](Variant) fix serialize with json key contains . as name (#51864)

cherry-pick from #51857
This commit is contained in:
lihangyu
2025-06-20 14:00:00 +08:00
committed by GitHub
parent 43a5116b9e
commit a75760d18f
10 changed files with 62 additions and 31 deletions

View File

@ -61,6 +61,14 @@ struct SubcolumnReader {
};
using SubcolumnColumnReaders = vectorized::SubcolumnsTree<SubcolumnReader>;
// Groups a variant path together with a column and a data type.
// NOTE(review): presumably `column` holds the data stored under `path` and
// `type` is that column's data type; confirm against the code that fills it.
struct PathWithColumnAndType {
// Path identifying this entry.
vectorized::PathInData path;
// Column carried alongside `path`.
vectorized::ColumnPtr column;
// Data type carried alongside `column`.
vectorized::DataTypePtr type;
};
using PathsWithColumnAndType = std::vector<PathWithColumnAndType>;
// Reader for hierarchical data for variant, merge with root(sparse encoded columns)
class HierarchicalDataReader : public ColumnIterator {
public:

View File

@ -805,11 +805,7 @@ void ColumnObject::try_insert(const Field& field) {
}
const auto& object = field.get<const VariantMap&>();
size_t old_size = size();
for (const auto& [key_str, value] : object) {
PathInData key;
if (!key_str.empty()) {
key = PathInData(key_str);
}
for (const auto& [key, value] : object) {
if (!has_subcolumn(key)) {
bool succ = add_sub_column(key, old_size);
if (!succ) {
@ -894,7 +890,7 @@ void ColumnObject::get(size_t n, Field& res) const {
auto& object = res.get<VariantMap&>();
for (const auto& entry : subcolumns) {
auto it = object.try_emplace(entry->path.get_path()).first;
auto it = object.try_emplace(entry->path).first;
entry->data.get(n, it->second);
}
}

View File

@ -43,6 +43,7 @@
#include "util/quantile_state.h"
#include "vec/common/uint128.h"
#include "vec/core/types.h"
#include "vec/json/path_in_data.h"
namespace doris {
namespace vectorized {
@ -153,13 +154,7 @@ DEFINE_FIELD_VECTOR(Tuple);
DEFINE_FIELD_VECTOR(Map);
#undef DEFINE_FIELD_VECTOR
using FieldMap = std::map<String, Field, std::less<String>>;
#define DEFINE_FIELD_MAP(X) \
struct X : public FieldMap { \
using FieldMap::FieldMap; \
}
DEFINE_FIELD_MAP(VariantMap);
#undef DEFINE_FIELD_MAP
using VariantMap = std::map<PathInData, Field>;
class JsonbField {
public:

View File

@ -69,6 +69,7 @@ int64_t DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column,
}
PColumnMeta column_meta_pb;
column_meta_pb.set_name(entry->path.get_path());
entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not used here*/);
type->to_pb_column_meta(&column_meta_pb);
std::string meta_binary;
column_meta_pb.SerializeToString(&meta_binary);
@ -112,6 +113,7 @@ char* DataTypeObject::serialize(const IColumn& column, char* buf, int be_exec_ve
++num_of_columns;
PColumnMeta column_meta_pb;
column_meta_pb.set_name(entry->path.get_path());
entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not used here*/);
type->to_pb_column_meta(&column_meta_pb);
std::string meta_binary;
column_meta_pb.SerializeToString(&meta_binary);
@ -157,11 +159,15 @@ const char* DataTypeObject::deserialize(const char* buf, IColumn* column,
MutableColumnPtr sub_column = type->create_column();
buf = type->deserialize(buf, sub_column.get(), be_exec_version);
// add subcolumn to column_object
PathInData key;
if (!column_meta_pb.name().empty()) {
if (column_meta_pb.has_column_path()) {
// init from path pb
key.from_protobuf(column_meta_pb.column_path());
} else if (!column_meta_pb.name().empty()) {
// init from name for backward compatibility
key = PathInData {column_meta_pb.name()};
}
// add subcolumn to column_object
column_object->add_sub_column(key, std::move(sub_column), type);
}
size_t num_rows = 0;

View File

@ -28,6 +28,7 @@
#include <utility>
#include <vector>
#include "runtime/primitive_type.h"
#include "util/jsonb_writer.h"
#include "vec/columns/column.h"
#include "vec/common/string_ref.h"
@ -120,6 +121,13 @@ enum class ExtractType {
ToString = 0,
// ...
};
/// Result of parsing of a document.
/// Contains all paths extracted from document
/// and values which are related to them.
/// NOTE(review): `paths` and `values` look like parallel arrays
/// (paths[i] pairs with values[i]); confirm against the parser that fills them.
struct ParseResult {
/// Paths extracted from the document.
std::vector<PathInData> paths;
/// Values related to the extracted paths.
std::vector<Field> values;
};
template <typename ParserImpl, bool parse_nested = false>
class JSONDataParser {
public:

View File

@ -30,7 +30,6 @@
#include "gen_cpp/segment_v2.pb.h"
#include "vec/common/uint128.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
namespace doris::vectorized {
@ -123,13 +122,6 @@ private:
size_t current_anonymous_array_level = 0;
};
using PathsInData = std::vector<PathInData>;
/// Result of parsing of a document.
/// Contains all paths extracted from document
/// and values which are related to them.
/// NOTE(review): `paths` and `values` look like parallel arrays
/// (paths[i] pairs with values[i]); confirm against the parser that fills them.
struct ParseResult {
/// Paths extracted from the document.
std::vector<PathInData> paths;
/// Values related to the extracted paths.
std::vector<Field> values;
};
struct PathInDataRef {
const PathInData* ref;

View File

@ -22,6 +22,7 @@
#include <gtest/gtest.h>
#include "vec/columns/common_column_test.h"
#include "vec/json/path_in_data.h"
namespace doris::vectorized {

View File

@ -62,6 +62,8 @@ message PColumnMeta {
repeated PColumnMeta children = 5;
optional bool result_is_nullable = 6;
optional string function_name = 7;
optional int32 be_exec_version = 8;
optional segment_v2.ColumnPathInfo column_path = 9;
}
message PBlock {

View File

@ -37,13 +37,25 @@ UPPER CASE lower case
\N
\N
\N
""
""
1234566
16
8888888
"UPPER CASE"
"dkdkdkdkdkd"
"ooaoaaaaaaa"
"xmxxmmmmmm"
UPPER CASE
dkdkdkdkdkd
ooaoaaaaaaa
xmxxmmmmmm
-- !sql_cnt_1 --
128
-- !sql_cnt_2 --
128
-- !sql_cnt_3 --
128
-- !sql_cnt_4 --
128

View File

@ -25,7 +25,7 @@ suite("regression_test_variant_column_name", "variant_type"){
)
DUPLICATE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 1
properties("replication_num" = "1", "disable_auto_compaction" = "true");
properties("replication_num" = "1", "disable_auto_compaction" = "false");
"""
// sql "set experimental_enable_nereids_planner = false"
@ -63,7 +63,18 @@ suite("regression_test_variant_column_name", "variant_type"){
sql """insert into var_column_name values (7, '{"": 1234566}')"""
sql """insert into var_column_name values (7, '{"": 8888888}')"""
qt_sql "select Tags[''] from var_column_name order by cast(Tags[''] as string)"
qt_sql "select cast(Tags[''] as text) from var_column_name order by cast(Tags[''] as string)"
// name with `.`
sql "truncate table var_column_name"
sql """insert into var_column_name values (7, '{"a.b": "UPPER CASE", "a.c": "lower case", "a" : {"b" : 123}, "a" : {"c" : 456}}')"""
for (int i = 0; i < 7; i++) {
sql """insert into var_column_name select * from var_column_name"""
}
qt_sql_cnt_1 "select count(Tags['a.b']) from var_column_name"
qt_sql_cnt_2 "select count(Tags['a.c']) from var_column_name"
qt_sql_cnt_3 "select count(Tags['a']['b']) from var_column_name"
qt_sql_cnt_4 "select count(Tags['a']['c']) from var_column_name"
try {
sql """insert into var_column_name values (7, '{"": "UPPER CASE", "": "lower case"}')"""