branch-2.1 cherry-pick [Fix](Variant) fix serialization when a JSON key contains `.` in its name (#51864)
cherry-pick from #51857
This commit is contained in:
@ -61,6 +61,14 @@ struct SubcolumnReader {
|
||||
};
|
||||
using SubcolumnColumnReaders = vectorized::SubcolumnsTree<SubcolumnReader>;
|
||||
|
||||
// Plain aggregate that groups a variant path with a column pointer and a
// data type pointer.
// NOTE(review): presumably `column` and `type` describe the subcolumn stored
// at `path` — confirm against the code that fills this struct.
struct PathWithColumnAndType {
|
||||
// Path value of type vectorized::PathInData.
vectorized::PathInData path;
|
||||
// Column pointer carried alongside the path.
vectorized::ColumnPtr column;
|
||||
// Data type pointer carried alongside the path.
vectorized::DataTypePtr type;
|
||||
};
|
||||
|
||||
using PathsWithColumnAndType = std::vector<PathWithColumnAndType>;
|
||||
|
||||
// Reader for hierarchical data for variant, merge with root(sparse encoded columns)
|
||||
class HierarchicalDataReader : public ColumnIterator {
|
||||
public:
|
||||
|
||||
@ -805,11 +805,7 @@ void ColumnObject::try_insert(const Field& field) {
|
||||
}
|
||||
const auto& object = field.get<const VariantMap&>();
|
||||
size_t old_size = size();
|
||||
for (const auto& [key_str, value] : object) {
|
||||
PathInData key;
|
||||
if (!key_str.empty()) {
|
||||
key = PathInData(key_str);
|
||||
}
|
||||
for (const auto& [key, value] : object) {
|
||||
if (!has_subcolumn(key)) {
|
||||
bool succ = add_sub_column(key, old_size);
|
||||
if (!succ) {
|
||||
@ -894,7 +890,7 @@ void ColumnObject::get(size_t n, Field& res) const {
|
||||
auto& object = res.get<VariantMap&>();
|
||||
|
||||
for (const auto& entry : subcolumns) {
|
||||
auto it = object.try_emplace(entry->path.get_path()).first;
|
||||
auto it = object.try_emplace(entry->path).first;
|
||||
entry->data.get(n, it->second);
|
||||
}
|
||||
}
|
||||
|
||||
@ -43,6 +43,7 @@
|
||||
#include "util/quantile_state.h"
|
||||
#include "vec/common/uint128.h"
|
||||
#include "vec/core/types.h"
|
||||
#include "vec/json/path_in_data.h"
|
||||
|
||||
namespace doris {
|
||||
namespace vectorized {
|
||||
@ -153,13 +154,7 @@ DEFINE_FIELD_VECTOR(Tuple);
|
||||
DEFINE_FIELD_VECTOR(Map);
|
||||
#undef DEFINE_FIELD_VECTOR
|
||||
|
||||
using FieldMap = std::map<String, Field, std::less<String>>;
|
||||
#define DEFINE_FIELD_MAP(X) \
|
||||
struct X : public FieldMap { \
|
||||
using FieldMap::FieldMap; \
|
||||
}
|
||||
DEFINE_FIELD_MAP(VariantMap);
|
||||
#undef DEFINE_FIELD_MAP
|
||||
using VariantMap = std::map<PathInData, Field>;
|
||||
|
||||
class JsonbField {
|
||||
public:
|
||||
|
||||
@ -69,6 +69,7 @@ int64_t DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column,
|
||||
}
|
||||
PColumnMeta column_meta_pb;
|
||||
column_meta_pb.set_name(entry->path.get_path());
|
||||
entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not used here*/);
|
||||
type->to_pb_column_meta(&column_meta_pb);
|
||||
std::string meta_binary;
|
||||
column_meta_pb.SerializeToString(&meta_binary);
|
||||
@ -112,6 +113,7 @@ char* DataTypeObject::serialize(const IColumn& column, char* buf, int be_exec_ve
|
||||
++num_of_columns;
|
||||
PColumnMeta column_meta_pb;
|
||||
column_meta_pb.set_name(entry->path.get_path());
|
||||
entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not used here*/);
|
||||
type->to_pb_column_meta(&column_meta_pb);
|
||||
std::string meta_binary;
|
||||
column_meta_pb.SerializeToString(&meta_binary);
|
||||
@ -157,11 +159,15 @@ const char* DataTypeObject::deserialize(const char* buf, IColumn* column,
|
||||
MutableColumnPtr sub_column = type->create_column();
|
||||
buf = type->deserialize(buf, sub_column.get(), be_exec_version);
|
||||
|
||||
// add subcolumn to column_object
|
||||
PathInData key;
|
||||
if (!column_meta_pb.name().empty()) {
|
||||
if (column_meta_pb.has_column_path()) {
|
||||
// init from path pb
|
||||
key.from_protobuf(column_meta_pb.column_path());
|
||||
} else if (!column_meta_pb.name().empty()) {
|
||||
// init from name for compatible
|
||||
key = PathInData {column_meta_pb.name()};
|
||||
}
|
||||
// add subcolumn to column_object
|
||||
column_object->add_sub_column(key, std::move(sub_column), type);
|
||||
}
|
||||
size_t num_rows = 0;
|
||||
|
||||
@ -28,6 +28,7 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "runtime/primitive_type.h"
|
||||
#include "util/jsonb_writer.h"
|
||||
#include "vec/columns/column.h"
|
||||
#include "vec/common/string_ref.h"
|
||||
@ -120,6 +121,13 @@ enum class ExtractType {
|
||||
ToString = 0,
|
||||
// ...
|
||||
};
|
||||
/// Result of parsing of a document.
|
||||
/// Contains all paths extracted from document
|
||||
/// and values which are related to them.
|
||||
struct ParseResult {
|
||||
// Paths extracted from the document.
std::vector<PathInData> paths;
|
||||
// Values related to the extracted paths.
// NOTE(review): presumably values[i] belongs to paths[i] — confirm with the parser.
std::vector<Field> values;
|
||||
};
|
||||
template <typename ParserImpl, bool parse_nested = false>
|
||||
class JSONDataParser {
|
||||
public:
|
||||
|
||||
@ -30,7 +30,6 @@
|
||||
|
||||
#include "gen_cpp/segment_v2.pb.h"
|
||||
#include "vec/common/uint128.h"
|
||||
#include "vec/core/field.h"
|
||||
#include "vec/core/types.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
@ -123,13 +122,6 @@ private:
|
||||
size_t current_anonymous_array_level = 0;
|
||||
};
|
||||
using PathsInData = std::vector<PathInData>;
|
||||
/// Result of parsing of a document.
|
||||
/// Contains all paths extracted from document
|
||||
/// and values which are related to them.
|
||||
struct ParseResult {
|
||||
// Paths extracted from the document.
std::vector<PathInData> paths;
|
||||
// Values related to the extracted paths.
// NOTE(review): presumably values[i] belongs to paths[i] — confirm with the parser.
std::vector<Field> values;
|
||||
};
|
||||
|
||||
struct PathInDataRef {
|
||||
const PathInData* ref;
|
||||
|
||||
@ -22,6 +22,7 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "vec/columns/common_column_test.h"
|
||||
#include "vec/json/path_in_data.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
|
||||
@ -62,6 +62,8 @@ message PColumnMeta {
|
||||
repeated PColumnMeta children = 5;
|
||||
optional bool result_is_nullable = 6;
|
||||
optional string function_name = 7;
|
||||
optional int32 be_exec_version = 8;
|
||||
optional segment_v2.ColumnPathInfo column_path = 9;
|
||||
}
|
||||
|
||||
message PBlock {
|
||||
|
||||
@ -37,13 +37,25 @@ UPPER CASE lower case
|
||||
\N
|
||||
\N
|
||||
\N
|
||||
""
|
||||
""
|
||||
|
||||
|
||||
1234566
|
||||
16
|
||||
8888888
|
||||
"UPPER CASE"
|
||||
"dkdkdkdkdkd"
|
||||
"ooaoaaaaaaa"
|
||||
"xmxxmmmmmm"
|
||||
UPPER CASE
|
||||
dkdkdkdkdkd
|
||||
ooaoaaaaaaa
|
||||
xmxxmmmmmm
|
||||
|
||||
-- !sql_cnt_1 --
|
||||
128
|
||||
|
||||
-- !sql_cnt_2 --
|
||||
128
|
||||
|
||||
-- !sql_cnt_3 --
|
||||
128
|
||||
|
||||
-- !sql_cnt_4 --
|
||||
128
|
||||
|
||||
|
||||
@ -25,7 +25,7 @@ suite("regression_test_variant_column_name", "variant_type"){
|
||||
)
|
||||
DUPLICATE KEY(`k`)
|
||||
DISTRIBUTED BY HASH(k) BUCKETS 1
|
||||
properties("replication_num" = "1", "disable_auto_compaction" = "true");
|
||||
properties("replication_num" = "1", "disable_auto_compaction" = "false");
|
||||
"""
|
||||
|
||||
// sql "set experimental_enable_nereids_planner = false"
|
||||
@ -63,7 +63,18 @@ suite("regression_test_variant_column_name", "variant_type"){
|
||||
sql """insert into var_column_name values (7, '{"": 1234566}')"""
|
||||
sql """insert into var_column_name values (7, '{"": 8888888}')"""
|
||||
|
||||
qt_sql "select Tags[''] from var_column_name order by cast(Tags[''] as string)"
|
||||
qt_sql "select cast(Tags[''] as text) from var_column_name order by cast(Tags[''] as string)"
|
||||
|
||||
// name with `.`
|
||||
sql "truncate table var_column_name"
|
||||
sql """insert into var_column_name values (7, '{"a.b": "UPPER CASE", "a.c": "lower case", "a" : {"b" : 123}, "a" : {"c" : 456}}')"""
|
||||
for (int i = 0; i < 7; i++) {
|
||||
sql """insert into var_column_name select * from var_column_name"""
|
||||
}
|
||||
qt_sql_cnt_1 "select count(Tags['a.b']) from var_column_name"
|
||||
qt_sql_cnt_2 "select count(Tags['a.c']) from var_column_name"
|
||||
qt_sql_cnt_3 "select count(Tags['a']['b']) from var_column_name"
|
||||
qt_sql_cnt_4 "select count(Tags['a']['c']) from var_column_name"
|
||||
|
||||
try {
|
||||
sql """insert into var_column_name values (7, '{"": "UPPER CASE", "": "lower case"}')"""
|
||||
|
||||
Reference in New Issue
Block a user