diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h index 3197e95cf7..b9a4ff25ce 100644 --- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h +++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h @@ -61,6 +61,14 @@ struct SubcolumnReader { }; using SubcolumnColumnReaders = vectorized::SubcolumnsTree; +struct PathWithColumnAndType { + vectorized::PathInData path; + vectorized::ColumnPtr column; + vectorized::DataTypePtr type; +}; + +using PathsWithColumnAndType = std::vector; + // Reader for hierarchical data for variant, merge with root(sparse encoded columns) class HierarchicalDataReader : public ColumnIterator { public: diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index edc0177892..2d454ed449 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -805,11 +805,7 @@ void ColumnObject::try_insert(const Field& field) { } const auto& object = field.get(); size_t old_size = size(); - for (const auto& [key_str, value] : object) { - PathInData key; - if (!key_str.empty()) { - key = PathInData(key_str); - } + for (const auto& [key, value] : object) { if (!has_subcolumn(key)) { bool succ = add_sub_column(key, old_size); if (!succ) { @@ -894,7 +890,7 @@ void ColumnObject::get(size_t n, Field& res) const { auto& object = res.get(); for (const auto& entry : subcolumns) { - auto it = object.try_emplace(entry->path.get_path()).first; + auto it = object.try_emplace(entry->path).first; entry->data.get(n, it->second); } } diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index 8113dc602f..922f9abb13 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -43,6 +43,7 @@ #include "util/quantile_state.h" #include "vec/common/uint128.h" #include "vec/core/types.h" +#include "vec/json/path_in_data.h" namespace doris { namespace vectorized { @@ -153,13 +154,7 @@ DEFINE_FIELD_VECTOR(Tuple); DEFINE_FIELD_VECTOR(Map); #undef DEFINE_FIELD_VECTOR -using FieldMap = std::map>; -#define DEFINE_FIELD_MAP(X) \ - struct X : public FieldMap { \ - using FieldMap::FieldMap; \ - } -DEFINE_FIELD_MAP(VariantMap); -#undef DEFINE_FIELD_MAP +using VariantMap = std::map; class JsonbField { public: diff --git a/be/src/vec/data_types/data_type_object.cpp b/be/src/vec/data_types/data_type_object.cpp index c3c43c1bf6..40637ced6c 100644 --- a/be/src/vec/data_types/data_type_object.cpp +++ b/be/src/vec/data_types/data_type_object.cpp @@ -69,6 +69,7 @@ int64_t DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column, } PColumnMeta column_meta_pb; column_meta_pb.set_name(entry->path.get_path()); + entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not used here*/); type->to_pb_column_meta(&column_meta_pb); std::string meta_binary; column_meta_pb.SerializeToString(&meta_binary); @@ -112,6 +113,7 @@ char* DataTypeObject::serialize(const IColumn& column, char* buf, int be_exec_ve ++num_of_columns; PColumnMeta column_meta_pb; column_meta_pb.set_name(entry->path.get_path()); + entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not used here*/); type->to_pb_column_meta(&column_meta_pb); std::string meta_binary; column_meta_pb.SerializeToString(&meta_binary); @@ -157,11 +159,15 @@ const char* DataTypeObject::deserialize(const char* buf, IColumn* column, MutableColumnPtr sub_column = type->create_column(); buf = type->deserialize(buf, sub_column.get(), be_exec_version); - // add subcolumn to column_object PathInData key; - if (!column_meta_pb.name().empty()) { + if (column_meta_pb.has_column_path()) { + // init from path pb + key.from_protobuf(column_meta_pb.column_path()); + } else if (!column_meta_pb.name().empty()) { + // init from name for compatible key = PathInData {column_meta_pb.name()}; } + // add subcolumn to column_object column_object->add_sub_column(key, std::move(sub_column), type); } size_t num_rows = 0; diff --git a/be/src/vec/json/json_parser.h b/be/src/vec/json/json_parser.h index 576c7dcba7..115d661d4d 100644 --- a/be/src/vec/json/json_parser.h +++ b/be/src/vec/json/json_parser.h @@ -28,6 +28,7 @@ #include #include +#include "runtime/primitive_type.h" #include "util/jsonb_writer.h" #include "vec/columns/column.h" #include "vec/common/string_ref.h" @@ -120,6 +121,13 @@ enum class ExtractType { ToString = 0, // ... }; +/// Result of parsing of a document. +/// Contains all paths extracted from document +/// and values which are related to them. +struct ParseResult { + std::vector paths; + std::vector values; +}; template class JSONDataParser { public: diff --git a/be/src/vec/json/path_in_data.h b/be/src/vec/json/path_in_data.h index 1367970f10..2b511db441 100644 --- a/be/src/vec/json/path_in_data.h +++ b/be/src/vec/json/path_in_data.h @@ -30,7 +30,6 @@ #include "gen_cpp/segment_v2.pb.h" #include "vec/common/uint128.h" -#include "vec/core/field.h" #include "vec/core/types.h" namespace doris::vectorized { @@ -123,13 +122,6 @@ private: size_t current_anonymous_array_level = 0; }; using PathsInData = std::vector; -/// Result of parsing of a document. -/// Contains all paths extracted from document -/// and values which are related to them. -struct ParseResult { - std::vector paths; - std::vector values; -}; struct PathInDataRef { const PathInData* ref; diff --git a/be/test/vec/columns/column_object_test.cpp b/be/test/vec/columns/column_object_test.cpp index 06d987e414..a7498e82e8 100644 --- a/be/test/vec/columns/column_object_test.cpp +++ b/be/test/vec/columns/column_object_test.cpp @@ -22,6 +22,7 @@ #include #include "vec/columns/common_column_test.h" +#include "vec/json/path_in_data.h" namespace doris::vectorized { diff --git a/gensrc/proto/data.proto b/gensrc/proto/data.proto index 755a3a042d..7cb126e973 100644 --- a/gensrc/proto/data.proto +++ b/gensrc/proto/data.proto @@ -62,6 +62,8 @@ message PColumnMeta { repeated PColumnMeta children = 5; optional bool result_is_nullable = 6; optional string function_name = 7; + optional int32 be_exec_version = 8; + optional segment_v2.ColumnPathInfo column_path = 9; } message PBlock { diff --git a/regression-test/data/variant_p0/column_name.out b/regression-test/data/variant_p0/column_name.out index 6ac882d292..0f54df05d9 100644 --- a/regression-test/data/variant_p0/column_name.out +++ b/regression-test/data/variant_p0/column_name.out @@ -37,13 +37,25 @@ UPPER CASE lower case \N \N \N -"" -"" + + 1234566 16 8888888 -"UPPER CASE" -"dkdkdkdkdkd" -"ooaoaaaaaaa" -"xmxxmmmmmm" +UPPER CASE +dkdkdkdkdkd +ooaoaaaaaaa +xmxxmmmmmm + +-- !sql_cnt_1 -- +128 + +-- !sql_cnt_2 -- +128 + +-- !sql_cnt_3 -- +128 + +-- !sql_cnt_4 -- +128 diff --git a/regression-test/suites/variant_p0/column_name.groovy b/regression-test/suites/variant_p0/column_name.groovy index 26520aafa5..39b2b9766c 100644 --- a/regression-test/suites/variant_p0/column_name.groovy +++ b/regression-test/suites/variant_p0/column_name.groovy @@ -25,7 +25,7 @@ suite("regression_test_variant_column_name", "variant_type"){ ) DUPLICATE KEY(`k`) DISTRIBUTED BY HASH(k) BUCKETS 1 - properties("replication_num" = "1", "disable_auto_compaction" = "true"); + properties("replication_num" = "1", "disable_auto_compaction" = "false"); """ // sql "set experimental_enable_nereids_planner = false" @@ -63,7 +63,18 @@ suite("regression_test_variant_column_name", "variant_type"){ sql """insert into var_column_name values (7, '{"": 1234566}')""" sql """insert into var_column_name values (7, '{"": 8888888}')""" - qt_sql "select Tags[''] from var_column_name order by cast(Tags[''] as string)" + qt_sql "select cast(Tags[''] as text) from var_column_name order by cast(Tags[''] as string)" + + // name with `.` + sql "truncate table var_column_name" + sql """insert into var_column_name values (7, '{"a.b": "UPPER CASE", "a.c": "lower case", "a" : {"b" : 123}, "a" : {"c" : 456}}')""" + for (int i = 0; i < 7; i++) { + sql """insert into var_column_name select * from var_column_name""" + } + qt_sql_cnt_1 "select count(Tags['a.b']) from var_column_name" + qt_sql_cnt_2 "select count(Tags['a.c']) from var_column_name" + qt_sql_cnt_3 "select count(Tags['a']['b']) from var_column_name" + qt_sql_cnt_4 "select count(Tags['a']['c']) from var_column_name" try { sql """insert into var_column_name values (7, '{"": "UPPER CASE", "": "lower case"}')"""