From 0b6447faeb40e30bbe0716be54278ed209adc0da Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Thu, 17 Oct 2024 10:39:07 +0800 Subject: [PATCH] =?UTF-8?q?[Fix](SchemaChange)=20refactor=20variant=20root?= =?UTF-8?q?=20column=20iterator=20to=20make=20row=E2=80=A6=20(#41941)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pick #41700 --- .../olap/rowset/segment_v2/column_reader.cpp | 88 +++++++++---------- be/src/olap/rowset/segment_v2/column_reader.h | 3 + be/src/vec/columns/column_object.cpp | 6 ++ be/src/vec/columns/column_object.h | 2 + 4 files changed, 53 insertions(+), 46 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 4f616ced90..e8108faeaf 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -1501,28 +1501,14 @@ void DefaultValueColumnIterator::_insert_many_default(vectorized::MutableColumnP } } -Status VariantRootColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, - bool* has_null) { - size_t size = dst->size(); +Status VariantRootColumnIterator::_process_root_column( + vectorized::MutableColumnPtr& dst, vectorized::MutableColumnPtr& root_column, + const vectorized::DataTypePtr& most_common_type) { auto& obj = dst->is_nullable() ? assert_cast( assert_cast(*dst).get_nested_column()) : assert_cast(*dst); - if (obj.is_null_root()) { - obj.create_root(); - } - if (!obj.is_finalized()) { - obj.finalize(); - } - auto root_column = obj.get_root(); - RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null)); - obj.incr_num_rows(*n); - for (auto& entry : obj.get_subcolumns()) { - if (entry->data.size() != size + *n) { - entry->data.insertManyDefaults(*n); - } - } // fill nullmap if (root_column->is_nullable() && dst->is_nullable()) { vectorized::ColumnUInt8& dst_null_map = @@ -1531,47 +1517,57 @@ Status VariantRootColumnIterator::next_batch(size_t* n, vectorized::MutableColum assert_cast(*root_column).get_null_map_column(); dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size()); } + + // add root column to a tmp object column + auto tmp = vectorized::ColumnObject::create(true, false); + auto& tmp_obj = assert_cast(*tmp); + tmp_obj.add_sub_column({}, std::move(root_column), most_common_type); + + // merge tmp object column to dst + obj.insert_range_from(*tmp, 0, tmp->size()); + + // finalize object if needed + if (!obj.is_finalized()) { + obj.finalize(); + } + #ifndef NDEBUG obj.check_consistency(); #endif + return Status::OK(); } -Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, - vectorized::MutableColumnPtr& dst) { - size_t size = dst->size(); +Status VariantRootColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, + bool* has_null) { + // read root column auto& obj = dst->is_nullable() ? assert_cast( assert_cast(*dst).get_nested_column()) : assert_cast(*dst); - if (obj.is_null_root()) { - obj.create_root(); - } - if (!obj.is_finalized()) { - obj.finalize(); - } - auto root_column = obj.get_root(); + + auto most_common_type = obj.get_most_common_type(); + auto root_column = most_common_type->create_column(); + RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null)); + + return _process_root_column(dst, root_column, most_common_type); +} + +Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, + vectorized::MutableColumnPtr& dst) { + // read root column + auto& obj = + dst->is_nullable() + ? assert_cast( + assert_cast(*dst).get_nested_column()) + : assert_cast(*dst); + + auto most_common_type = obj.get_most_common_type(); + auto root_column = most_common_type->create_column(); RETURN_IF_ERROR(_inner_iter->read_by_rowids(rowids, count, root_column)); - obj.incr_num_rows(count); - for (auto& entry : obj.get_subcolumns()) { - if (entry->data.size() != (size + count)) { - entry->data.insertManyDefaults(count); - } - } - // fill nullmap - if (root_column->is_nullable() && dst->is_nullable()) { - vectorized::ColumnUInt8& dst_null_map = - assert_cast(*dst).get_null_map_column(); - vectorized::ColumnUInt8& src_null_map = - assert_cast(*root_column).get_null_map_column(); - DCHECK_EQ(src_null_map.size() - size, count); - dst_null_map.insert_range_from(src_null_map, size, count); - } -#ifndef NDEBUG - obj.check_consistency(); -#endif - return Status::OK(); + + return _process_root_column(dst, root_column, most_common_type); } } // namespace segment_v2 diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index f7330b1727..0478427bcd 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -633,6 +633,9 @@ public: ordinal_t get_current_ordinal() const override { return _inner_iter->get_current_ordinal(); } private: + Status _process_root_column(vectorized::MutableColumnPtr& dst, + vectorized::MutableColumnPtr& root_column, + const vectorized::DataTypePtr& most_common_type); std::unique_ptr _inner_iter; }; diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 8c139ba8fb..25b614b06d 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -1563,6 +1563,12 @@ void ColumnObject::create_root(const DataTypePtr& type, MutableColumnPtr&& colum add_sub_column({}, std::move(column), type); } +DataTypePtr ColumnObject::get_most_common_type() const { + auto type = is_nullable ? make_nullable(std::make_shared()) + : std::make_shared(); + return type; +} + bool ColumnObject::is_null_root() const { auto* root = subcolumns.get_root(); if (root == nullptr) { diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index 516d232463..20848e9d0a 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -274,6 +274,8 @@ public: // create root with type and column if missing void create_root(const DataTypePtr& type, MutableColumnPtr&& column); + DataTypePtr get_most_common_type() const; + // root is null or type nothing bool is_null_root() const;