From 1244eed1cd88bc49e915039e8a559ab92353ebd1 Mon Sep 17 00:00:00 2001 From: HappenLee Date: Wed, 1 Mar 2023 23:25:40 +0800 Subject: [PATCH] [Opt](exec) opt the dispose nullable column logic (#17192) --- be/src/vec/core/block.cpp | 36 ++++++++++------- be/src/vec/core/block.h | 6 +-- .../format/parquet/vparquet_group_reader.cpp | 4 +- .../vec/exec/join/vnested_loop_join_node.cpp | 4 +- be/src/vec/functions/function.cpp | 40 ++++++++++++------- 5 files changed, 54 insertions(+), 36 deletions(-) diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index ec84a19b7b..4ac86f213a 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -177,15 +177,26 @@ void Block::erase(const std::set& positions) { } } -void Block::erase(size_t position) { - if (data.empty()) { - LOG(FATAL) << "Block is empty"; +void Block::erase_tail(size_t start) { + DCHECK(start <= data.size()) << fmt::format( + "Position out of bound in Block::erase(), max position = {}", data.size()); + data.erase(data.begin() + start, data.end()); + for (auto it = index_by_name.begin(); it != index_by_name.end();) { + if (it->second >= start) { + index_by_name.erase(it++); + } else { + ++it; + } } + if (start < row_same_bit.size()) { + row_same_bit.erase(row_same_bit.begin() + start, row_same_bit.end()); + } +} - if (position >= data.size()) { - LOG(FATAL) << fmt::format("Position out of bound in Block::erase(), max position = {}", - data.size() - 1); - } +void Block::erase(size_t position) { + DCHECK(!data.empty()) << "Block is empty"; + DCHECK(position < data.size()) << fmt::format( + "Position out of bound in Block::erase(), max position = {}", data.size() - 1); erase_impl(position); } @@ -700,21 +711,16 @@ void Block::append_block_by_selector(MutableBlock* dst, const IColumn::Selector& Status Block::filter_block(Block* block, const std::vector& columns_to_filter, int filter_column_id, int column_to_keep) { - ColumnPtr filter_column = block->get_by_position(filter_column_id).column; + const auto& filter_column = block->get_by_position(filter_column_id).column; if (auto* nullable_column = check_and_get_column(*filter_column)) { - ColumnPtr nested_column = nullable_column->get_nested_column_ptr(); + const auto& nested_column = nullable_column->get_nested_column_ptr(); MutableColumnPtr mutable_holder = nested_column->use_count() == 1 ? nested_column->assume_mutable() : nested_column->clone_resized(nested_column->size()); - ColumnUInt8* concrete_column = typeid_cast(mutable_holder.get()); - if (!concrete_column) { - return Status::InvalidArgument( - "Illegal type {} of column for filter. Must be UInt8 or Nullable(UInt8).", - filter_column->get_name()); - } + ColumnUInt8* concrete_column = assert_cast(mutable_holder.get()); auto* __restrict null_map = nullable_column->get_null_map_data().data(); IColumn::Filter& filter = concrete_column->get_data(); auto* __restrict filter_data = filter.data(); diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index a57ca2c543..ba1809f19a 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -100,6 +100,8 @@ public: void insert_unique(ColumnWithTypeAndName&& elem); /// remove the column at the specified position void erase(size_t position); + /// remove the column at the [start, end) + void erase_tail(size_t start); /// remove the columns at the specified positions void erase(const std::set& positions); /// remove the column with the specified name @@ -290,9 +292,7 @@ public: static Status filter_block(Block* block, int filter_column_id, int column_to_keep); static void erase_useless_column(Block* block, int column_to_keep) { - for (int i = block->columns() - 1; i >= column_to_keep; --i) { - block->erase(i); - } + block->erase_tail(column_to_keep); } // serialize block to PBlock diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index b5a3bf37b6..71f77f3735 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -256,7 +256,7 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re // generated from next batch, so the filter column is removed ahead. DCHECK_EQ(block->rows(), 0); } else { - ColumnPtr& filter_column = block->get_by_position(filter_column_id).column; + const auto& filter_column = block->get_by_position(filter_column_id).column; RETURN_IF_ERROR(_filter_block(block, filter_column, origin_column_num, _lazy_read_ctx.all_predicate_col_ids)); } @@ -496,7 +496,7 @@ Status RowGroupReader::_build_pos_delete_filter(size_t read_rows) { Status RowGroupReader::_filter_block(Block* block, const ColumnPtr& filter_column, int column_to_keep, std::vector columns_to_filter) { if (auto* nullable_column = check_and_get_column(*filter_column)) { - const ColumnPtr& nested_column = nullable_column->get_nested_column_ptr(); + const auto& nested_column = nullable_column->get_nested_column_ptr(); MutableColumnPtr mutable_holder = nested_column->use_count() == 1 diff --git a/be/src/vec/exec/join/vnested_loop_join_node.cpp b/be/src/vec/exec/join/vnested_loop_join_node.cpp index 8ba770c106..2555cea209 100644 --- a/be/src/vec/exec/join/vnested_loop_join_node.cpp +++ b/be/src/vec/exec/join/vnested_loop_join_node.cpp @@ -525,9 +525,9 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(Block* block, DCHECK((*_vjoin_conjunct_ptr) != nullptr); int result_column_id = -1; RETURN_IF_ERROR((*_vjoin_conjunct_ptr)->execute(block, &result_column_id)); - ColumnPtr filter_column = block->get_by_position(result_column_id).column; + const auto& filter_column = block->get_by_position(result_column_id).column; if (auto* nullable_column = check_and_get_column(*filter_column)) { - ColumnPtr nested_column = nullable_column->get_nested_column_ptr(); + const auto& nested_column = nullable_column->get_nested_column_ptr(); MutableColumnPtr mutable_holder = nested_column->use_count() == 1 diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp index 662a2a58af..e7c6871690 100644 --- a/be/src/vec/functions/function.cpp +++ b/be/src/vec/functions/function.cpp @@ -37,35 +37,40 @@ namespace doris::vectorized { ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const ColumnNumbers& args, size_t result, size_t input_rows_count) { ColumnPtr result_null_map_column; - /// If result is already nullable. ColumnPtr src_not_nullable = src; + MutableColumnPtr mutable_result_null_map_column; - if (src->only_null()) - return src; - else if (auto* nullable = check_and_get_column(*src)) { + if (auto* nullable = check_and_get_column(*src)) { src_not_nullable = nullable->get_nested_column_ptr(); result_null_map_column = nullable->get_null_map_column_ptr(); } for (const auto& arg : args) { const ColumnWithTypeAndName& elem = block.get_by_position(arg); - if (!elem.type->is_nullable()) continue; + if (!elem.type->is_nullable()) { + continue; + } + bool is_const = is_column_const(*elem.column); /// Const Nullable that are NULL. - if (elem.column->only_null()) + if (is_const && assert_cast(elem.column.get())->only_null()) { return block.get_by_position(result).type->create_column_const(input_rows_count, Null()); + } + if (is_const) { + continue; + } - if (is_column_const(*elem.column)) continue; - - if (auto* nullable = check_and_get_column(*elem.column)) { + if (auto* nullable = assert_cast(elem.column.get())) { const ColumnPtr& null_map_column = nullable->get_null_map_column_ptr(); if (!result_null_map_column) { - result_null_map_column = null_map_column->clone_resized(null_map_column->size()); + result_null_map_column = null_map_column->clone_resized(input_rows_count); } else { - MutableColumnPtr mutable_result_null_map_column = - (*std::move(result_null_map_column)).assume_mutable(); + if (!mutable_result_null_map_column) { + mutable_result_null_map_column = + (*std::move(result_null_map_column)).assume_mutable(); + } NullMap& result_null_map = assert_cast(*mutable_result_null_map_column).get_data(); @@ -73,12 +78,19 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum assert_cast(*null_map_column).get_data(); VectorizedUtils::update_null_map(result_null_map, src_null_map); - result_null_map_column = std::move(mutable_result_null_map_column); } } } - if (!result_null_map_column) return make_nullable(src); + if (!result_null_map_column) { + if (is_column_const(*src)) { + return ColumnConst::create( + make_nullable(assert_cast(*src).get_data_column_ptr(), + false), + input_rows_count); + } + return ColumnNullable::create(src, ColumnUInt8::create(input_rows_count, 0)); + } return ColumnNullable::create(src_not_nullable->convert_to_full_column_if_const(), result_null_map_column);