From 9ebacb1faa922ab6714cedaf23f7c7b08d268ea0 Mon Sep 17 00:00:00 2001 From: TengJianPing <18241664+jacktengg@users.noreply.github.com> Date: Mon, 18 Dec 2023 12:01:55 +0800 Subject: [PATCH] [fix](expr) fix performance problem caused by too many virtual function call (#28508) --- be/src/vec/columns/column.h | 2 + be/src/vec/columns/column_decimal.cpp | 12 ++++++ be/src/vec/columns/column_decimal.h | 2 + be/src/vec/columns/column_nullable.cpp | 10 ----- be/src/vec/columns/column_nullable.h | 2 - be/src/vec/columns/column_vector.cpp | 12 ++++++ be/src/vec/columns/column_vector.h | 2 + be/src/vec/functions/function.cpp | 27 +++++------- be/src/vec/functions/function.h | 18 ++++++++ .../functions/function_binary_arithmetic.h | 10 +++++ be/src/vec/functions/function_helpers.cpp | 35 ++++++++++----- be/src/vec/functions/function_helpers.h | 12 +++--- .../datatype_p0/decimalv3/fix-overflow.out | 12 ++++++ .../datatype_p0/decimalv3/fix-overflow.groovy | 43 +++++++++++++++++++ 14 files changed, 153 insertions(+), 46 deletions(-) diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index b2cf72b016..25ad1145fc 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -697,6 +697,8 @@ public: // only used in ColumnNullable replace_column_data virtual void replace_column_data_default(size_t self_row = 0) = 0; + virtual void replace_column_null_data(const uint8_t* __restrict null_map) {} + virtual bool is_date_type() const { return is_date; } virtual bool is_datetime_type() const { return is_date_time; } diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp index 07508f8c6a..1158e5b0d6 100644 --- a/be/src/vec/columns/column_decimal.cpp +++ b/be/src/vec/columns/column_decimal.cpp @@ -515,6 +515,18 @@ ColumnPtr ColumnDecimal::index(const IColumn& indexes, size_t limit) const { return select_index_impl(*this, indexes, limit); } +template +void ColumnDecimal::replace_column_null_data(const uint8_t* __restrict null_map) { + auto s = size(); + size_t null_count = s - simd::count_zero_num((const int8_t*)null_map, s); + if (0 == null_count) { + return; + } + for (size_t i = 0; i < s; ++i) { + data[i] = null_map[i] ? T() : data[i]; + } +} + template class ColumnDecimal; template class ColumnDecimal; template class ColumnDecimal; diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index dfdfbb0d6b..9b1ad69bba 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -261,6 +261,8 @@ public: data[self_row] = T(); } + void replace_column_null_data(const uint8_t* __restrict null_map) override; + void sort_column(const ColumnSorter* sorter, EqualFlags& flags, IColumn::Permutation& perms, EqualRange& range, bool last_column) const override; diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index 98ce01dd57..ecf330bead 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -50,16 +50,6 @@ ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnP _need_update_has_null = true; } -void ColumnNullable::update_null_data() { - const auto& null_map_data = _get_null_map_data(); - auto s = size(); - for (size_t i = 0; i < s; ++i) { - if (null_map_data[i]) { - nested_column->replace_column_data_default(i); - } - } -} - MutableColumnPtr ColumnNullable::get_shrinked_column() { return ColumnNullable::create(get_nested_column_ptr()->get_shrinked_column(), get_null_map_column_ptr()); diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 8eb1f4eedb..10b0951ab8 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -83,8 +83,6 @@ public: return Base::create(std::forward(args)...); } - void update_null_data(); - MutableColumnPtr get_shrinked_column() override; const char* get_family_name() const override { return "Nullable"; } diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index 65b1b6308e..45d9e8f70b 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -557,6 +557,18 @@ ColumnPtr ColumnVector::index(const IColumn& indexes, size_t limit) const { return select_index_impl(*this, indexes, limit); } +template +void ColumnVector::replace_column_null_data(const uint8_t* __restrict null_map) { + auto s = size(); + size_t null_count = s - simd::count_zero_num((const int8_t*)null_map, s); + if (0 == null_count) { + return; + } + for (size_t i = 0; i < s; ++i) { + data[i] = null_map[i] ? T() : data[i]; + } +} + /// Explicit template instantiations - to avoid code bloat in headers. template class ColumnVector; template class ColumnVector; diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index 772162bc87..384a8daa1c 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -474,6 +474,8 @@ public: data[self_row] = T(); } + void replace_column_null_data(const uint8_t* __restrict null_map) override; + void sort_column(const ColumnSorter* sorter, EqualFlags& flags, IColumn::Permutation& perms, EqualRange& range, bool last_column) const override; diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp index b92f5453bf..6e7f6572ab 100644 --- a/be/src/vec/functions/function.cpp +++ b/be/src/vec/functions/function.cpp @@ -99,21 +99,8 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum return ColumnNullable::create(src, ColumnUInt8::create(input_rows_count, 0)); } - bool update_null_data = false; - auto full_column = src_not_nullable->convert_to_full_column_if_const(); - if (const auto* nullable = check_and_get_column(full_column.get())) { - const auto& nested_column = nullable->get_nested_column(); - update_null_data = nested_column.is_numeric() || nested_column.is_column_decimal(); - } else { - update_null_data = full_column->is_numeric() || full_column->is_column_decimal(); - } - auto result_column = ColumnNullable::create(full_column, result_null_map_column); - if (update_null_data) { - auto* res_nullable_column = - assert_cast(std::move(*result_column).mutate().get()); - res_nullable_column->update_null_data(); - } - return result_column; + return ColumnNullable::create(src_not_nullable->convert_to_full_column_if_const(), + result_null_map_column); } NullPresence get_null_presence(const Block& block, const ColumnNumbers& args) { @@ -247,8 +234,14 @@ Status PreparedFunctionImpl::default_implementation_for_nulls( } if (null_presence.has_nullable) { - auto [temporary_block, new_args, new_result] = - create_block_with_nested_columns(block, args, result); + bool check_overflow_for_decimal = false; + if (context) { + check_overflow_for_decimal = context->check_overflow_for_decimal(); + } + auto [temporary_block, new_args, new_result] = create_block_with_nested_columns( + block, args, result, + check_overflow_for_decimal && need_replace_null_data_to_default()); + RETURN_IF_ERROR(execute_without_low_cardinality_columns( context, temporary_block, new_args, new_result, temporary_block.rows(), dry_run)); block.get_by_position(result).column = diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index 63cf78c417..cb8ff34cdb 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -102,6 +102,12 @@ public: */ virtual bool use_default_implementation_for_constants() const { return true; } + /** If use_default_implementation_for_nulls() is true, after execute the function, + * whether need to replace the nested data of null data to the default value. + * E.g. for binary arithmetic exprs, need return true to avoid false overflow. + */ + virtual bool need_replace_null_data_to_default() const { return false; } + protected: virtual Status execute_impl_dry_run(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, @@ -393,6 +399,8 @@ protected: */ virtual bool use_default_implementation_for_nulls() const { return true; } + virtual bool need_replace_null_data_to_default() const { return false; } + /** If use_default_implementation_for_nulls() is true, than change arguments for get_return_type() and build_impl(). * If function arguments has low cardinality types, convert them to ordinary types. * get_return_type returns ColumnLowCardinality if at least one argument type is ColumnLowCardinality. @@ -434,6 +442,9 @@ public: /// Override this functions to change default implementation behavior. See details in IMyFunction. bool use_default_implementation_for_nulls() const override { return true; } + + bool need_replace_null_data_to_default() const override { return false; } + bool use_default_implementation_for_low_cardinality_columns() const override { return true; } /// all constancy check should use this function to do automatically @@ -506,6 +517,9 @@ protected: bool use_default_implementation_for_nulls() const final { return function->use_default_implementation_for_nulls(); } + bool need_replace_null_data_to_default() const final { + return function->need_replace_null_data_to_default(); + } bool use_default_implementation_for_constants() const final { return function->use_default_implementation_for_constants(); } @@ -629,6 +643,10 @@ protected: bool use_default_implementation_for_nulls() const override { return function->use_default_implementation_for_nulls(); } + + bool need_replace_null_data_to_default() const override { + return function->need_replace_null_data_to_default(); + } bool use_default_implementation_for_low_cardinality_columns() const override { return function->use_default_implementation_for_low_cardinality_columns(); } diff --git a/be/src/vec/functions/function_binary_arithmetic.h b/be/src/vec/functions/function_binary_arithmetic.h index 217118d7e6..844d60b8e8 100644 --- a/be/src/vec/functions/function_binary_arithmetic.h +++ b/be/src/vec/functions/function_binary_arithmetic.h @@ -840,6 +840,8 @@ template