diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index 9941f5944b..e015fdb307 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -27,6 +27,14 @@ #include "vec/common/assert_cast.h" #include "vec/core/types.h" +//TODO: use macros below to decouple array function calls +#define ALL_COLUMNS_NUMBER \ + ColumnUInt8, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnInt128, ColumnFloat32, \ + ColumnFloat64, ColumnDecimal32, ColumnDecimal64, ColumnDecimal128I, ColumnDecimal128 +#define ALL_COLUMNS_TIME ColumnDate, ColumnDateTime, ColumnDateV2, ColumnDateTimeV2 +#define ALL_COLUMNS_NUMERIC ALL_COLUMNS_NUMBER, ALL_COLUMNS_TIME +#define ALL_COLUMNS_SIMPLE ALL_COLUMNS_NUMERIC, ColumnString + namespace doris::vectorized { /** Obtaining array as Field can be slow for large arrays and consume vast amount of memory. diff --git a/be/src/vec/functions/array/function_array_binary.h b/be/src/vec/functions/array/function_array_binary.h index 152ed7c5ec..d77d7028fe 100644 --- a/be/src/vec/functions/array/function_array_binary.h +++ b/be/src/vec/functions/array/function_array_binary.h @@ -36,6 +36,8 @@ public: bool is_variadic() const override { return false; } size_t get_number_of_arguments() const override { return 2; } + bool use_default_implementation_for_constants() const override { return true; } + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { DCHECK(is_array(arguments[0])) << arguments[0]->get_name(); DCHECK(is_array(arguments[1])) << arguments[1]->get_name(); @@ -51,26 +53,26 @@ public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { - auto left_column = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - auto right_column = - block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); - Status ret = Status::RuntimeError( - 
fmt::format("execute failed, unsupported types for function {}({}, {})", get_name(), - block.get_by_position(arguments[0]).type->get_name(), - block.get_by_position(arguments[1]).type->get_name())); + const auto& [left_column, left_const] = + unpack_if_const(block.get_by_position(arguments[0]).column); + const auto& [right_column, right_const] = + unpack_if_const(block.get_by_position(arguments[1]).column); + // extract array column ColumnArrayExecutionData left_data; ColumnArrayExecutionData right_data; ColumnPtr res_ptr = nullptr; if (extract_column_array_info(*left_column, left_data) && - extract_column_array_info(*right_column, right_data)) { - ret = Impl::execute(res_ptr, left_data, right_data); - } - if (ret == Status::OK()) { + extract_column_array_info(*right_column, right_data) && + Impl::execute(res_ptr, left_data, right_data, left_const, right_const) == + Status::OK()) { block.replace_by_position(result, std::move(res_ptr)); + return Status::OK(); } - return ret; + return Status::RuntimeError( + fmt::format("execute failed, unsupported types for function {}({}, {})", get_name(), + block.get_by_position(arguments[0]).type->get_name(), + block.get_by_position(arguments[1]).type->get_name())); } }; diff --git a/be/src/vec/functions/array/function_array_range.cpp b/be/src/vec/functions/array/function_array_range.cpp index 0e2c33f5af..15fb78e578 100644 --- a/be/src/vec/functions/array/function_array_range.cpp +++ b/be/src/vec/functions/array/function_array_range.cpp @@ -46,10 +46,6 @@ public: bool use_default_implementation_for_constants() const override { return true; } - ColumnNumbers get_arguments_that_are_always_constant() const override { - return {get_number_of_arguments()}; - } - size_t get_number_of_arguments() const override { return get_variadic_argument_types_impl().size(); } diff --git a/be/src/vec/functions/array/function_array_set.h b/be/src/vec/functions/array/function_array_set.h index fa9a6451e3..f81c04b7d2 100644 --- 
a/be/src/vec/functions/array/function_array_set.h +++ b/be/src/vec/functions/array/function_array_set.h @@ -17,6 +17,8 @@ #pragma once +#include + #include "vec/columns/column_array.h" #include "vec/columns/column_string.h" #include "vec/common/hash_table/hash_set.h" @@ -150,7 +152,8 @@ public: } static Status execute(ColumnPtr& res_ptr, const ColumnArrayExecutionData& left_data, - const ColumnArrayExecutionData& right_data) { + const ColumnArrayExecutionData& right_data, bool left_const, + bool right_const) { ColumnArrayMutableData dst; if (left_data.nested_nullmap_data || right_data.nested_nullmap_data) { dst = create_mutable_data(left_data.nested_col, true); @@ -158,35 +161,29 @@ public: dst = create_mutable_data(left_data.nested_col, false); } ColumnPtr res_column; - if (_execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data) || - _execute_internal(dst, left_data, right_data)) { - res_column = assemble_column_array(dst); - if (res_column) { - res_ptr = std::move(res_column); - return Status::OK(); + if (left_const) { + if (_execute_internal(dst, left_data, right_data)) { + res_column = assemble_column_array(dst); } + } else if (right_const) { + if (_execute_internal(dst, left_data, right_data)) { + res_column = 
assemble_column_array(dst); + } + } else { + if (_execute_internal(dst, left_data, right_data)) { + res_column = assemble_column_array(dst); + } + } + if (res_column) { + res_ptr = std::move(res_column); + return Status::OK(); } return Status::RuntimeError("Unexpected columns: {}, {}", left_data.nested_col->get_name(), right_data.nested_col->get_name()); } private: - template + template static bool _execute_internal(ColumnArrayMutableData& dst, const ColumnArrayExecutionData& left_data, const ColumnArrayExecutionData& right_data) { @@ -199,10 +196,11 @@ private: Impl impl; for (size_t row = 0; row < left_data.offsets_ptr->size(); ++row) { size_t count = 0; - size_t left_off = (*left_data.offsets_ptr)[row - 1]; - size_t left_len = (*left_data.offsets_ptr)[row] - left_off; - size_t right_off = (*right_data.offsets_ptr)[row - 1]; - size_t right_len = (*right_data.offsets_ptr)[row] - right_off; + size_t left_off = (*left_data.offsets_ptr)[index_check_const(row, LCONST) - 1]; + size_t left_len = (*left_data.offsets_ptr)[index_check_const(row, LCONST)] - left_off; + size_t right_off = (*right_data.offsets_ptr)[index_check_const(row, RCONST) - 1]; + size_t right_len = + (*right_data.offsets_ptr)[index_check_const(row, RCONST)] - right_off; if constexpr (execute_left_column_first) { impl.template apply(left_data, left_off, left_len, dst, &count); impl.template apply(right_data, right_off, right_len, dst, &count); @@ -216,6 +214,14 @@ private: } return true; } + template 0), int> = 0> + static bool _execute_internal(ColumnArrayMutableData& dst, + const ColumnArrayExecutionData& left_data, + const ColumnArrayExecutionData& right_data) { + return _execute_internal(dst, left_data, right_data) || + _execute_internal(dst, left_data, right_data); + } }; } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/functions/comparison_equal_for_null.cpp b/be/src/vec/functions/comparison_equal_for_null.cpp index e7e40be15b..dff52a2309 100644 --- 
a/be/src/vec/functions/comparison_equal_for_null.cpp +++ b/be/src/vec/functions/comparison_equal_for_null.cpp @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include "common/compiler_util.h" #include "vec/columns/column_nullable.h" #include "vec/data_types/get_least_supertype.h" #include "vec/functions/function_string.h" @@ -22,7 +23,7 @@ #include "vec/utils/util.hpp" namespace doris::vectorized { -//TODO: add manual info to docs. +// Operator <=> class FunctionEqForNull : public IFunction { public: static constexpr auto name = "eq_for_null"; @@ -38,17 +39,17 @@ public: } bool use_default_implementation_for_nulls() const override { return false; } + bool use_default_implementation_for_constants() const override { return true; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { ColumnWithTypeAndName& col_left = block.get_by_position(arguments[0]); ColumnWithTypeAndName& col_right = block.get_by_position(arguments[1]); - // TODO: opt for the const column in the future - col_left.column = col_left.column->convert_to_full_column_if_const(); - col_right.column = col_right.column->convert_to_full_column_if_const(); - const auto left_column = check_and_get_column(col_left.column.get()); - const auto right_column = check_and_get_column(col_right.column.get()); + const auto& [left_col, left_const] = unpack_if_const(col_left.column); + const auto& [right_col, right_const] = unpack_if_const(col_right.column); + const auto left_column = check_and_get_column(left_col); + const auto right_column = check_and_get_column(right_col); bool left_nullable = left_column != nullptr; bool right_nullable = right_column != nullptr; @@ -89,13 +90,11 @@ public: auto* __restrict l = left_null_map.data(); auto* __restrict r = right_null_map.data(); - for (int i = 0; i < input_rows_count; ++i) { - res[i] |= l[i] & (l[i] == r[i]); - } + 
_exec_nullable_equal(res, l, r, input_rows_count, left_const, right_const); } block.get_by_position(result).column = temporary_block.get_by_position(2).column; - } else { + } else { //left_nullable != right_nullable auto return_type = make_nullable(std::make_shared()); const ColumnsWithTypeAndName eq_columns { @@ -118,14 +117,43 @@ public: auto* __restrict res = res_map.data(); auto* __restrict l = null_map.data(); - for (int i = 0; i < input_rows_count; ++i) { - res[i] &= (l[i] != 1); - } + _exec_nullable_inequal(res, l, input_rows_count, left_const); block.get_by_position(result).column = res_nullable_column->get_nested_column_ptr(); } return Status::OK(); } + +private: + static void _exec_nullable_equal(unsigned char* result, const unsigned char* left, + const unsigned char* right, size_t rows, bool left_const, + bool right_const) { + if (left_const) { + for (int i = 0; i < rows; ++i) { + result[i] |= left[0] & (left[0] == right[i]); + } + } else if (right_const) { + for (int i = 0; i < rows; ++i) { + result[i] |= left[i] & (left[i] == right[0]); + } + } else { + for (int i = 0; i < rows; ++i) { + result[i] |= left[i] & (left[i] == right[i]); + } + } + } + static void _exec_nullable_inequal(unsigned char* result, const unsigned char* left, + size_t rows, bool left_const) { + if (left_const) { + for (int i = 0; i < rows; ++i) { + result[i] &= (left[0] != 1); + } + } else { + for (int i = 0; i < rows; ++i) { + result[i] &= (left[i] != 1); + } + } + } }; void register_function_comparison_eq_for_null(SimpleFunctionFactory& factory) { diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp index 7acc9560cb..753d84cec5 100644 --- a/be/src/vec/functions/function.cpp +++ b/be/src/vec/functions/function.cpp @@ -70,7 +70,7 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum } else { if (!mutable_result_null_map_column) { mutable_result_null_map_column = - (*std::move(result_null_map_column)).assume_mutable(); + 
std::move(result_null_map_column)->assume_mutable(); } NullMap& result_null_map = @@ -166,15 +166,24 @@ Status PreparedFunctionImpl::default_implementation_for_constant_arguments( !all_arguments_are_constant(block, args)) { return Status::OK(); } + // now all columns are const. Block temporary_block; size_t arguments_size = args.size(); for (size_t arg_num = 0; arg_num < arguments_size; ++arg_num) { const ColumnWithTypeAndName& column = block.get_by_position(args[arg_num]); - temporary_block.insert( - {assert_cast(column.column.get())->get_data_column_ptr(), - column.type, column.name}); + // Columns in const_list --> column_const, others --> nested_column + // that's because some functions suppose some specific columns are always constant. + // If we unpack it, there will be unnecessary cost of virtual judge. + if (args_expect_const.end() != + std::find(args_expect_const.begin(), args_expect_const.end(), arg_num)) { + temporary_block.insert({column.column, column.type, column.name}); + } else { + temporary_block.insert( + {assert_cast(column.column.get())->get_data_column_ptr(), + column.type, column.name}); + } } temporary_block.insert(block.get_by_position(result)); diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index 1abccd13be..a5f5123178 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -404,6 +404,8 @@ public: bool use_default_implementation_for_nulls() const override { return true; } bool use_default_implementation_for_constants() const override { return false; } bool use_default_implementation_for_low_cardinality_columns() const override { return true; } + + /// all constancy checks should use this function so that they are done automatically ColumnNumbers get_arguments_that_are_always_constant() const override { return {}; } bool can_be_executed_on_low_cardinality_dictionary() const override { return is_deterministic_in_scope_of_query(); diff --git a/be/src/vec/functions/function_conv.cpp 
b/be/src/vec/functions/function_conv.cpp index d1c2120e4d..027a7e6a17 100644 --- a/be/src/vec/functions/function_conv.cpp +++ b/be/src/vec/functions/function_conv.cpp @@ -16,6 +16,9 @@ // under the License. #include "exprs/math_functions.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_nullable.h" +#include "vec/core/types.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" #include "vec/functions/simple_function_factory.h" @@ -49,29 +52,42 @@ public: auto result_column = ColumnString::create(); auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); + bool col_const[3]; ColumnPtr argument_columns[3]; - for (int i = 0; i < 3; ++i) { - argument_columns[i] = - block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); - if (auto* nullable = check_and_get_column(*argument_columns[i])) { - // Danger: Here must dispose the null map data first! Because - // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem - // of column nullable mem of null map - VectorizedUtils::update_null_map(result_null_map_column->get_data(), - nullable->get_null_map_data()); - argument_columns[i] = nullable->get_nested_column_ptr(); - } + col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); + } + argument_columns[0] = col_const[0] ? 
static_cast( + *block.get_by_position(arguments[0]).column) + .convert_to_full_column() + : block.get_by_position(arguments[0]).column; + + default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); + + for (int i = 0; i < 3; i++) { + check_set_nullable(argument_columns[i], result_null_map_column); } - execute_straight( - context, - assert_cast(argument_columns[0].get()), - assert_cast(argument_columns[1].get()), - assert_cast(argument_columns[2].get()), - assert_cast(result_column.get()), - assert_cast(result_null_map_column.get())->get_data(), - input_rows_count); + if (col_const[1] && col_const[2]) { + execute_scalar_args( + context, + assert_cast( + argument_columns[0].get()), + assert_cast(argument_columns[1].get())->get_element(0), + assert_cast(argument_columns[2].get())->get_element(0), + assert_cast(result_column.get()), + assert_cast(result_null_map_column.get())->get_data(), + input_rows_count); + } else { + execute_straight(context, + assert_cast( + argument_columns[0].get()), + assert_cast(argument_columns[1].get()), + assert_cast(argument_columns[2].get()), + assert_cast(result_column.get()), + assert_cast(result_null_map_column.get())->get_data(), + input_rows_count); + } block.get_by_position(result).column = ColumnNullable::create(std::move(result_column), std::move(result_null_map_column)); @@ -79,28 +95,49 @@ public: } private: - void execute_straight(FunctionContext* context, - const typename Impl::DataType::ColumnType* data_column, - const ColumnInt8* src_base_column, const ColumnInt8* dst_base_column, - ColumnString* result_column, NullMap& result_null_map, - size_t input_rows_count) { + // check out of bound. 
+ static bool _check_oob(const Int8 src_base, const Int8 dst_base) { + return std::abs(src_base) < MathFunctions::MIN_BASE || + std::abs(src_base) > MathFunctions::MAX_BASE || + std::abs(dst_base) < MathFunctions::MIN_BASE || + std::abs(dst_base) > MathFunctions::MAX_BASE; + } + static void execute_straight(FunctionContext* context, + const typename Impl::DataType::ColumnType* data_column, + const ColumnInt8* src_base_column, + const ColumnInt8* dst_base_column, ColumnString* result_column, + NullMap& result_null_map, size_t input_rows_count) { for (size_t i = 0; i < input_rows_count; i++) { if (result_null_map[i]) { result_column->insert_default(); continue; } - Int8 src_base = src_base_column->get_element(i); Int8 dst_base = dst_base_column->get_element(i); - if (std::abs(src_base) < MathFunctions::MIN_BASE || - std::abs(src_base) > MathFunctions::MAX_BASE || - std::abs(dst_base) < MathFunctions::MIN_BASE || - std::abs(dst_base) > MathFunctions::MAX_BASE) { + if (_check_oob(src_base, dst_base)) { result_null_map[i] = true; result_column->insert_default(); + } else { + Impl::calculate_cell(context, data_column, src_base, dst_base, result_column, + result_null_map, i); + } + } + } + static void execute_scalar_args(FunctionContext* context, + const typename Impl::DataType::ColumnType* data_column, + const Int8 src_base, const Int8 dst_base, + ColumnString* result_column, NullMap& result_null_map, + size_t input_rows_count) { + if (_check_oob(src_base, dst_base)) { + result_null_map.assign(input_rows_count, UInt8 {true}); + result_column->insert_many_defaults(input_rows_count); + return; + } + for (size_t i = 0; i < input_rows_count; i++) { + if (result_null_map[i]) { + result_column->insert_default(); continue; } - Impl::calculate_cell(context, data_column, src_base, dst_base, result_column, result_null_map, i); } diff --git a/be/src/vec/functions/function_convert_tz.h b/be/src/vec/functions/function_convert_tz.h index 54c3d253ee..9857417653 100644 --- 
a/be/src/vec/functions/function_convert_tz.h +++ b/be/src/vec/functions/function_convert_tz.h @@ -17,6 +17,7 @@ #pragma once +#include "vec/columns/column_const.h" #include "vec/columns/columns_number.h" #include "vec/common/string_ref.h" #include "vec/core/types.h" @@ -60,47 +61,75 @@ struct ConvertTZImpl { result_column->insert_default(); continue; } - auto from_tz = from_tz_column->get_data_at(i).to_string(); auto to_tz = to_tz_column->get_data_at(i).to_string(); - - DateValueType ts_value = - binary_cast(date_column->get_element(i)); - int64_t timestamp; - - if (time_zone_cache.find(from_tz) == time_zone_cache.cend()) { - if (!TimezoneUtils::find_cctz_time_zone(from_tz, time_zone_cache[from_tz])) { - result_null_map[i] = true; - result_column->insert_default(); - continue; - } - } - - if (time_zone_cache.find(to_tz) == time_zone_cache.cend()) { - if (!TimezoneUtils::find_cctz_time_zone(to_tz, time_zone_cache[to_tz])) { - result_null_map[i] = true; - result_column->insert_default(); - continue; - } - } - - if (!ts_value.unix_timestamp(×tamp, time_zone_cache[from_tz])) { - result_null_map[i] = true; - result_column->insert_default(); - continue; - } - - ReturnDateType ts_value2; - if (!ts_value2.from_unixtime(timestamp, time_zone_cache[to_tz])) { - result_null_map[i] = true; - result_column->insert_default(); - continue; - } - - result_column->insert(binary_cast(ts_value2)); + execute_inner_loop(date_column, time_zone_cache, from_tz, to_tz, result_column, + result_null_map, i); } } + static void execute_tz_const(FunctionContext* context, const ColumnType* date_column, + const ColumnString* from_tz_column, + const ColumnString* to_tz_column, ReturnColumnType* result_column, + NullMap& result_null_map, size_t input_rows_count) { + auto convert_ctx = reinterpret_cast( + context->get_function_state(FunctionContext::FunctionStateScope::THREAD_LOCAL)); + std::map time_zone_cache_; + auto& time_zone_cache = convert_ctx ? 
convert_ctx->time_zone_cache : time_zone_cache_; + + auto from_tz = from_tz_column->get_data_at(0).to_string(); + auto to_tz = to_tz_column->get_data_at(0).to_string(); + for (size_t i = 0; i < input_rows_count; i++) { + if (result_null_map[i]) { + result_column->insert_default(); + continue; + } + execute_inner_loop(date_column, time_zone_cache, from_tz, to_tz, result_column, + result_null_map, i); + } + } + + static void execute_inner_loop(const ColumnType* date_column, + std::map& time_zone_cache, + const std::string& from_tz, const std::string& to_tz, + ReturnColumnType* result_column, NullMap& result_null_map, + const size_t index_now) { + DateValueType ts_value = + binary_cast(date_column->get_element(index_now)); + int64_t timestamp; + + if (time_zone_cache.find(from_tz) == time_zone_cache.cend()) { + if (!TimezoneUtils::find_cctz_time_zone(from_tz, time_zone_cache[from_tz])) { + result_null_map[index_now] = true; + result_column->insert_default(); + return; + } + } + + if (time_zone_cache.find(to_tz) == time_zone_cache.cend()) { + if (!TimezoneUtils::find_cctz_time_zone(to_tz, time_zone_cache[to_tz])) { + result_null_map[index_now] = true; + result_column->insert_default(); + return; + } + } + + if (!ts_value.unix_timestamp(×tamp, time_zone_cache[from_tz])) { + result_null_map[index_now] = true; + result_column->insert_default(); + return; + } + + ReturnDateType ts_value2; + if (!ts_value2.from_unixtime(timestamp, time_zone_cache[to_tz])) { + result_null_map[index_now] = true; + result_column->insert_default(); + return; + } + + result_column->insert(binary_cast(ts_value2)); + } + static DataTypes get_variadic_argument_types() { return {std::make_shared(), std::make_shared(), std::make_shared()}; @@ -152,56 +181,95 @@ public: size_t result, size_t input_rows_count) override { auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); + bool col_const[3]; ColumnPtr argument_columns[3]; - for (int i = 0; i < 3; ++i) { - argument_columns[i] = - 
block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); - if (auto* nullable = check_and_get_column(*argument_columns[i])) { - // Danger: Here must dispose the null map data first! Because - // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem - // of column nullable mem of null map - VectorizedUtils::update_null_map(result_null_map_column->get_data(), - nullable->get_null_map_data()); - argument_columns[i] = nullable->get_nested_column_ptr(); + col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); + } + argument_columns[0] = col_const[0] ? static_cast( + *block.get_by_position(arguments[0]).column) + .convert_to_full_column() + : block.get_by_position(arguments[0]).column; + + default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); + + for (int i = 0; i < 3; i++) { + check_set_nullable(argument_columns[i], result_null_map_column); + } + + if (col_const[1] && col_const[2]) { + if constexpr (std::is_same_v || + std::is_same_v) { + auto result_column = ColumnDateTime::create(); + Transform::execute_tz_const( + context, assert_cast(argument_columns[0].get()), + assert_cast(argument_columns[1].get()), + assert_cast(argument_columns[2].get()), + assert_cast(result_column.get()), + assert_cast(result_null_map_column.get())->get_data(), + input_rows_count); + block.get_by_position(result).column = ColumnNullable::create( + std::move(result_column), std::move(result_null_map_column)); + } else if constexpr (std::is_same_v) { + auto result_column = ColumnDateTimeV2::create(); + Transform::execute_tz_const( + context, assert_cast(argument_columns[0].get()), + assert_cast(argument_columns[1].get()), + assert_cast(argument_columns[2].get()), + assert_cast(result_column.get()), + assert_cast(result_null_map_column.get())->get_data(), + input_rows_count); + block.get_by_position(result).column = ColumnNullable::create( + std::move(result_column), 
std::move(result_null_map_column)); + } else { + auto result_column = ColumnDateTimeV2::create(); + Transform::execute_tz_const( + context, assert_cast(argument_columns[0].get()), + assert_cast(argument_columns[1].get()), + assert_cast(argument_columns[2].get()), + assert_cast(result_column.get()), + assert_cast(result_null_map_column.get())->get_data(), + input_rows_count); + block.get_by_position(result).column = ColumnNullable::create( + std::move(result_column), std::move(result_null_map_column)); } - } - - if constexpr (std::is_same_v || - std::is_same_v) { - auto result_column = ColumnDateTime::create(); - Transform::execute(context, - assert_cast(argument_columns[0].get()), - assert_cast(argument_columns[1].get()), - assert_cast(argument_columns[2].get()), - assert_cast(result_column.get()), - assert_cast(result_null_map_column.get())->get_data(), - input_rows_count); - block.get_by_position(result).column = ColumnNullable::create( - std::move(result_column), std::move(result_null_map_column)); - } else if constexpr (std::is_same_v) { - auto result_column = ColumnDateTimeV2::create(); - Transform::execute(context, assert_cast(argument_columns[0].get()), - assert_cast(argument_columns[1].get()), - assert_cast(argument_columns[2].get()), - assert_cast(result_column.get()), - assert_cast(result_null_map_column.get())->get_data(), - input_rows_count); - block.get_by_position(result).column = ColumnNullable::create( - std::move(result_column), std::move(result_null_map_column)); } else { - auto result_column = ColumnDateTimeV2::create(); - Transform::execute(context, - assert_cast(argument_columns[0].get()), - assert_cast(argument_columns[1].get()), - assert_cast(argument_columns[2].get()), - assert_cast(result_column.get()), - assert_cast(result_null_map_column.get())->get_data(), - input_rows_count); - block.get_by_position(result).column = ColumnNullable::create( - std::move(result_column), std::move(result_null_map_column)); - } - + if constexpr 
(std::is_same_v || + std::is_same_v) { + auto result_column = ColumnDateTime::create(); + Transform::execute( + context, assert_cast(argument_columns[0].get()), + assert_cast(argument_columns[1].get()), + assert_cast(argument_columns[2].get()), + assert_cast(result_column.get()), + assert_cast(result_null_map_column.get())->get_data(), + input_rows_count); + block.get_by_position(result).column = ColumnNullable::create( + std::move(result_column), std::move(result_null_map_column)); + } else if constexpr (std::is_same_v) { + auto result_column = ColumnDateTimeV2::create(); + Transform::execute( + context, assert_cast(argument_columns[0].get()), + assert_cast(argument_columns[1].get()), + assert_cast(argument_columns[2].get()), + assert_cast(result_column.get()), + assert_cast(result_null_map_column.get())->get_data(), + input_rows_count); + block.get_by_position(result).column = ColumnNullable::create( + std::move(result_column), std::move(result_null_map_column)); + } else { + auto result_column = ColumnDateTimeV2::create(); + Transform::execute( + context, assert_cast(argument_columns[0].get()), + assert_cast(argument_columns[1].get()), + assert_cast(argument_columns[2].get()), + assert_cast(result_column.get()), + assert_cast(result_null_map_column.get())->get_data(), + input_rows_count); + block.get_by_position(result).column = ColumnNullable::create( + std::move(result_column), std::move(result_null_map_column)); + } //if datatype + } //if const return Status::OK(); } }; diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp index 8417724d9a..3ee87c59b9 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include "exprs/json_functions.h" @@ -30,6 +31,7 @@ #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" #include "vec/common/string_ref.h" +#include "vec/core/types.h" #include 
"vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" #include "vec/functions/function_string.h" @@ -234,6 +236,21 @@ struct GetJsonNumberType { using ReturnType = typename NumberType::ReturnType; using ColumnType = typename NumberType::ColumnType; using Container = typename ColumnType::Container; + + static void get_json_impl(rapidjson::Value*& root, const std::string_view& json_string, + const std::string_view& path_string, rapidjson::Document& document, + typename NumberType::T& res, UInt8& null_map) { + if constexpr (std::is_same_v) { + root = get_json_object(json_string, path_string, &document); + handle_result(root, res, null_map); + } else if constexpr (std::is_same_v) { + root = get_json_object(json_string, path_string, &document); + handle_result(root, res, null_map); + } else if constexpr (std::is_same_v) { + root = get_json_object(json_string, path_string, &document); + handle_result(root, res, null_map); + } + } static void vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, const ColumnString::Offsets& loffsets, const ColumnString::Chars& rdata, @@ -244,10 +261,8 @@ struct GetJsonNumberType { for (size_t i = 0; i < size; ++i) { const char* l_raw_str = reinterpret_cast(&ldata[loffsets[i - 1]]); int l_str_size = loffsets[i] - loffsets[i - 1]; - const char* r_raw_str = reinterpret_cast(&rdata[roffsets[i - 1]]); int r_str_size = roffsets[i] - roffsets[i - 1]; - if (null_map[i]) { res[i] = 0; continue; @@ -255,20 +270,52 @@ struct GetJsonNumberType { std::string_view json_string(l_raw_str, l_str_size); std::string_view path_string(r_raw_str, r_str_size); + rapidjson::Document document; + rapidjson::Value* root = nullptr; + + get_json_impl(root, json_string, path_string, document, res[i], null_map[i]); + } + } + static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, + const ColumnString::Offsets& loffsets, const StringRef& rdata, + Container& res, NullMap& null_map) { + size_t 
size = loffsets.size(); + res.resize(size); + std::string_view path_string(rdata.data, rdata.size); + for (size_t i = 0; i < size; ++i) { + const char* l_raw_str = reinterpret_cast(&ldata[loffsets[i - 1]]); + int l_str_size = loffsets[i] - loffsets[i - 1]; + if (null_map[i]) { + res[i] = 0; + continue; + } + std::string_view json_string(l_raw_str, l_str_size); rapidjson::Document document; rapidjson::Value* root = nullptr; - if constexpr (std::is_same_v) { - root = get_json_object(json_string, path_string, &document); - handle_result(root, res[i], null_map[i]); - } else if constexpr (std::is_same_v) { - root = get_json_object(json_string, path_string, &document); - handle_result(root, res[i], null_map[i]); - } else if constexpr (std::is_same_v) { - root = get_json_object(json_string, path_string, &document); - handle_result(root, res[i], null_map[i]); + get_json_impl(root, json_string, path_string, document, res[i], null_map[i]); + } + } + static void scalar_vector(FunctionContext* context, const StringRef& ldata, + const ColumnString::Chars& rdata, + const ColumnString::Offsets& roffsets, Container& res, + NullMap& null_map) { + size_t size = roffsets.size(); + res.resize(size); + std::string_view json_string(ldata.data, ldata.size); + for (size_t i = 0; i < size; ++i) { + const char* r_raw_str = reinterpret_cast(&rdata[roffsets[i - 1]]); + int r_str_size = roffsets[i] - roffsets[i - 1]; + if (null_map[i]) { + res[i] = 0; + continue; } + std::string_view path_string(r_raw_str, r_str_size); + rapidjson::Document document; + rapidjson::Value* root = nullptr; + + get_json_impl(root, json_string, path_string, document, res[i], null_map[i]); } } @@ -357,41 +404,87 @@ struct GetJsonString { res_offsets.resize(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { - int l_size = loffsets[i] - loffsets[i - 1]; - const auto l_raw = reinterpret_cast(&ldata[loffsets[i - 1]]); - - int r_size = roffsets[i] - roffsets[i - 1]; - const auto r_raw = 
reinterpret_cast(&rdata[roffsets[i - 1]]); - if (null_map[i]) { StringOP::push_null_string(i, res_data, res_offsets, null_map); continue; } + int l_size = loffsets[i] - loffsets[i - 1]; + const auto l_raw = reinterpret_cast(&ldata[loffsets[i - 1]]); + int r_size = roffsets[i] - roffsets[i - 1]; + const auto r_raw = reinterpret_cast(&rdata[roffsets[i - 1]]); std::string_view json_string(l_raw, l_size); std::string_view path_string(r_raw, r_size); - rapidjson::Document document; - rapidjson::Value* root = nullptr; + execute_impl(json_string, path_string, res_data, res_offsets, null_map, i); + } + } + static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets, + const StringRef& rdata, Chars& res_data, Offsets& res_offsets, + NullMap& null_map) { + size_t input_rows_count = loffsets.size(); + res_offsets.resize(input_rows_count); - root = get_json_object(json_string, path_string, &document); - const int max_string_len = 65535; - - if (root == nullptr || root->IsNull()) { + for (size_t i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { StringOP::push_null_string(i, res_data, res_offsets, null_map); - } else if (root->IsString()) { - const auto ptr = root->GetString(); - size_t len = strnlen(ptr, max_string_len); - StringOP::push_value_string(std::string_view(ptr, len), i, res_data, res_offsets); - } else { - rapidjson::StringBuffer buf; - rapidjson::Writer writer(buf); - root->Accept(writer); - - const auto ptr = buf.GetString(); - size_t len = strnlen(ptr, max_string_len); - StringOP::push_value_string(std::string_view(ptr, len), i, res_data, res_offsets); + continue; } + int l_size = loffsets[i] - loffsets[i - 1]; + const auto l_raw = reinterpret_cast(&ldata[loffsets[i - 1]]); + + std::string_view json_string(l_raw, l_size); + std::string_view path_string(rdata.data, rdata.size); + + execute_impl(json_string, path_string, res_data, res_offsets, null_map, i); + } + } + static void scalar_vector(FunctionContext* context, const 
StringRef& ldata, const Chars& rdata, + const Offsets& roffsets, Chars& res_data, Offsets& res_offsets, + NullMap& null_map) { + size_t input_rows_count = roffsets.size(); + res_offsets.resize(input_rows_count); + + for (size_t i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, res_data, res_offsets, null_map); + continue; + } + int r_size = roffsets[i] - roffsets[i - 1]; + const auto r_raw = reinterpret_cast(&rdata[roffsets[i - 1]]); + + std::string_view json_string(ldata.data, ldata.size); + std::string_view path_string(r_raw, r_size); + + execute_impl(json_string, path_string, res_data, res_offsets, null_map, i); + } + } + + static void execute_impl(const std::string_view& json_string, + const std::string_view& path_string, Chars& res_data, + Offsets& res_offsets, NullMap& null_map, size_t index_now) { + rapidjson::Document document; + rapidjson::Value* root = nullptr; + + root = get_json_object(json_string, path_string, &document); + const int max_string_len = 65535; + + if (root == nullptr || root->IsNull()) { + StringOP::push_null_string(index_now, res_data, res_offsets, null_map); + } else if (root->IsString()) { + const auto ptr = root->GetString(); + size_t len = strnlen(ptr, max_string_len); + StringOP::push_value_string(std::string_view(ptr, len), index_now, res_data, + res_offsets); + } else { + rapidjson::StringBuffer buf; + rapidjson::Writer writer(buf); + root->Accept(writer); + + const auto ptr = buf.GetString(); + size_t len = strnlen(ptr, max_string_len); + StringOP::push_value_string(std::string_view(ptr, len), index_now, res_data, + res_offsets); } } }; diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp index 8260eb4e39..750f11bca5 100644 --- a/be/src/vec/functions/function_jsonb.cpp +++ b/be/src/vec/functions/function_jsonb.cpp @@ -18,7 +18,7 @@ #include #include -// #include "util/jsonb_parser_simd.h" +#include "common/compiler_util.h" #include 
"util/string_parser.hpp" #include "util/string_util.h" #include "vec/columns/column.h" @@ -221,7 +221,9 @@ public: JsonbErrMsg::getErrMsg(error), std::string_view(val.data, val.size)); case JsonbParseErrorMode::RETURN_NULL: { - if (is_nullable) null_map->get_data()[i] = 1; + if (is_nullable) { + null_map->get_data()[i] = 1; + } col_to->insert_data("", 0); continue; } @@ -310,17 +312,11 @@ public: auto null_map = ColumnUInt8::create(input_rows_count, 0); DCHECK_EQ(arguments.size(), 2); ColumnPtr argument_columns[2]; + bool col_const[2]; for (int i = 0; i < 2; ++i) { - argument_columns[i] = - block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); - if (auto* nullable = check_and_get_column(*argument_columns[i])) { - // Danger: Here must dispose the null map data first! Because - // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem - // of column nullable mem of null map - VectorizedUtils::update_null_map(null_map->get_data(), - nullable->get_null_map_data()); - argument_columns[i] = nullable->get_nested_column_ptr(); - } + std::tie(argument_columns[i], col_const[i]) = + unpack_if_const(block.get_by_position(arguments[i]).column); + check_set_nullable(argument_columns[i], null_map); } auto res = Impl::ColumnType::create(); @@ -339,11 +335,27 @@ public: std::is_same_v) { auto& res_data = res->get_chars(); auto& res_offsets = res->get_offsets(); - Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res_data, res_offsets, - null_map->get_data()); + if (col_const[0]) { + Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), rdata, roffsets, + res_data, res_offsets, null_map->get_data()); + } else if (col_const[1]) { + Impl::vector_scalar(context, ldata, loffsets, jsonb_path_column->get_data_at(0), + res_data, res_offsets, null_map->get_data()); + } else { + Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res_data, + res_offsets, null_map->get_data()); + } } else { - 
Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(), - null_map->get_data()); + if (col_const[0]) { + Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), rdata, roffsets, + res->get_data(), null_map->get_data()); + } else if (col_const[1]) { + Impl::vector_scalar(context, ldata, loffsets, jsonb_path_column->get_data_at(0), + res->get_data(), null_map->get_data()); + } else { + Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(), + null_map->get_data()); + } } block.get_by_position(result).column = ColumnNullable::create(std::move(res), std::move(null_map)); @@ -357,6 +369,81 @@ struct JsonbExtractStringImpl { using ColumnType = typename ValueType::ColumnType; static const bool only_check_exists = ValueType::only_check_exists; +private: + static ALWAYS_INLINE void inner_loop_impl(size_t i, ColumnString::Chars& res_data, + ColumnString::Offsets& res_offsets, NullMap& null_map, + const std::unique_ptr& writer, + std::unique_ptr& formater, + const char* l_raw, int l_size, const char* r_raw, + int r_size) { + String path(r_raw, r_size); + + if (null_map[i]) { + StringOP::push_null_string(i, res_data, res_offsets, null_map); + return; + } + + // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory + JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size); + if (UNLIKELY(!doc || !doc->getValue())) { + StringOP::push_null_string(i, res_data, res_offsets, null_map); + return; + } + + // value is NOT necessary to be deleted since JsonbValue will not allocate memory + JsonbValue* value = doc->getValue()->findPath(r_raw, r_size, ".", nullptr); + if (UNLIKELY(!value)) { + StringOP::push_null_string(i, res_data, res_offsets, null_map); + return; + } + + if constexpr (ValueType::only_get_type) { + StringOP::push_value_string(std::string_view(value->typeName()), i, res_data, + res_offsets); + return; + } + + if constexpr (std::is_same_v) { + writer->reset(); + 
writer->writeValue(value); + StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), + writer->getOutput()->getSize()), + i, res_data, res_offsets); + } else { + if (LIKELY(value->isString())) { + auto str_value = (JsonbStringVal*)value; + StringOP::push_value_string( + std::string_view(str_value->getBlob(), str_value->length()), i, res_data, + res_offsets); + } else if (value->isNull()) { + StringOP::push_value_string("null", i, res_data, res_offsets); + } else if (value->isTrue()) { + StringOP::push_value_string("true", i, res_data, res_offsets); + } else if (value->isFalse()) { + StringOP::push_value_string("false", i, res_data, res_offsets); + } else if (value->isInt8()) { + StringOP::push_value_string(std::to_string(((const JsonbInt8Val*)value)->val()), i, + res_data, res_offsets); + } else if (value->isInt16()) { + StringOP::push_value_string(std::to_string(((const JsonbInt16Val*)value)->val()), i, + res_data, res_offsets); + } else if (value->isInt32()) { + StringOP::push_value_string(std::to_string(((const JsonbInt32Val*)value)->val()), i, + res_data, res_offsets); + } else if (value->isInt64()) { + StringOP::push_value_string(std::to_string(((const JsonbInt64Val*)value)->val()), i, + res_data, res_offsets); + } else { + if (!formater) { + formater.reset(new JsonbToJson()); + } + StringOP::push_value_string(formater->to_json_string(value), i, res_data, + res_offsets); + } + } + } + +public: // for jsonb_extract_string static void vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, const ColumnString::Offsets& loffsets, @@ -375,80 +462,59 @@ struct JsonbExtractStringImpl { for (size_t i = 0; i < input_rows_count; ++i) { int l_size = loffsets[i] - loffsets[i - 1]; - const auto l_raw = reinterpret_cast(&ldata[loffsets[i - 1]]); + const char* l_raw = reinterpret_cast(&ldata[loffsets[i - 1]]); int r_size = roffsets[i] - roffsets[i - 1]; - const auto r_raw = reinterpret_cast(&rdata[roffsets[i - 1]]); - String 
path(r_raw, r_size); + const char* r_raw = reinterpret_cast(&rdata[roffsets[i - 1]]); - if (null_map[i]) { - StringOP::push_null_string(i, res_data, res_offsets, null_map); - continue; - } + inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, l_raw, l_size, + r_raw, r_size); + } //for + } //function + static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, + const ColumnString::Offsets& loffsets, const StringRef& rdata, + ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, + NullMap& null_map) { + size_t input_rows_count = loffsets.size(); + res_offsets.resize(input_rows_count); - // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size); - if (UNLIKELY(!doc || !doc->getValue())) { - StringOP::push_null_string(i, res_data, res_offsets, null_map); - continue; - } - - // value is NOT necessary to be deleted since JsonbValue will not allocate memory - JsonbValue* value = doc->getValue()->findPath(r_raw, r_size, ".", nullptr); - if (UNLIKELY(!value)) { - StringOP::push_null_string(i, res_data, res_offsets, null_map); - continue; - } - - if constexpr (ValueType::only_get_type) { - StringOP::push_value_string(std::string_view(value->typeName()), i, res_data, - res_offsets); - continue; - } - - if constexpr (std::is_same_v) { - writer->reset(); - writer->writeValue(value); - StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), - writer->getOutput()->getSize()), - i, res_data, res_offsets); - } else { - if (LIKELY(value->isString())) { - auto str_value = (JsonbStringVal*)value; - StringOP::push_value_string( - std::string_view(str_value->getBlob(), str_value->length()), i, - res_data, res_offsets); - } else if (value->isNull()) { - StringOP::push_value_string("null", i, res_data, res_offsets); - } else if (value->isTrue()) { - StringOP::push_value_string("true", i, res_data, res_offsets); - } 
else if (value->isFalse()) { - StringOP::push_value_string("false", i, res_data, res_offsets); - } else if (value->isInt8()) { - StringOP::push_value_string(std::to_string(((const JsonbInt8Val*)value)->val()), - i, res_data, res_offsets); - } else if (value->isInt16()) { - StringOP::push_value_string( - std::to_string(((const JsonbInt16Val*)value)->val()), i, res_data, - res_offsets); - } else if (value->isInt32()) { - StringOP::push_value_string( - std::to_string(((const JsonbInt32Val*)value)->val()), i, res_data, - res_offsets); - } else if (value->isInt64()) { - StringOP::push_value_string( - std::to_string(((const JsonbInt64Val*)value)->val()), i, res_data, - res_offsets); - } else { - if (!formater) { - formater.reset(new JsonbToJson()); - } - StringOP::push_value_string(formater->to_json_string(value), i, res_data, - res_offsets); - } - } + std::unique_ptr writer; + if constexpr (std::is_same_v) { + writer.reset(new JsonbWriter()); } - } + + std::unique_ptr formater; + + for (size_t i = 0; i < input_rows_count; ++i) { + int l_size = loffsets[i] - loffsets[i - 1]; + const char* l_raw = reinterpret_cast(&ldata[loffsets[i - 1]]); + + inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, l_raw, l_size, + rdata.data, rdata.size); + } //for + } //function + static void scalar_vector(FunctionContext* context, const StringRef& ldata, + const ColumnString::Chars& rdata, + const ColumnString::Offsets& roffsets, ColumnString::Chars& res_data, + ColumnString::Offsets& res_offsets, NullMap& null_map) { + size_t input_rows_count = roffsets.size(); + res_offsets.resize(input_rows_count); + + std::unique_ptr writer; + if constexpr (std::is_same_v) { + writer.reset(new JsonbWriter()); + } + + std::unique_ptr formater; + + for (size_t i = 0; i < input_rows_count; ++i) { + int r_size = roffsets[i] - roffsets[i - 1]; + const char* r_raw = reinterpret_cast(&rdata[roffsets[i - 1]]); + + inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, 
ldata.data, + ldata.size, r_raw, r_size); + } //for + } //function }; template @@ -458,6 +524,85 @@ struct JsonbExtractImpl { using Container = typename ColumnType::Container; static const bool only_check_exists = ValueType::only_check_exists; +private: + static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, NullMap& null_map, + const char* l_raw_str, int l_str_size, + const char* r_raw_str, int r_str_size) { + if (null_map[i]) { + res[i] = 0; + return; + } + + // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory + JsonbDocument* doc = JsonbDocument::createDocument(l_raw_str, l_str_size); + if (UNLIKELY(!doc || !doc->getValue())) { + null_map[i] = 1; + res[i] = 0; + return; + } + + // value is NOT necessary to be deleted since JsonbValue will not allocate memory + JsonbValue* value = doc->getValue()->findPath(r_raw_str, r_str_size, ".", nullptr); + + if (UNLIKELY(!value)) { + if constexpr (!only_check_exists) { + null_map[i] = 1; + } + res[i] = 0; + return; + } + + // if only check path exists, it's true here and skip check value + if constexpr (only_check_exists) { + res[i] = 1; + return; + } + + if constexpr (std::is_same_v) { + if (value->isNull()) { + res[i] = 1; + } else { + res[i] = 0; + } + } else if constexpr (std::is_same_v) { + if (value->isTrue()) { + res[i] = 1; + } else if (value->isFalse()) { + res[i] = 0; + } else { + null_map[i] = 1; + res[i] = 0; + } + } else if constexpr (std::is_same_v) { + if (value->isInt8() || value->isInt16() || value->isInt32()) { + res[i] = (int32_t)((const JsonbIntVal*)value)->val(); + } else { + null_map[i] = 1; + res[i] = 0; + } + } else if constexpr (std::is_same_v) { + if (value->isInt8() || value->isInt16() || value->isInt32() || value->isInt64()) { + res[i] = ((const JsonbIntVal*)value)->val(); + } else { + null_map[i] = 1; + res[i] = 0; + } + } else if constexpr (std::is_same_v) { + if (value->isDouble()) { + res[i] = ((const JsonbDoubleVal*)value)->val(); + } else if 
(value->isInt8() || value->isInt16() || value->isInt32() || + value->isInt64()) { + res[i] = ((const JsonbIntVal*)value)->val(); + } else { + null_map[i] = 1; + res[i] = 0; + } + } else { + LOG(FATAL) << "unexpected type "; + } + } + +public: // for jsonb_extract_int/int64/double static void vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, const ColumnString::Offsets& loffsets, @@ -478,79 +623,44 @@ struct JsonbExtractImpl { const char* r_raw_str = reinterpret_cast(&rdata[roffsets[i - 1]]); int r_str_size = roffsets[i] - roffsets[i - 1]; - if (null_map[i]) { - res[i] = 0; - continue; - } + inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, r_raw_str, r_str_size); + } //for + } //function + static void scalar_vector(FunctionContext* context, const StringRef& ldata, + const ColumnString::Chars& rdata, + const ColumnString::Offsets& roffsets, Container& res, + NullMap& null_map) { + size_t size = roffsets.size(); + res.resize(size); - // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory - JsonbDocument* doc = JsonbDocument::createDocument(l_raw_str, l_str_size); - if (UNLIKELY(!doc || !doc->getValue())) { - null_map[i] = 1; - res[i] = 0; - continue; - } - - // value is NOT necessary to be deleted since JsonbValue will not allocate memory - JsonbValue* value = doc->getValue()->findPath(r_raw_str, r_str_size, ".", nullptr); - if (UNLIKELY(!value)) { - if constexpr (!only_check_exists) { - null_map[i] = 1; - } - res[i] = 0; - continue; - } - - // if only check path exists, it's true here and skip check value + for (size_t i = 0; i < size; i++) { if constexpr (only_check_exists) { - res[i] = 1; - continue; + res[i] = 0; } - if constexpr (std::is_same_v) { - if (value->isNull()) { - res[i] = 1; - } else { - res[i] = 0; - } - } else if constexpr (std::is_same_v) { - if (value->isTrue()) { - res[i] = 1; - } else if (value->isFalse()) { - res[i] = 0; - } else { - null_map[i] = 1; - res[i] = 0; - } - } else if 
constexpr (std::is_same_v) { - if (value->isInt8() || value->isInt16() || value->isInt32()) { - res[i] = (int32_t)((const JsonbIntVal*)value)->val(); - } else { - null_map[i] = 1; - res[i] = 0; - } - } else if constexpr (std::is_same_v) { - if (value->isInt8() || value->isInt16() || value->isInt32() || value->isInt64()) { - res[i] = ((const JsonbIntVal*)value)->val(); - } else { - null_map[i] = 1; - res[i] = 0; - } - } else if constexpr (std::is_same_v) { - if (value->isDouble()) { - res[i] = ((const JsonbDoubleVal*)value)->val(); - } else if (value->isInt8() || value->isInt16() || value->isInt32() || - value->isInt64()) { - res[i] = ((const JsonbIntVal*)value)->val(); - } else { - null_map[i] = 1; - res[i] = 0; - } - } else { - LOG(FATAL) << "unexpected type "; + const char* r_raw_str = reinterpret_cast(&rdata[roffsets[i - 1]]); + int r_str_size = roffsets[i] - roffsets[i - 1]; + + inner_loop_impl(i, res, null_map, ldata.data, ldata.size, r_raw_str, r_str_size); + } //for + } //function + static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, + const ColumnString::Offsets& loffsets, const StringRef& rdata, + Container& res, NullMap& null_map) { + size_t size = loffsets.size(); + res.resize(size); + + for (size_t i = 0; i < loffsets.size(); i++) { + if constexpr (only_check_exists) { + res[i] = 0; } - } - } + + const char* l_raw_str = reinterpret_cast(&ldata[loffsets[i - 1]]); + int l_str_size = loffsets[i] - loffsets[i - 1]; + + inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, rdata.data, rdata.size); + } //for + } //function }; struct JsonbTypeExists { diff --git a/be/src/vec/functions/function_map.cpp b/be/src/vec/functions/function_map.cpp index 9cb5c5898d..075957d5cc 100644 --- a/be/src/vec/functions/function_map.cpp +++ b/be/src/vec/functions/function_map.cpp @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. 
+#include + #include "vec/columns/column_array.h" #include "vec/columns/column_const.h" #include "vec/columns/column_map.h" @@ -90,12 +92,13 @@ public: auto& result_col_map_offsets = map_column->get_offsets(); result_col_map_offsets.resize(input_rows_count); - // convert to nullable column + std::unique_ptr col_const = std::make_unique(num_element); for (size_t i = 0; i < num_element; ++i) { auto& col = block.get_by_position(arguments[i]).column; - col = col->convert_to_full_column_if_const(); + std::tie(col, col_const[i]) = unpack_if_const(col); bool is_nullable = i % 2 == 0 ? result_col_map_keys_data.is_nullable() : result_col_map_vals_data.is_nullable(); + // convert to nullable column if (is_nullable && !col->is_nullable()) { col = ColumnNullable::create(col, ColumnUInt8::create(col->size(), 0)); } @@ -106,9 +109,10 @@ public: for (size_t row = 0; row < input_rows_count; ++row) { for (size_t i = 0; i < num_element; i += 2) { result_col_map_keys_data.insert_from(*block.get_by_position(arguments[i]).column, - row); + index_check_const(row, col_const[i])); result_col_map_vals_data.insert_from( - *block.get_by_position(arguments[i + 1]).column, row); + *block.get_by_position(arguments[i + 1]).column, + index_check_const(row, col_const[i + 1])); } offset += num_element / 2; result_col_map_offsets[row] = offset; @@ -142,8 +146,8 @@ public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { - auto left_column = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + const auto& [left_column, left_const] = + unpack_if_const(block.get_by_position(arguments[0]).column); const ColumnMap* map_column = nullptr; // const UInt8* map_null_map = nullptr; if (left_column->is_nullable()) { @@ -161,8 +165,14 @@ public: auto dst_column = ColumnInt64::create(input_rows_count); auto& dst_data = dst_column->get_data(); - for (size_t i = 0; i < map_column->size(); i++) 
{ - dst_data[i] = map_column->size_at(i); + if (left_const) { + for (size_t i = 0; i < map_column->size(); i++) { + dst_data[i] = map_column->size_at(0); + } + } else { + for (size_t i = 0; i < map_column->size(); i++) { + dst_data[i] = map_column->size_at(i); + } } block.replace_by_position(result, std::move(dst_column)); diff --git a/be/src/vec/functions/function_regexp.cpp b/be/src/vec/functions/function_regexp.cpp index 11e5d2f992..8f646a82f6 100644 --- a/be/src/vec/functions/function_regexp.cpp +++ b/be/src/vec/functions/function_regexp.cpp @@ -21,7 +21,11 @@ #include "exprs/string_functions.h" #include "udf/udf.h" +#include "vec/columns/column.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_string.h" #include "vec/common/string_ref.h" +#include "vec/core/types.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" #include "vec/functions/function_string.h" @@ -31,103 +35,148 @@ namespace doris::vectorized { struct RegexpReplaceImpl { static constexpr auto name = "regexp_replace"; - - static Status execute_impl(FunctionContext* context, ColumnPtr argument_columns[], - size_t input_rows_count, ColumnString::Chars& result_data, - ColumnString::Offsets& result_offset, NullMap& null_map) { + // 3 args + static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[], + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { const auto* str_col = check_and_get_column(argument_columns[0].get()); const auto* pattern_col = check_and_get_column(argument_columns[1].get()); const auto* replace_col = check_and_get_column(argument_columns[2].get()); - for (int i = 0; i < input_rows_count; ++i) { + for (size_t i = 0; i < input_rows_count; ++i) { if (null_map[i]) { StringOP::push_null_string(i, result_data, result_offset, null_map); continue; } + _execute_inner_loop(context, str_col, pattern_col, replace_col, result_data, + 
result_offset, null_map, i); + } + } + static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[], + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { + const auto* str_col = check_and_get_column(argument_columns[0].get()); + const auto* pattern_col = check_and_get_column(argument_columns[1].get()); + const auto* replace_col = check_and_get_column(argument_columns[2].get()); - re2::RE2* re = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); - std::unique_ptr scoped_re; // destroys re if state->re is nullptr - if (re == nullptr) { - std::string error_str; - const auto& pattern = pattern_col->get_data_at(i); - bool st = - StringFunctions::compile_regex(pattern, &error_str, StringRef(), scoped_re); - if (!st) { - context->add_warning(error_str.c_str()); - StringOP::push_null_string(i, result_data, result_offset, null_map); - continue; - } - re = scoped_re.get(); + for (size_t i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; } - - re2::StringPiece replace_str = - re2::StringPiece(replace_col->get_data_at(i).to_string_view()); - - std::string result_str(str_col->get_data_at(i).to_string()); - re2::RE2::GlobalReplace(&result_str, *re, replace_str); - StringOP::push_value_string(result_str, i, result_data, result_offset); + _execute_inner_loop(context, str_col, pattern_col, replace_col, result_data, + result_offset, null_map, i); + } + } + template + static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col, + const ColumnString* pattern_col, + const ColumnString* replace_col, + ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map, + const size_t index_now) { + re2::RE2* re = reinterpret_cast( + context->get_function_state(FunctionContext::THREAD_LOCAL)); + std::unique_ptr scoped_re; // 
destroys re if state->re is nullptr + if (re == nullptr) { + std::string error_str; + const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const)); + bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), scoped_re); + if (!st) { + context->add_warning(error_str.c_str()); + StringOP::push_null_string(index_now, result_data, result_offset, null_map); + return; + } + re = scoped_re.get(); } - return Status::OK(); + re2::StringPiece replace_str = re2::StringPiece( + replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view()); + + std::string result_str(str_col->get_data_at(index_now).to_string()); + re2::RE2::GlobalReplace(&result_str, *re, replace_str); + StringOP::push_value_string(result_str, index_now, result_data, result_offset); } }; struct RegexpReplaceOneImpl { static constexpr auto name = "regexp_replace_one"; - static Status execute_impl(FunctionContext* context, ColumnPtr argument_columns[], - size_t input_rows_count, ColumnString::Chars& result_data, - ColumnString::Offsets& result_offset, NullMap& null_map) { + static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[], + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { const auto* str_col = check_and_get_column(argument_columns[0].get()); const auto* pattern_col = check_and_get_column(argument_columns[1].get()); const auto* replace_col = check_and_get_column(argument_columns[2].get()); - - for (int i = 0; i < input_rows_count; ++i) { + // 3 args + for (size_t i = 0; i < input_rows_count; ++i) { if (null_map[i]) { StringOP::push_null_string(i, result_data, result_offset, null_map); continue; } + _execute_inner_loop(context, str_col, pattern_col, replace_col, result_data, + result_offset, null_map, i); + } + } - re2::RE2* re = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); - std::unique_ptr scoped_re; // destroys re if 
state->re is nullptr - if (re == nullptr) { - std::string error_str; - const auto& pattern = pattern_col->get_data_at(i); - bool st = - StringFunctions::compile_regex(pattern, &error_str, StringRef(), scoped_re); - if (!st) { - context->add_warning(error_str.c_str()); - StringOP::push_null_string(i, result_data, result_offset, null_map); - continue; - } - re = scoped_re.get(); + static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[], + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { + const auto* str_col = check_and_get_column(argument_columns[0].get()); + const auto* pattern_col = check_and_get_column(argument_columns[1].get()); + const auto* replace_col = check_and_get_column(argument_columns[2].get()); + // 3 args + for (size_t i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; } - - re2::StringPiece replace_str = - re2::StringPiece(replace_col->get_data_at(i).to_string_view()); - - std::string result_str(str_col->get_data_at(i).to_string()); - re2::RE2::Replace(&result_str, *re, replace_str); - StringOP::push_value_string(result_str, i, result_data, result_offset); + _execute_inner_loop(context, str_col, pattern_col, replace_col, result_data, + result_offset, null_map, i); + } + } + template + static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col, + const ColumnString* pattern_col, + const ColumnString* replace_col, + ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map, + const size_t index_now) { + re2::RE2* re = reinterpret_cast( + context->get_function_state(FunctionContext::THREAD_LOCAL)); + std::unique_ptr scoped_re; // destroys re if state->re is nullptr + if (re == nullptr) { + std::string error_str; + const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const)); + 
bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), scoped_re); + if (!st) { + context->add_warning(error_str.c_str()); + StringOP::push_null_string(index_now, result_data, result_offset, null_map); + return; + } + re = scoped_re.get(); } - return Status::OK(); + re2::StringPiece replace_str = re2::StringPiece( + replace_col->get_data_at(index_check_const(index_now, Const)).to_string_view()); + + std::string result_str(str_col->get_data_at(index_now).to_string()); + re2::RE2::Replace(&result_str, *re, replace_str); + StringOP::push_value_string(result_str, index_now, result_data, result_offset); } }; struct RegexpExtractImpl { static constexpr auto name = "regexp_extract"; - - static Status execute_impl(FunctionContext* context, ColumnPtr argument_columns[], - size_t input_rows_count, ColumnString::Chars& result_data, - ColumnString::Offsets& result_offset, NullMap& null_map) { + // 3 args + static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[], + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { const auto* str_col = check_and_get_column(argument_columns[0].get()); const auto* pattern_col = check_and_get_column(argument_columns[1].get()); const auto* index_col = check_and_get_column>(argument_columns[2].get()); - for (int i = 0; i < input_rows_count; ++i) { + for (size_t i = 0; i < input_rows_count; ++i) { if (null_map[i]) { StringOP::push_null_string(i, result_data, result_offset, null_map); continue; @@ -137,43 +186,76 @@ struct RegexpExtractImpl { StringOP::push_empty_string(i, result_data, result_offset); continue; } - - re2::RE2* re = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); - std::unique_ptr scoped_re; - if (re == nullptr) { - std::string error_str; - const auto& pattern = pattern_col->get_data_at(i); - bool st = - StringFunctions::compile_regex(pattern, &error_str, StringRef(), scoped_re); - if (!st) 
{ - context->add_warning(error_str.c_str()); - StringOP::push_null_string(i, result_data, result_offset, null_map); - continue; - } - re = scoped_re.get(); - } - const auto& str = str_col->get_data_at(i); - re2::StringPiece str_sp = re2::StringPiece(str.data, str.size); - - int max_matches = 1 + re->NumberOfCapturingGroups(); - if (index_data >= max_matches) { - StringOP::push_empty_string(i, result_data, result_offset); - continue; - } - - std::vector matches(max_matches); - bool success = - re->Match(str_sp, 0, str.size, re2::RE2::UNANCHORED, &matches[0], max_matches); - if (!success) { - StringOP::push_empty_string(i, result_data, result_offset); - continue; - } - const re2::StringPiece& match = matches[index_data]; - StringOP::push_value_string(std::string_view(match.data(), match.size()), i, - result_data, result_offset); + _execute_inner_loop(context, str_col, pattern_col, index_data, result_data, + result_offset, null_map, i); } - return Status::OK(); + } + + static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[], + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { + const auto* str_col = check_and_get_column(argument_columns[0].get()); + const auto* pattern_col = check_and_get_column(argument_columns[1].get()); + const auto* index_col = + check_and_get_column>(argument_columns[2].get()); + + const auto& index_data = index_col->get_int(0); + if (index_data < 0) { + for (size_t i = 0; i < input_rows_count; ++i) { + StringOP::push_empty_string(i, result_data, result_offset); + } + return; + } + + for (size_t i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; + } + + _execute_inner_loop(context, str_col, pattern_col, index_data, result_data, + result_offset, null_map, i); + } + } + template + static void _execute_inner_loop(FunctionContext* context, const ColumnString* 
str_col, + const ColumnString* pattern_col, const Int64 index_data, + ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map, + const size_t index_now) { + re2::RE2* re = reinterpret_cast( + context->get_function_state(FunctionContext::THREAD_LOCAL)); + std::unique_ptr scoped_re; + if (re == nullptr) { + std::string error_str; + const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const)); + bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), scoped_re); + if (!st) { + context->add_warning(error_str.c_str()); + StringOP::push_null_string(index_now, result_data, result_offset, null_map); + return; + } + re = scoped_re.get(); + } + const auto& str = str_col->get_data_at(index_now); + re2::StringPiece str_sp = re2::StringPiece(str.data, str.size); + + int max_matches = 1 + re->NumberOfCapturingGroups(); + if (index_data >= max_matches) { + StringOP::push_empty_string(index_now, result_data, result_offset); + return; + } + + std::vector matches(max_matches); + bool success = + re->Match(str_sp, 0, str.size, re2::RE2::UNANCHORED, &matches[0], max_matches); + if (!success) { + StringOP::push_empty_string(index_now, result_data, result_offset); + return; + } + const re2::StringPiece& match = matches[index_data]; + StringOP::push_value_string(std::string_view(match.data(), match.size()), index_now, + result_data, result_offset); } }; @@ -182,9 +264,9 @@ struct RegexpExtractAllImpl { size_t get_number_of_arguments() const { return 2; } - static Status execute_impl(FunctionContext* context, ColumnPtr argument_columns[], - size_t input_rows_count, ColumnString::Chars& result_data, - ColumnString::Offsets& result_offset, NullMap& null_map) { + static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[], + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { const auto* str_col = 
check_and_get_column(argument_columns[0].get()); const auto* pattern_col = check_and_get_column(argument_columns[1].get()); for (int i = 0; i < input_rows_count; ++i) { @@ -192,62 +274,82 @@ struct RegexpExtractAllImpl { StringOP::push_null_string(i, result_data, result_offset, null_map); continue; } - - re2::RE2* re = reinterpret_cast( - context->get_function_state(FunctionContext::THREAD_LOCAL)); - std::unique_ptr scoped_re; - if (re == nullptr) { - std::string error_str; - const auto& pattern = pattern_col->get_data_at(i); - bool st = - StringFunctions::compile_regex(pattern, &error_str, StringRef(), scoped_re); - if (!st) { - context->add_warning(error_str.c_str()); - StringOP::push_null_string(i, result_data, result_offset, null_map); - continue; - } - re = scoped_re.get(); - } - if (re->NumberOfCapturingGroups() == 0) { - StringOP::push_empty_string(i, result_data, result_offset); - continue; - } - const auto& str = str_col->get_data_at(i); - int max_matches = 1 + re->NumberOfCapturingGroups(); - std::vector res_matches; - size_t pos = 0; - while (pos < str.size) { - auto str_pos = str.data + pos; - auto str_size = str.size - pos; - re2::StringPiece str_sp = re2::StringPiece(str_pos, str_size); - std::vector matches(max_matches); - bool success = re->Match(str_sp, 0, str_size, re2::RE2::UNANCHORED, &matches[0], - max_matches); - if (!success) { - StringOP::push_empty_string(i, result_data, result_offset); - break; - } - res_matches.push_back(matches[1]); - auto offset = - std::string(str_pos, str_size).find(std::string(matches[0].as_string())); - pos += offset + matches[0].size(); - } - - if (res_matches.empty()) { - continue; - } - - std::string res = "["; - for (int j = 0; j < res_matches.size(); ++j) { - res += "'" + res_matches[j].as_string() + "'"; - if (j < res_matches.size() - 1) { - res += ","; - } - } - res += "]"; - StringOP::push_value_string(std::string_view(res), i, result_data, result_offset); + _execute_inner_loop(context, str_col, pattern_col, 
result_data, result_offset, + null_map, i); } - return Status::OK(); + } + + static void execute_impl_const_args(FunctionContext* context, ColumnPtr argument_columns[], + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { + const auto* str_col = check_and_get_column(argument_columns[0].get()); + const auto* pattern_col = check_and_get_column(argument_columns[1].get()); + for (int i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; + } + _execute_inner_loop(context, str_col, pattern_col, result_data, result_offset, + null_map, i); + } + } + template + static void _execute_inner_loop(FunctionContext* context, const ColumnString* str_col, + const ColumnString* pattern_col, + ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map, + const size_t index_now) { + re2::RE2* re = reinterpret_cast( + context->get_function_state(FunctionContext::THREAD_LOCAL)); + std::unique_ptr scoped_re; + if (re == nullptr) { + std::string error_str; + const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, Const)); + bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), scoped_re); + if (!st) { + context->add_warning(error_str.c_str()); + StringOP::push_null_string(index_now, result_data, result_offset, null_map); + return; + } + re = scoped_re.get(); + } + if (re->NumberOfCapturingGroups() == 0) { + StringOP::push_empty_string(index_now, result_data, result_offset); + return; + } + const auto& str = str_col->get_data_at(index_now); + int max_matches = 1 + re->NumberOfCapturingGroups(); + std::vector res_matches; + size_t pos = 0; + while (pos < str.size) { + auto str_pos = str.data + pos; + auto str_size = str.size - pos; + re2::StringPiece str_sp = re2::StringPiece(str_pos, str_size); + std::vector matches(max_matches); + bool success = + re->Match(str_sp, 
0, str_size, re2::RE2::UNANCHORED, &matches[0], max_matches); + if (!success) { + StringOP::push_empty_string(index_now, result_data, result_offset); + break; + } + res_matches.push_back(matches[1]); + auto offset = std::string(str_pos, str_size).find(std::string(matches[0].as_string())); + pos += offset + matches[0].size(); + } + + if (res_matches.empty()) { + return; + } + + std::string res = "["; + for (int j = 0; j < res_matches.size(); ++j) { + res += "'" + res_matches[j].as_string() + "'"; + if (j < res_matches.size() - 1) { + res += ","; + } + } + res += "]"; + StringOP::push_value_string(std::string_view(res), index_now, result_data, result_offset); } }; @@ -303,26 +405,52 @@ public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { size_t argument_size = arguments.size(); - ColumnPtr argument_columns[argument_size]; + auto result_null_map = ColumnUInt8::create(input_rows_count, 0); auto result_data_column = ColumnString::create(); - auto& result_data = result_data_column->get_chars(); auto& result_offset = result_data_column->get_offsets(); result_offset.resize(input_rows_count); + bool col_const[3]; + ColumnPtr argument_columns[3]; for (int i = 0; i < argument_size; ++i) { - argument_columns[i] = - block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); - if (auto* nullable = check_and_get_column(*argument_columns[i])) { - VectorizedUtils::update_null_map(result_null_map->get_data(), - nullable->get_null_map_data()); - argument_columns[i] = nullable->get_nested_column_ptr(); - } + col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); + } + argument_columns[0] = col_const[0] ? 
static_cast( + *block.get_by_position(arguments[0]).column) + .convert_to_full_column() + : block.get_by_position(arguments[0]).column; + if constexpr (std::is_same_v) { + default_preprocess_parameter_columns(argument_columns, col_const, {1}, block, + arguments); + } else { + default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, + arguments); + } + for (int i = 0; i < argument_size; i++) { + check_set_nullable(argument_columns[i], result_null_map); } - Impl::execute_impl(context, argument_columns, input_rows_count, result_data, result_offset, - result_null_map->get_data()); + if constexpr (std::is_same_v) { + if (col_const[1]) { + Impl::execute_impl_const_args(context, argument_columns, input_rows_count, + result_data, result_offset, + result_null_map->get_data()); + } else { + Impl::execute_impl(context, argument_columns, input_rows_count, result_data, + result_offset, result_null_map->get_data()); + } + } else { + if (col_const[1] && col_const[2]) { + Impl::execute_impl_const_args(context, argument_columns, input_rows_count, + result_data, result_offset, + result_null_map->get_data()); + } else { + Impl::execute_impl(context, argument_columns, input_rows_count, result_data, + result_offset, result_null_map->get_data()); + } + } block.get_by_position(result).column = ColumnNullable::create(std::move(result_data_column), std::move(result_null_map)); diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 34c65219f0..5579019452 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -25,7 +25,9 @@ #include "runtime/string_search.hpp" #include "util/url_coding.h" +#include "vec/columns/column_string.h" #include "vec/common/pod_array_fwd.h" +#include "vec/common/string_ref.h" #include "vec/functions/function_reverse.h" #include "vec/functions/function_string_to_string.h" #include "vec/functions/function_totype.h" @@ -212,10 +214,10 @@ struct 
StringFunctionImpl { using ResultDataType = typename OP::ResultDataType; using ResultPaddedPODArray = typename OP::ResultPaddedPODArray; - static Status vector_vector(const ColumnString::Chars& ldata, - const ColumnString::Offsets& loffsets, - const ColumnString::Chars& rdata, - const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { + static void vector_vector(const ColumnString::Chars& ldata, + const ColumnString::Offsets& loffsets, + const ColumnString::Chars& rdata, + const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { DCHECK_EQ(loffsets.size(), roffsets.size()); auto size = loffsets.size(); @@ -232,8 +234,33 @@ struct StringFunctionImpl { OP::execute(lview, rview, res[i]); } + } + static void vector_scalar(const ColumnString::Chars& ldata, + const ColumnString::Offsets& loffsets, const StringRef& rdata, + ResultPaddedPODArray& res) { + auto size = loffsets.size(); + res.resize(size); + std::string_view rview(rdata.data, rdata.size); + for (int i = 0; i < size; ++i) { + const char* l_raw_str = reinterpret_cast(&ldata[loffsets[i - 1]]); + int l_str_size = loffsets[i] - loffsets[i - 1]; + std::string_view lview(l_raw_str, l_str_size); - return Status::OK(); + OP::execute(lview, rview, res[i]); + } + } + static void scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata, + const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { + auto size = roffsets.size(); + res.resize(size); + std::string_view lview(ldata.data, ldata.size); + for (int i = 0; i < size; ++i) { + const char* r_raw_str = reinterpret_cast(&rdata[roffsets[i - 1]]); + int r_str_size = roffsets[i] - roffsets[i - 1]; + std::string_view rview(r_raw_str, r_str_size); + + OP::execute(lview, rview, res[i]); + } } }; @@ -582,6 +609,66 @@ struct StringAppendTrailingCharIfAbsent { res_offsets); } } + static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets, + const StringRef& rdata, Chars& res_data, Offsets& 
res_offsets, + NullMap& null_map_data) { + size_t input_rows_count = loffsets.size(); + res_offsets.resize(input_rows_count); + fmt::memory_buffer buffer; + + if (rdata.size != 1) { + for (size_t i = 0; i < input_rows_count; ++i) { + StringOP::push_null_string(i, res_data, res_offsets, null_map_data); + } + return; + } + + for (size_t i = 0; i < input_rows_count; ++i) { + buffer.clear(); + + int l_size = loffsets[i] - loffsets[i - 1]; + const auto l_raw = reinterpret_cast(&ldata[loffsets[i - 1]]); + + if (l_raw[l_size - 1] == rdata.data[0]) { + StringOP::push_value_string(std::string_view(l_raw, l_size), i, res_data, + res_offsets); + continue; + } + + buffer.append(l_raw, l_raw + l_size); + buffer.append(rdata.begin(), rdata.end()); + StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, + res_offsets); + } + } + static void scalar_vector(FunctionContext* context, const StringRef& ldata, const Chars& rdata, + const Offsets& roffsets, Chars& res_data, Offsets& res_offsets, + NullMap& null_map_data) { + size_t input_rows_count = roffsets.size(); + res_offsets.resize(input_rows_count); + fmt::memory_buffer buffer; + + for (size_t i = 0; i < input_rows_count; ++i) { + buffer.clear(); + + int r_size = roffsets[i] - roffsets[i - 1]; + const auto r_raw = reinterpret_cast(&rdata[roffsets[i - 1]]); + + if (r_size != 1) { + StringOP::push_null_string(i, res_data, res_offsets, null_map_data); + continue; + } + if (ldata.size == 0 || ldata.back() == r_raw[0]) { + StringOP::push_value_string(ldata.to_string_view(), i, res_data, res_offsets); + continue; + } + + buffer.append(ldata.begin(), ldata.end()); + buffer.append(r_raw, r_raw + 1); + StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, + res_offsets); + } + } }; struct StringLPad { @@ -625,8 +712,6 @@ using FunctionStringLocate = using FunctionStringFindInSet = FunctionBinaryToType; -using FunctionUnHex = FunctionStringOperateToNullType; - using 
FunctionToLower = FunctionStringToString, NameToLower>; using FunctionToUpper = FunctionStringToString, NameToUpper>; @@ -639,8 +724,8 @@ using FunctionRTrim = FunctionStringToString, NameRTrim>; using FunctionTrim = FunctionStringToString, NameTrim>; +using FunctionUnHex = FunctionStringOperateToNullType; using FunctionToBase64 = FunctionStringOperateToNullType; - using FunctionFromBase64 = FunctionStringOperateToNullType; using FunctionStringAppendTrailingCharIfAbsent = diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 713d8864d3..b02f379781 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -20,10 +20,12 @@ #include #include +#include #include #include "util/string_util.h" #include "vec/columns/column.h" +#include "vec/columns/column_const.h" #ifndef USE_LIBCPP #include #define PMR std::pmr @@ -122,44 +124,49 @@ struct SubstringUtil { static void substring_execute(Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { DCHECK_EQ(arguments.size(), 3); + auto res = ColumnString::create(); auto null_map = ColumnUInt8::create(input_rows_count, 0); + bool col_const[3]; ColumnPtr argument_columns[3]; - for (int i = 0; i < 3; ++i) { - argument_columns[i] = - block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); - if (auto* nullable = check_and_get_column(*argument_columns[i])) { - // Danger: Here must dispose the null map data first! Because - // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem - // of column nullable mem of null map - VectorizedUtils::update_null_map(null_map->get_data(), - nullable->get_null_map_data()); - argument_columns[i] = nullable->get_nested_column_ptr(); - } + col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); } + argument_columns[0] = col_const[0] ? 
static_cast( + *block.get_by_position(arguments[0]).column) + .convert_to_full_column() + : block.get_by_position(arguments[0]).column; - auto res = ColumnString::create(); + default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); + + for (int i = 0; i < 3; i++) { + check_set_nullable(argument_columns[i], null_map); + } auto specific_str_column = assert_cast(argument_columns[0].get()); auto specific_start_column = assert_cast*>(argument_columns[1].get()); auto specific_len_column = assert_cast*>(argument_columns[2].get()); - - vector(specific_str_column->get_chars(), specific_str_column->get_offsets(), - specific_start_column->get_data(), specific_len_column->get_data(), - null_map->get_data(), res->get_chars(), res->get_offsets()); - + if (col_const[1] && col_const[2]) { + vectors(specific_str_column->get_chars(), specific_str_column->get_offsets(), + specific_start_column->get_data(), specific_len_column->get_data(), + null_map->get_data(), res->get_chars(), res->get_offsets()); + } else { + vectors(specific_str_column->get_chars(), specific_str_column->get_offsets(), + specific_start_column->get_data(), specific_len_column->get_data(), + null_map->get_data(), res->get_chars(), res->get_offsets()); + } block.get_by_position(result).column = ColumnNullable::create(std::move(res), std::move(null_map)); } private: - static void vector(const ColumnString::Chars& chars, const ColumnString::Offsets& offsets, - const PaddedPODArray& start, const PaddedPODArray& len, - NullMap& null_map, ColumnString::Chars& res_chars, - ColumnString::Offsets& res_offsets) { + template + static void vectors(const ColumnString::Chars& chars, const ColumnString::Offsets& offsets, + const PaddedPODArray& start, const PaddedPODArray& len, + NullMap& null_map, ColumnString::Chars& res_chars, + ColumnString::Offsets& res_offsets) { int size = offsets.size(); res_offsets.resize(size); res_chars.reserve(chars.size()); @@ -179,8 +186,11 @@ private: for (int i 
= 0; i < size; ++i) { auto [raw_str, str_size] = strs[i]; + const auto& start_value = start[index_check_const(i, Const)]; + const auto& len_value = len[index_check_const(i, Const)]; + // return empty string if start > src.length - if (start[i] > str_size || str_size == 0 || start[i] == 0 || len[i] <= 0) { + if (start_value > str_size || str_size == 0 || start_value == 0 || len_value <= 0) { StringOP::push_empty_string(i, res_chars, res_offsets); continue; } @@ -190,12 +200,12 @@ private: for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { char_size = UTF8_BYTE_LENGTH[(unsigned char)(raw_str)[j]]; index.push_back(j); - if (start[i] > 0 && index.size() > start[i] + len[i]) { + if (start_value > 0 && index.size() > start_value + len_value) { break; } } - int fixed_pos = start[i]; + int fixed_pos = start_value; if (fixed_pos < -(int)index.size()) { StringOP::push_empty_string(i, res_chars, res_offsets); continue; @@ -210,8 +220,8 @@ private: byte_pos = index[fixed_pos - 1]; int fixed_len = str_size - byte_pos; - if (fixed_pos + len[i] <= index.size()) { - fixed_len = index[fixed_pos + len[i] - 1] - byte_pos; + if (fixed_pos + len_value <= index.size()) { + fixed_len = index[fixed_pos + len_value - 1] - byte_pos; } if (byte_pos <= str_size && fixed_len > 0) { @@ -275,22 +285,27 @@ struct Substr2Impl { static Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { - auto params = ColumnInt32::create(input_rows_count); - auto& strlen_data = params->get_data(); + auto col_len = ColumnInt32::create(input_rows_count); + auto& strlen_data = col_len->get_data(); - auto str_col = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + ColumnPtr str_col; + bool str_const; + std::tie(str_col, str_const) = unpack_if_const(block.get_by_position(arguments[0]).column); if (auto* nullable = check_and_get_column(*str_col)) { str_col = nullable->get_nested_column_ptr(); 
} auto& str_offset = assert_cast(str_col.get())->get_offsets(); - for (int i = 0; i < input_rows_count; ++i) { - strlen_data[i] = str_offset[i] - str_offset[i - 1]; + if (str_const) { + std::fill(strlen_data.begin(), strlen_data.end(), str_offset[0] - str_offset[-1]); + } else { + for (int i = 0; i < input_rows_count; ++i) { + strlen_data[i] = str_offset[i] - str_offset[i - 1]; + } } - block.insert({std::move(params), std::make_shared(), "strlen"}); - + // we complete the column2(strlen) with the default value - each row's strlen. + block.insert({std::move(col_len), std::make_shared(), "strlen"}); ColumnNumbers temp_arguments = {arguments[0], arguments[1], block.columns() - 1}; SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count); diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index cac6393058..4ef50490ab 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -21,6 +21,7 @@ #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" +#include "vec/common/string_ref.h" #include "vec/data_types/data_type_date.h" #include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_number.h" @@ -45,22 +46,18 @@ struct StrToDate { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { auto null_map = ColumnUInt8::create(input_rows_count, 0); - ColumnPtr argument_columns[2]; - // focus convert const to full column to simply execute logic - // handle - for (int i = 0; i < 2; ++i) { - argument_columns[i] = - block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); - if (auto* nullable = check_and_get_column(*argument_columns[i])) { - // Danger: Here must dispose the null map data first! 
Because - // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem - // of column nullable mem of null map - VectorizedUtils::update_null_map(null_map->get_data(), - nullable->get_null_map_data()); - argument_columns[i] = nullable->get_nested_column_ptr(); - } - } + const auto& col0 = block.get_by_position(arguments[0]).column; + bool col_const[2] = {is_column_const(*col0)}; + ColumnPtr argument_columns[2] = { + col_const[0] ? static_cast(*col0).convert_to_full_column() + : col0}; + check_set_nullable(argument_columns[0], null_map); + //TODO: when we set default implementation for nullable, the check_set_nullable for arguments is useless. consider to remove it. + + std::tie(argument_columns[1], col_const[1]) = + unpack_if_const(block.get_by_position(arguments[1]).column); + check_set_nullable(argument_columns[1], null_map); auto specific_str_column = assert_cast(argument_columns[0].get()); auto specific_char_column = assert_cast(argument_columns[1].get()); @@ -75,53 +72,98 @@ struct StrToDate { WhichDataType which(remove_nullable(block.get_by_position(result).type)); if (which.is_date_time_v2()) { res = ColumnVector::create(); - executeImpl, UInt64>( - context, ldata, loffsets, rdata, roffsets, - static_cast*>(res->assume_mutable().get())->get_data(), - null_map->get_data()); + if (col_const[1]) { + execute_impl_const_right, UInt64>( + context, ldata, loffsets, specific_char_column->get_data_at(0), + static_cast*>(res->assume_mutable().get())->get_data(), + null_map->get_data()); + } else { + execute_impl, UInt64>( + context, ldata, loffsets, rdata, roffsets, + static_cast*>(res->assume_mutable().get())->get_data(), + null_map->get_data()); + } } else if (which.is_date_v2()) { res = ColumnVector::create(); - executeImpl, UInt32>( - context, ldata, loffsets, rdata, roffsets, - static_cast*>(res->assume_mutable().get())->get_data(), - null_map->get_data()); + if (col_const[1]) { + execute_impl_const_right, UInt32>( + context, ldata, loffsets, 
specific_char_column->get_data_at(0), + static_cast*>(res->assume_mutable().get())->get_data(), + null_map->get_data()); + } else { + execute_impl, UInt32>( + context, ldata, loffsets, rdata, roffsets, + static_cast*>(res->assume_mutable().get())->get_data(), + null_map->get_data()); + } } else { res = ColumnVector::create(); - executeImpl( - context, ldata, loffsets, rdata, roffsets, - static_cast*>(res->assume_mutable().get())->get_data(), - null_map->get_data()); + if (col_const[1]) { + execute_impl_const_right( + context, ldata, loffsets, specific_char_column->get_data_at(0), + static_cast*>(res->assume_mutable().get())->get_data(), + null_map->get_data()); + } else { + execute_impl( + context, ldata, loffsets, rdata, roffsets, + static_cast*>(res->assume_mutable().get())->get_data(), + null_map->get_data()); + } } - block.get_by_position(result).column = - ColumnNullable::create(std::move(res), std::move(null_map)); + block.get_by_position(result).column = ColumnNullable::create(res, std::move(null_map)); return Status::OK(); } +private: template - static void executeImpl(FunctionContext* context, const ColumnString::Chars& ldata, - const ColumnString::Offsets& loffsets, const ColumnString::Chars& rdata, - const ColumnString::Offsets& roffsets, PaddedPODArray& res, - NullMap& null_map) { + static void execute_impl(FunctionContext* context, const ColumnString::Chars& ldata, + const ColumnString::Offsets& loffsets, + const ColumnString::Chars& rdata, + const ColumnString::Offsets& roffsets, PaddedPODArray& res, + NullMap& null_map) { size_t size = loffsets.size(); res.resize(size); for (size_t i = 0; i < size; ++i) { const char* l_raw_str = reinterpret_cast(&ldata[loffsets[i - 1]]); - int l_str_size = loffsets[i] - loffsets[i - 1]; + size_t l_str_size = loffsets[i] - loffsets[i - 1]; const char* r_raw_str = reinterpret_cast(&rdata[roffsets[i - 1]]); - int r_str_size = roffsets[i] - roffsets[i - 1]; + size_t r_str_size = roffsets[i] - roffsets[i - 1]; - auto& 
ts_val = *reinterpret_cast(&res[i]); - if (!ts_val.from_date_format_str(r_raw_str, r_str_size, l_raw_str, l_str_size)) { - null_map[i] = 1; - } - if constexpr (std::is_same_v) { - if (context->get_return_type().type == doris::PrimitiveType::TYPE_DATETIME) { - ts_val.to_datetime(); - } else { - ts_val.cast_to_date(); - } + _execute_inner_loop(l_raw_str, l_str_size, r_raw_str, + r_str_size, context, res, null_map, i); + } + } + template + static void execute_impl_const_right(FunctionContext* context, const ColumnString::Chars& ldata, + const ColumnString::Offsets& loffsets, + const StringRef& rdata, PaddedPODArray& res, + NullMap& null_map) { + size_t size = loffsets.size(); + res.resize(size); + for (size_t i = 0; i < size; ++i) { + const char* l_raw_str = reinterpret_cast(&ldata[loffsets[i - 1]]); + size_t l_str_size = loffsets[i] - loffsets[i - 1]; + + _execute_inner_loop(l_raw_str, l_str_size, rdata.data, + rdata.size, context, res, null_map, i); + } + } + template + static void _execute_inner_loop(const char* l_raw_str, size_t l_str_size, const char* r_raw_str, + size_t r_str_size, FunctionContext* context, + PaddedPODArray& res, NullMap& null_map, + size_t index) { + auto& ts_val = *reinterpret_cast(&res[index]); + if (!ts_val.from_date_format_str(r_raw_str, r_str_size, l_raw_str, l_str_size)) { + null_map[index] = 1; + } + if constexpr (std::is_same_v) { + if (context->get_return_type().type == doris::PrimitiveType::TYPE_DATETIME) { + ts_val.to_datetime(); + } else { + ts_val.cast_to_date(); } } } @@ -142,53 +184,87 @@ struct MakeDateImpl { size_t result, size_t input_rows_count) { auto null_map = ColumnUInt8::create(input_rows_count, 0); DCHECK_EQ(arguments.size(), 2); - ColumnPtr argument_columns[2]; - for (int i = 0; i < 2; ++i) { - argument_columns[i] = - block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); - if (auto* nullable = check_and_get_column(*argument_columns[i])) { - // Danger: Here must dispose the null map data first! 
Because - // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem - // of column nullable mem of null map - VectorizedUtils::update_null_map(null_map->get_data(), - nullable->get_null_map_data()); - argument_columns[i] = nullable->get_nested_column_ptr(); - } - } + + const auto& col0 = block.get_by_position(arguments[0]).column; + bool col_const[2] = {is_column_const(*col0)}; + ColumnPtr argument_columns[2] = { + col_const[0] ? static_cast(*col0).convert_to_full_column() + : col0}; + check_set_nullable(argument_columns[0], null_map); + + std::tie(argument_columns[1], col_const[1]) = + unpack_if_const(block.get_by_position(arguments[1]).column); + check_set_nullable(argument_columns[1], null_map); ColumnPtr res = nullptr; WhichDataType which(remove_nullable(block.get_by_position(result).type)); if (which.is_date_v2()) { res = ColumnVector::create(); - executeImpl, UInt32>( - static_cast*>(argument_columns[0].get())->get_data(), - static_cast*>(argument_columns[1].get())->get_data(), - static_cast*>(res->assume_mutable().get())->get_data(), - null_map->get_data()); + if (col_const[1]) { + execute_impl_right_const, UInt32>( + static_cast*>(argument_columns[0].get()) + ->get_data(), + static_cast*>(argument_columns[1].get()) + ->get_element(0), + static_cast*>(res->assume_mutable().get())->get_data(), + null_map->get_data()); + } else { + execute_impl, UInt32>( + static_cast*>(argument_columns[0].get()) + ->get_data(), + static_cast*>(argument_columns[1].get()) + ->get_data(), + static_cast*>(res->assume_mutable().get())->get_data(), + null_map->get_data()); + } } else if (which.is_date_time_v2()) { res = ColumnVector::create(); - executeImpl, UInt64>( - static_cast*>(argument_columns[0].get())->get_data(), - static_cast*>(argument_columns[1].get())->get_data(), - static_cast*>(res->assume_mutable().get())->get_data(), - null_map->get_data()); + if (col_const[1]) { + execute_impl_right_const, UInt64>( + static_cast*>(argument_columns[0].get()) + 
->get_data(), + static_cast*>(argument_columns[1].get()) + ->get_element(0), + static_cast*>(res->assume_mutable().get())->get_data(), + null_map->get_data()); + } else { + execute_impl, UInt64>( + static_cast*>(argument_columns[0].get()) + ->get_data(), + static_cast*>(argument_columns[1].get()) + ->get_data(), + static_cast*>(res->assume_mutable().get())->get_data(), + null_map->get_data()); + } } else { res = ColumnVector::create(); - executeImpl( - static_cast*>(argument_columns[0].get())->get_data(), - static_cast*>(argument_columns[1].get())->get_data(), - static_cast*>(res->assume_mutable().get())->get_data(), - null_map->get_data()); + if (col_const[1]) { + execute_impl_right_const( + static_cast*>(argument_columns[0].get()) + ->get_data(), + static_cast*>(argument_columns[1].get()) + ->get_element(0), + static_cast*>(res->assume_mutable().get())->get_data(), + null_map->get_data()); + } else { + execute_impl( + static_cast*>(argument_columns[0].get()) + ->get_data(), + static_cast*>(argument_columns[1].get()) + ->get_data(), + static_cast*>(res->assume_mutable().get())->get_data(), + null_map->get_data()); + } } - block.get_by_position(result).column = - ColumnNullable::create(std::move(res), std::move(null_map)); + block.get_by_position(result).column = ColumnNullable::create(res, std::move(null_map)); return Status::OK(); } +private: template - static void executeImpl(const PaddedPODArray& ldata, const PaddedPODArray& rdata, - PaddedPODArray& res, NullMap& null_map) { + static void execute_impl(const PaddedPODArray& ldata, const PaddedPODArray& rdata, + PaddedPODArray& res, NullMap& null_map) { auto len = ldata.size(); res.resize(len); @@ -199,25 +275,45 @@ struct MakeDateImpl { null_map[i] = 1; continue; } + _execute_inner_loop(l, r, res, null_map, i); + } + } + template + static void execute_impl_right_const(const PaddedPODArray& ldata, Int32 rdata, + PaddedPODArray& res, NullMap& null_map) { + auto len = ldata.size(); + res.resize(len); - auto& 
res_val = *reinterpret_cast(&res[i]); - if constexpr (std::is_same_v) { - VecDateTimeValue ts_value = VecDateTimeValue(); - ts_value.set_time(l, 1, 1, 0, 0, 0); + const auto& r = rdata; + for (size_t i = 0; i < len; ++i) { + const auto& l = ldata[i]; + if (r <= 0 || l < 0 || l > 9999) { + null_map[i] = 1; + continue; + } + _execute_inner_loop(l, r, res, null_map, i); + } + } + template + static void _execute_inner_loop(const int& l, const int& r, PaddedPODArray& res, + NullMap& null_map, size_t index) { + auto& res_val = *reinterpret_cast(&res[index]); + if constexpr (std::is_same_v) { + VecDateTimeValue ts_value = VecDateTimeValue(); + ts_value.set_time(l, 1, 1, 0, 0, 0); - TimeInterval interval(DAY, r - 1, false); - res_val = ts_value; - if (!res_val.template date_add_interval(interval)) { - null_map[i] = 1; - continue; - } - res_val.cast_to_date(); - } else { - res_val.set_time(l, 1, 1, 0, 0, 0, 0); - TimeInterval interval(DAY, r - 1, false); - if (!res_val.template date_add_interval(interval)) { - null_map[i] = 1; - } + TimeInterval interval(DAY, r - 1, false); + res_val = ts_value; + if (!res_val.template date_add_interval(interval)) { + null_map[index] = 1; + return; + } + res_val.cast_to_date(); + } else { + res_val.set_time(l, 1, 1, 0, 0, 0, 0); + TimeInterval interval(DAY, r - 1, false); + if (!res_val.template date_add_interval(interval)) { + null_map[index] = 1; } } } @@ -248,56 +344,84 @@ struct DateTrunc { static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { DCHECK_EQ(arguments.size(), 2); - ColumnPtr argument_columns[2]; + auto null_map = ColumnUInt8::create(input_rows_count, 0); - argument_columns[0] = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - argument_columns[1] = - block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); + const auto& col0 = block.get_by_position(arguments[0]).column; + bool col_const[2] 
= {is_column_const(*col0)}; + ColumnPtr argument_columns[2] = { + col_const[0] ? static_cast(*col0).convert_to_full_column() + : col0}; + + std::tie(argument_columns[1], col_const[1]) = + unpack_if_const(block.get_by_position(arguments[1]).column); + auto datetime_column = static_cast*>(argument_columns[0].get()); auto str_column = static_cast(argument_columns[1].get()); auto& rdata = str_column->get_chars(); auto& roffsets = str_column->get_offsets(); ColumnPtr res = ColumnVector::create(); - executeImpl(datetime_column->get_data(), rdata, roffsets, + if (col_const[1]) { + execute_impl_right_const( + datetime_column->get_data(), str_column->get_data_at(0), static_cast*>(res->assume_mutable().get())->get_data(), null_map->get_data(), input_rows_count); + } else { + execute_impl( + datetime_column->get_data(), rdata, roffsets, + static_cast*>(res->assume_mutable().get())->get_data(), + null_map->get_data(), input_rows_count); + } - block.get_by_position(result).column = - ColumnNullable::create(std::move(res), std::move(null_map)); + block.get_by_position(result).column = ColumnNullable::create(res, std::move(null_map)); return Status::OK(); } - static void executeImpl(const PaddedPODArray& ldata, const ColumnString::Chars& rdata, - const ColumnString::Offsets& roffsets, PaddedPODArray& res, - NullMap& null_map, size_t input_rows_count) { +private: + static void execute_impl(const PaddedPODArray& ldata, const ColumnString::Chars& rdata, + const ColumnString::Offsets& roffsets, PaddedPODArray& res, + NullMap& null_map, size_t input_rows_count) { res.resize(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { auto dt = binary_cast(ldata[i]); const char* str_data = reinterpret_cast(&rdata[roffsets[i - 1]]); - if (std::strncmp("year", str_data, 4) == 0) { - null_map[i] = !dt.template datetime_trunc(); - } else if (std::strncmp("quarter", str_data, 7) == 0) { - null_map[i] = !dt.template datetime_trunc(); - } else if (std::strncmp("month", str_data, 5) == 
0) { - null_map[i] = !dt.template datetime_trunc(); - } else if (std::strncmp("day", str_data, 3) == 0) { - null_map[i] = !dt.template datetime_trunc(); - } else if (std::strncmp("hour", str_data, 4) == 0) { - null_map[i] = !dt.template datetime_trunc(); - } else if (std::strncmp("minute", str_data, 6) == 0) { - null_map[i] = !dt.template datetime_trunc(); - } else if (std::strncmp("second", str_data, 6) == 0) { - null_map[i] = !dt.template datetime_trunc(); - } else if (std::strncmp("week", str_data, 4) == 0) { - null_map[i] = !dt.template datetime_trunc(); - } else { - null_map[i] = 1; - } - res[i] = binary_cast(dt); + _execute_inner_loop(dt, str_data, res, null_map, i); } } + static void execute_impl_right_const(const PaddedPODArray& ldata, + const StringRef& rdata, PaddedPODArray& res, + NullMap& null_map, size_t input_rows_count) { + res.resize(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) { + auto dt = binary_cast(ldata[i]); + const char* str_data = rdata.data; + _execute_inner_loop(dt, str_data, res, null_map, i); + } + } + template + static void _execute_inner_loop(T& dt, const char* str_data, PaddedPODArray& res, + NullMap& null_map, size_t index) { + if (std::strncmp("year", str_data, 4) == 0) { + null_map[index] = !dt.template datetime_trunc(); + } else if (std::strncmp("quarter", str_data, 7) == 0) { + null_map[index] = !dt.template datetime_trunc(); + } else if (std::strncmp("month", str_data, 5) == 0) { + null_map[index] = !dt.template datetime_trunc(); + } else if (std::strncmp("week", str_data, 4) == 0) { + null_map[index] = !dt.template datetime_trunc(); + } else if (std::strncmp("day", str_data, 3) == 0) { + null_map[index] = !dt.template datetime_trunc(); + } else if (std::strncmp("hour", str_data, 4) == 0) { + null_map[index] = !dt.template datetime_trunc(); + } else if (std::strncmp("minute", str_data, 6) == 0) { + null_map[index] = !dt.template datetime_trunc(); + } else if (std::strncmp("second", str_data, 6) == 0) { + 
null_map[index] = !dt.template datetime_trunc(); + } else { + null_map[index] = 1; + } + res[index] = binary_cast(dt); + } }; class FromDays : public IFunction { @@ -671,8 +795,7 @@ struct LastDayImpl { size_t input_rows_count) { const auto is_nullable = block.get_by_position(result).type->is_nullable(); ColumnPtr res_column; - ColumnPtr argument_column = remove_nullable(block.get_by_position(arguments[0]).column) - ->convert_to_full_column_if_const(); + ColumnPtr argument_column = remove_nullable(block.get_by_position(arguments[0]).column); if (is_nullable) { auto null_map = ColumnUInt8::create(input_rows_count, 0); if constexpr (std::is_same_v || @@ -700,6 +823,7 @@ struct LastDayImpl { static_cast*>(res_column->assume_mutable().get()) ->get_data()); } + if (const auto* nullable_col = check_and_get_column( block.get_by_position(arguments[0]).column.get())) { NullMap& result_null_map = assert_cast(*null_map).get_data(); @@ -824,8 +948,7 @@ struct MondayImpl { const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { const auto is_nullable = block.get_by_position(result).type->is_nullable(); - ColumnPtr argument_column = remove_nullable(block.get_by_position(arguments[0]).column) - ->convert_to_full_column_if_const(); + ColumnPtr argument_column = remove_nullable(block.get_by_position(arguments[0]).column); ColumnPtr res_column; if (is_nullable) { auto null_map = ColumnUInt8::create(input_rows_count, 0); @@ -1027,6 +1150,8 @@ public: } bool use_default_implementation_for_constants() const override { return true; } + //TODO: add function below when we fixed be-ut. 
+ //ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { diff --git a/be/src/vec/functions/function_totype.h b/be/src/vec/functions/function_totype.h index 344223a024..0fa2724a22 100644 --- a/be/src/vec/functions/function_totype.h +++ b/be/src/vec/functions/function_totype.h @@ -59,7 +59,9 @@ public: } DataTypes get_variadic_argument_types_impl() const override { - if constexpr (has_variadic_argument) return Impl::get_variadic_argument_types(); + if constexpr (has_variadic_argument) { + return Impl::get_variadic_argument_types(); + } return {}; } @@ -232,8 +234,8 @@ private: nullptr> Status execute_inner_impl(const ColumnWithTypeAndName& left, const ColumnWithTypeAndName& right, Block& block, const ColumnNumbers& arguments, size_t result) { - auto lcol = left.column->convert_to_full_column_if_const(); - auto rcol = right.column->convert_to_full_column_if_const(); + const auto& [lcol, left_const] = unpack_if_const(left.column); + const auto& [rcol, right_const] = unpack_if_const(right.column); using ResultType = typename ResultDataType::FieldType; using ColVecResult = ColumnVector; @@ -244,9 +246,20 @@ private: if (auto col_left = check_and_get_column(lcol.get())) { if (auto col_right = check_and_get_column(rcol.get())) { - Impl::vector_vector( - col_left->get_chars(), col_left->get_offsets(), col_right->get_chars(), - col_right->get_offsets(), vec_res); + if (left_const) { + Impl::scalar_vector( + col_left->get_data_at(0), col_right->get_chars(), + col_right->get_offsets(), vec_res); + } else if (right_const) { + Impl::vector_scalar( + col_left->get_chars(), col_left->get_offsets(), + col_right->get_data_at(0), vec_res); + } else { + Impl::vector_vector( + col_left->get_chars(), col_left->get_offsets(), col_right->get_chars(), + col_right->get_offsets(), vec_res); + } + 
block.replace_by_position(result, std::move(col_res)); return Status::OK(); } @@ -259,16 +272,28 @@ private: nullptr> Status execute_inner_impl(const ColumnWithTypeAndName& left, const ColumnWithTypeAndName& right, Block& block, const ColumnNumbers& arguments, size_t result) { - auto lcol = left.column->convert_to_full_column_if_const(); - auto rcol = right.column->convert_to_full_column_if_const(); + const auto& [lcol, left_const] = unpack_if_const(left.column); + const auto& [rcol, right_const] = unpack_if_const(right.column); using ColVecResult = ColumnString; typename ColVecResult::MutablePtr col_res = ColVecResult::create(); if (auto col_left = check_and_get_column(lcol.get())) { if (auto col_right = check_and_get_column(rcol.get())) { - Impl::vector_vector( - col_left->get_chars(), col_left->get_offsets(), col_right->get_chars(), - col_right->get_offsets(), col_res->get_chars(), col_res->get_offsets()); + if (left_const) { + Impl::scalar_vector( + col_left->get_data_at(0), col_right->get_chars(), + col_right->get_offsets(), col_res->get_chars(), col_res->get_offsets()); + } else if (right_const) { + Impl::vector_scalar( + col_left->get_chars(), col_left->get_offsets(), + col_right->get_data_at(0), col_res->get_chars(), + col_res->get_offsets()); + } else { + Impl::vector_vector( + col_left->get_chars(), col_left->get_offsets(), col_right->get_chars(), + col_right->get_offsets(), col_res->get_chars(), col_res->get_offsets()); + } + block.replace_by_position(result, std::move(col_res)); return Status::OK(); } @@ -299,18 +324,13 @@ public: size_t result, size_t input_rows_count) override { auto null_map = ColumnUInt8::create(input_rows_count, 0); DCHECK_EQ(arguments.size(), 2); + ColumnPtr argument_columns[2]; + bool col_const[2]; for (int i = 0; i < 2; ++i) { - argument_columns[i] = - block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); - if (auto* nullable = check_and_get_column(*argument_columns[i])) { - // Danger: Here must dispose 
the null map data first! Because - // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem - // of column nullable mem of null map - VectorizedUtils::update_null_map(null_map->get_data(), - nullable->get_null_map_data()); - argument_columns[i] = nullable->get_nested_column_ptr(); - } + std::tie(argument_columns[i], col_const[i]) = + unpack_if_const(block.get_by_position(arguments[i]).column); + check_set_nullable(argument_columns[i], null_map); } using ResultDataType = typename Impl(argument_columns[0].get())) { if (auto col_right = check_and_get_column(argument_columns[1].get())) { - Impl::vector_vector( - col_left->get_data(), col_right->get_data(), vec_res, null_map->get_data()); + if (col_const[0]) { + Impl::scalar_vector( + col_left->get_data()[0], col_right->get_data(), vec_res, + null_map->get_data()); + } else if (col_const[1]) { + Impl::vector_scalar( + col_left->get_data(), col_right->get_data()[0], vec_res, + null_map->get_data()); + } else { + Impl::vector_vector( + col_left->get_data(), col_right->get_data(), vec_res, + null_map->get_data()); + } + block.get_by_position(result).column = ColumnNullable::create(std::move(col_res), std::move(null_map)); return Status::OK(); @@ -366,20 +398,11 @@ public: size_t result, size_t input_rows_count) override { auto null_map = ColumnUInt8::create(input_rows_count, 0); ColumnPtr argument_columns[2]; - - // focus convert const to full column to simply execute logic - // handle + bool col_const[2]; for (int i = 0; i < 2; ++i) { - argument_columns[i] = - block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); - if (auto* nullable = check_and_get_column(*argument_columns[i])) { - // Danger: Here must dispose the null map data first! 
Because - // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem - // of column nullable mem of null map - VectorizedUtils::update_null_map(null_map->get_data(), - nullable->get_null_map_data()); - argument_columns[i] = nullable->get_nested_column_ptr(); - } + std::tie(argument_columns[i], col_const[i]) = + unpack_if_const(block.get_by_position(arguments[i]).column); + check_set_nullable(argument_columns[i], null_map); } auto res = Impl::ColumnType::create(); @@ -397,11 +420,27 @@ public: if constexpr (std::is_same_v) { auto& res_data = res->get_chars(); auto& res_offsets = res->get_offsets(); - Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res_data, res_offsets, - null_map->get_data()); + if (col_const[0]) { + Impl::scalar_vector(context, specific_str_column->get_data_at(0), rdata, roffsets, + res_data, res_offsets, null_map->get_data()); + } else if (col_const[1]) { + Impl::vector_scalar(context, ldata, loffsets, specific_char_column->get_data_at(0), + res_data, res_offsets, null_map->get_data()); + } else { + Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res_data, + res_offsets, null_map->get_data()); + } } else { - Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(), - null_map->get_data()); + if (col_const[0]) { + Impl::scalar_vector(context, specific_str_column->get_data_at(0), rdata, roffsets, + res->get_data(), null_map->get_data()); + } else if (col_const[1]) { + Impl::vector_scalar(context, ldata, loffsets, specific_char_column->get_data_at(0), + res->get_data(), null_map->get_data()); + } else { + Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(), + null_map->get_data()); + } } block.get_by_position(result).column = diff --git a/be/src/vec/functions/functions_comparison.h b/be/src/vec/functions/functions_comparison.h index 1d72a5d5b8..2a24af8def 100644 --- a/be/src/vec/functions/functions_comparison.h +++ 
b/be/src/vec/functions/functions_comparison.h @@ -61,11 +61,6 @@ struct NumComparisonImpl { /// If you don't specify NO_INLINE, the compiler will inline this function, but we don't need this as this function contains tight loop inside. static void NO_INLINE vector_vector(const PaddedPODArray& a, const PaddedPODArray& b, PaddedPODArray& c) { - /** GCC 4.8.2 vectorized a loop only if it is written in this form. - * In this case, if you loop through the array index (the code will look simpler), - * the loop will not be vectorized. - */ - size_t size = a.size(); const A* a_pos = a.data(); const B* b_pos = b.data(); @@ -104,17 +99,17 @@ struct NumComparisonImpl { /// Generic version, implemented for columns of same type. template struct GenericComparisonImpl { - static void NO_INLINE vector_vector(const IColumn& a, const IColumn& b, - PaddedPODArray& c) { - for (size_t i = 0, size = a.size(); i < size; ++i) + static void vector_vector(const IColumn& a, const IColumn& b, PaddedPODArray& c) { + for (size_t i = 0, size = a.size(); i < size; ++i) { c[i] = Op::apply(a.compare_at(i, i, b, 1), 0); + } } - static void NO_INLINE vector_constant(const IColumn& a, const IColumn& b, - PaddedPODArray& c) { - auto b_materialized = b.clone_resized(1)->convert_to_full_column_if_const(); - for (size_t i = 0, size = a.size(); i < size; ++i) - c[i] = Op::apply(a.compare_at(i, 0, *b_materialized, 1), 0); + static void vector_constant(const IColumn& a, const IColumn& b, PaddedPODArray& c) { + const auto& col_right = assert_cast(b).get_data_column(); + for (size_t i = 0, size = a.size(); i < size; ++i) { + c[i] = Op::apply(a.compare_at(i, 0, col_right, 1), 0); + } } static void constant_vector(const IColumn& a, const IColumn& b, PaddedPODArray& c) { @@ -540,7 +535,7 @@ public: if (left_type->equals(*right_type) && !left_type->is_nullable() && col_left_untyped == col_right_untyped) { /// Always true: =, <=, >= - // TODO: Return const column in the future + // TODO: Return const column in the 
future. But seems so far to do. We need a unified approach for passing const column. if constexpr (std::is_same_v, EqualsOp> || std::is_same_v, LessOrEqualsOp> || std::is_same_v, GreaterOrEqualsOp>) { diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h index af7ff2b8a5..2599dd3e4f 100644 --- a/be/src/vec/functions/in.h +++ b/be/src/vec/functions/in.h @@ -21,6 +21,8 @@ #include #include "exprs/create_predicate_function.h" +#include "vec/columns/column.h" +#include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" #include "vec/columns/columns_number.h" #include "vec/data_types/data_type.h" @@ -115,7 +117,7 @@ public: /// First argument may be a single column. const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]); - auto materialized_column = left_arg.column->convert_to_full_column_if_const(); + const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column); if (in_state->use_set) { if (materialized_column->is_nullable()) { @@ -125,23 +127,16 @@ public: null_col_ptr->get_null_map_column()) .get_data(); auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get(); - auto search_hash_set = [&](auto* col_ptr) { - if constexpr (!negative) { - in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, - null_map, vec_res); - } else { - in_state->hybrid_set->find_batch_nullable_negative( - *col_ptr, input_rows_count, null_map, vec_res); - } - }; if (nested_col_ptr->is_column_string()) { const auto* column_string_ptr = reinterpret_cast(nested_col_ptr); - search_hash_set(column_string_ptr); + search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map, + column_string_ptr); } else { - // todo support other column type - search_hash_set(nested_col_ptr); + //TODO: support other column type + search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map, + nested_col_ptr); } if (!in_state->null_in_set) { @@ -155,23 +150,13 @@ public: } } else { // non-nullable - - 
auto search_hash_set = [&](auto* col_ptr) { - if constexpr (!negative) { - in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res); - } else { - in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, - vec_res); - } - }; - if (materialized_column->is_column_string()) { const auto* column_string_ptr = reinterpret_cast( materialized_column.get()); - search_hash_set(column_string_ptr); + search_hash_set(in_state, input_rows_count, vec_res, column_string_ptr); } else { - search_hash_set(materialized_column.get()); + search_hash_set(in_state, input_rows_count, vec_res, materialized_column.get()); } if (in_state->null_in_set) { @@ -180,37 +165,17 @@ public: } } } - } else { + } else { //!in_state->use_set std::vector set_columns; for (int i = 1; i < arguments.size(); ++i) { set_columns.emplace_back(block.get_by_position(arguments[i]).column); } - - for (size_t i = 0; i < input_rows_count; ++i) { - const auto& ref_data = materialized_column->get_data_at(i); - if (ref_data.data == nullptr) { - vec_null_map_to[i] = true; - continue; - } - - std::unique_ptr hybrid_set( - create_set(context->get_arg_type(0)->type, set_columns.size())); - bool null_in_set = false; - - for (const auto& set_column : set_columns) { - auto set_data = set_column->get_data_at(i); - if (set_data.data == nullptr) { - null_in_set = true; - } else { - hybrid_set->insert((void*)(set_data.data), set_data.size); - } - } - vec_res[i] = negative ^ hybrid_set->find((void*)ref_data.data, ref_data.size); - if (null_in_set) { - vec_null_map_to[i] = negative == vec_res[i]; - } else { - vec_null_map_to[i] = false; - } + if (col_const) { + impl_without_set(context, set_columns, input_rows_count, vec_res, + vec_null_map_to, materialized_column); + } else { + impl_without_set(context, set_columns, input_rows_count, vec_res, + vec_null_map_to, materialized_column); } } @@ -227,6 +192,64 @@ public: Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { 
return Status::OK(); } + +private: + template + static void search_hash_set_check_null(InState* in_state, size_t input_rows_count, + ColumnUInt8::Container& vec_res, + const ColumnUInt8::Container& null_map, T* col_ptr) { + if constexpr (!negative) { + in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map, + vec_res); + } else { + in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map, + vec_res); + } + } + + template + static void search_hash_set(InState* in_state, size_t input_rows_count, + ColumnUInt8::Container& vec_res, T* col_ptr) { + if constexpr (!negative) { + in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res); + } else { + in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res); + } + } + + template + static void impl_without_set(FunctionContext* context, + const std::vector& set_columns, size_t input_rows_count, + ColumnUInt8::Container& vec_res, + ColumnUInt8::Container& vec_null_map_to, + const ColumnPtr& materialized_column) { + for (size_t i = 0; i < input_rows_count; ++i) { + const auto& ref_data = materialized_column->get_data_at(index_check_const(i, Const)); + if (ref_data.data == nullptr) { + vec_null_map_to[i] = true; + continue; + } + + std::unique_ptr hybrid_set( + create_set(context->get_arg_type(0)->type, set_columns.size())); + bool null_in_set = false; + + for (const auto& set_column : set_columns) { + auto set_data = set_column->get_data_at(i); + if (set_data.data == nullptr) { + null_in_set = true; + } else { + hybrid_set->insert((void*)(set_data.data), set_data.size); + } + } + vec_res[i] = negative ^ hybrid_set->find((void*)ref_data.data, ref_data.size); + if (null_in_set) { + vec_null_map_to[i] = negative == vec_res[i]; + } else { + vec_null_map_to[i] = false; + } + } + } }; } // namespace doris::vectorized diff --git a/be/src/vec/functions/least_greast.cpp b/be/src/vec/functions/least_greast.cpp index 5125f9a5b6..3d841e2da7 100644 --- 
a/be/src/vec/functions/least_greast.cpp +++ b/be/src/vec/functions/least_greast.cpp @@ -73,12 +73,15 @@ struct CompareMultiImpl { result_column->insert_range_from(*(cols[0]), 0, input_rows_count); WhichDataType which(data_type); -#define DISPATCH(TYPE, COLUMN_TYPE) \ - if (which.idx == TypeIndex::TYPE) { \ - for (int i = 1; i < arguments.size(); ++i) { \ - insert_result_data(result_column, cols[i], input_rows_count, \ - col_const[i]); \ - } \ +#define DISPATCH(TYPE, COLUMN_TYPE) \ + if (which.idx == TypeIndex::TYPE) { \ + for (int i = 1; i < arguments.size(); ++i) { \ + if (col_const[i]) { \ + insert_result_data(result_column, cols[i], input_rows_count); \ + } else { \ + insert_result_data(result_column, cols[i], input_rows_count); \ + } \ + } \ } NUMERIC_TYPE_TO_COLUMN_TYPE(DISPATCH) DISPATCH(Decimal128, ColumnDecimal) @@ -90,10 +93,10 @@ struct CompareMultiImpl { } private: - template + template static void insert_result_data(const MutableColumnPtr& result_column, - const ColumnPtr& argument_column, const size_t input_rows_count, - const bool arg_const) { + const ColumnPtr& argument_column, + const size_t input_rows_count) { auto* __restrict result_raw_data = reinterpret_cast(result_column.get())->get_data().data(); auto* __restrict column_raw_data = @@ -101,19 +104,19 @@ private: if constexpr (std::is_same_v) { for (size_t i = 0; i < input_rows_count; ++i) { - result_raw_data[i] = Op::apply( - column_raw_data[index_check_const(i, arg_const)], - result_raw_data[i]) - ? column_raw_data[index_check_const(i, arg_const)] - : result_raw_data[i]; + result_raw_data[i] = + Op::apply( + column_raw_data[index_check_const(i, ArgConst)], result_raw_data[i]) + ? 
column_raw_data[index_check_const(i, ArgConst)] + : result_raw_data[i]; } } else { for (size_t i = 0; i < input_rows_count; ++i) { using type = std::decay_t; result_raw_data[i] = - Op::apply(column_raw_data[index_check_const(i, arg_const)], + Op::apply(column_raw_data[index_check_const(i, ArgConst)], result_raw_data[i]) - ? column_raw_data[index_check_const(i, arg_const)] + ? column_raw_data[index_check_const(i, ArgConst)] : result_raw_data[i]; } } @@ -138,13 +141,16 @@ struct FunctionFieldImpl { for (int i = 0; i < column_size; ++i) { argument_columns[i] = block.get_by_position(arguments[i]).column; } - argument_columns[0] = argument_columns[0]->convert_to_full_column_if_const(); + + bool arg_const; + std::tie(argument_columns[0], arg_const) = unpack_if_const(argument_columns[0]); + WhichDataType which(data_type); //TODO: maybe could use hashmap to save column data, not use for loop ervey time to test equals. if (which.is_string_or_fixed_string()) { const auto& column_string = reinterpret_cast(*argument_columns[0]); for (int row = 0; row < input_rows_count; ++row) { - const auto& str_data = column_string.get_data_at(row); + const auto& str_data = column_string.get_data_at(index_check_const(row, arg_const)); for (int col = 1; col < column_size; ++col) { const auto& temp_data = reinterpret_cast(*argument_columns[col]) @@ -155,13 +161,21 @@ struct FunctionFieldImpl { } } } - } else { -#define DISPATCH(TYPE, COLUMN_TYPE) \ - if (which.idx == TypeIndex::TYPE) { \ - for (int col = 1; col < arguments.size(); ++col) { \ - insert_result_data(res_data, argument_columns[0], argument_columns[col], \ - input_rows_count, col); \ - } \ + + } else { //string or not +#define DISPATCH(TYPE, COLUMN_TYPE) \ + if (which.idx == TypeIndex::TYPE) { \ + for (int col = 1; col < arguments.size(); ++col) { \ + if (arg_const) { \ + insert_result_data(res_data, argument_columns[0], \ + argument_columns[col], input_rows_count, \ + col); \ + } else { \ + insert_result_data(res_data, 
argument_columns[0], \ + argument_columns[col], input_rows_count, \ + col); \ + } \ + } \ } NUMERIC_TYPE_TO_COLUMN_TYPE(DISPATCH) DISPATCH(Decimal128, ColumnDecimal) @@ -173,7 +187,7 @@ struct FunctionFieldImpl { } private: - template + template static void insert_result_data(PaddedPODArray& __restrict res_data, ColumnPtr first_column, ColumnPtr argument_column, const size_t input_rows_count, const int col) { @@ -186,15 +200,17 @@ private: if constexpr (std::is_same_v) { for (size_t i = 0; i < input_rows_count; ++i) { res_data[i] |= (!res_data[i] * - (EqualsOp::apply(first_raw_data[i], - arg_data)) * + (EqualsOp::apply( + first_raw_data[index_check_const(i, ArgConst)], arg_data)) * col); } } else { for (size_t i = 0; i < input_rows_count; ++i) { using type = std::decay_t; res_data[i] |= (!res_data[i] * - (EqualsOp::apply(first_raw_data[i], arg_data)) * col); + (EqualsOp::apply( + first_raw_data[index_check_const(i, ArgConst)], arg_data)) * + col); } } } diff --git a/be/src/vec/functions/round.h b/be/src/vec/functions/round.h index 5686417a30..c06500bf87 100644 --- a/be/src/vec/functions/round.h +++ b/be/src/vec/functions/round.h @@ -503,12 +503,9 @@ public: static Status get_scale_arg(const ColumnWithTypeAndName& arguments, Int16* scale) { const IColumn& scale_column = *arguments.column; - Int32 scale64 = static_cast( - &(is_column_const(scale_column) - ? 
static_cast(&scale_column) - ->get_data_column() - : scale_column)) - ->get_element(0); + Int32 scale64 = static_cast( + static_cast(&scale_column)->get_data_column()) + .get_element(0); if (scale64 > std::numeric_limits::max() || scale64 < std::numeric_limits::min()) { diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/array_apply.md b/docs/en/docs/sql-manual/sql-functions/array-functions/array_apply.md index 6b5450f73f..0f14989a35 100644 --- a/docs/en/docs/sql-manual/sql-functions/array-functions/array_apply.md +++ b/docs/en/docs/sql-manual/sql-functions/array-functions/array_apply.md @@ -43,8 +43,8 @@ array_apply(arr, op, val) #### Arguments `arr` — The array to inspect. If it null, null will be returned. -`op` — The compare operation, op includes `=`, `>=`, `<=`, `>`, `<`, `!=` -`val` — The compared value.If it null, null will be returned. +`op` — The compare operation, op includes `=`, `>=`, `<=`, `>`, `<`, `!=`. Support const value only. +`val` — The compared value.If it null, null will be returned. Support const value only. 
#### Returned value diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_apply.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_apply.md index fb94859e3a..6b90a67b86 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_apply.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_apply.md @@ -42,8 +42,8 @@ array_apply(arr, op, val) #### Arguments `arr` — 输入的数组, 如果是null, 则返回null -`op` — 过滤条件, 条件包括 `=`, `>=`, `<=`, `>`, `<`, `!=` -`val` — 过滤的条件值, 如果是null, 则返回null +`op` — 过滤条件, 条件包括 `=`, `>=`, `<=`, `>`, `<`, `!=`,仅支持常量 +`val` — 过滤的条件值, 如果是null, 则返回null,仅支持常量 #### Returned value diff --git a/regression-test/data/correctness_p0/test_partial_const_args_for_function.out b/regression-test/data/correctness_p0/test_partial_const_args_for_function.out new file mode 100644 index 0000000000..96497d1b74 --- /dev/null +++ b/regression-test/data/correctness_p0/test_partial_const_args_for_function.out @@ -0,0 +1,37 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select1 -- +0 +1 +2 +3 +4 +5 +5 +5 +5 +5 + +-- !select2 -- +0 +1 +2 +3 +4 +5 +5 +5 +5 +5 + +-- !select3 -- +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 + diff --git a/regression-test/suites/correctness_p0/test_partial_const_args_for_function.groovy b/regression-test/suites/correctness_p0/test_partial_const_args_for_function.groovy new file mode 100644 index 0000000000..0f0da4a2ec --- /dev/null +++ b/regression-test/suites/correctness_p0/test_partial_const_args_for_function.groovy @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one + // or more contributor license agreements. See the NOTICE file + // distributed with this work for additional information + // regarding copyright ownership. The ASF licenses this file + // to you under the Apache License, Version 2.0 (the + // "License"); you may not use this file except in compliance + // with the License. 
You may obtain a copy of the License at + // + // http://www.apache.org/licenses/LICENSE-2.0 + // + // Unless required by applicable law or agreed to in writing, + // software distributed under the License is distributed on an + // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + // KIND, either express or implied. See the License for the + // specific language governing permissions and limitations + // under the License. + + +suite("test_partial_const_args_for_function") { + def tableName = "test_partial_const_args_for_function" + + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `a` int + ) ENGINE=OLAP + DUPLICATE KEY(`a`) + DISTRIBUTED BY HASH(`a`) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ); + """ + + sql """insert into ${tableName} select * from numbers("number" = "10")""" + qt_select1 "select least(a, 5, 6) as k1 from ${tableName} order by k1" + qt_select2 "select least(a, 5, a) as k1 from ${tableName} order by k1" + qt_select3 "select least(a, a, a) as k1 from ${tableName} order by k1" +}