diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 1dd74261d8..ea8b952f8c 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -464,8 +464,8 @@ public: // only used in ColumnNullable replace_column_data virtual void replace_column_data_default(size_t self_row = 0) = 0; - virtual bool is_date_type() { return is_date; } - virtual bool is_datetime_type() { return is_date_time; } + virtual bool is_date_type() const { return is_date; } + virtual bool is_datetime_type() const { return is_date_time; } virtual void set_date_type() { is_date = true; } virtual void set_datetime_type() { is_date_time = true; } diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index 4f3a2bc3d4..98c2d05ef4 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -71,6 +71,7 @@ private: friend class COWHelper; public: + using value_type = T; using Container = DecimalPaddedPODArray; private: diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 482b9a5bfe..410ed8b42a 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -163,8 +163,8 @@ public: return false; } - bool is_date_type() override { return get_nested_column().is_date_type(); } - bool is_datetime_type() override { return get_nested_column().is_datetime_type(); } + bool is_date_type() const override { return get_nested_column().is_date_type(); } + bool is_datetime_type() const override { return get_nested_column().is_datetime_type(); } void set_date_type() override { get_nested_column().set_date_type(); } void set_datetime_type() override { get_nested_column().set_datetime_type(); } diff --git a/be/src/vec/data_types/data_type_date.cpp b/be/src/vec/data_types/data_type_date.cpp index a23dde0725..2508570a53 100644 --- a/be/src/vec/data_types/data_type_date.cpp +++ b/be/src/vec/data_types/data_type_date.cpp @@ -65,4 +65,10 @@ void DataTypeDate::cast_to_date(Int64& x) { x = binary_cast(value); } +MutableColumnPtr DataTypeDate::create_column() const { + auto col = DataTypeNumberBase::create_column(); + col->set_date_type(); + return col; +} + } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_date.h b/be/src/vec/data_types/data_type_date.h index 160b7d4a0c..06f8ff0d0e 100644 --- a/be/src/vec/data_types/data_type_date.h +++ b/be/src/vec/data_types/data_type_date.h @@ -38,6 +38,8 @@ public: void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; static void cast_to_date(Int64& x); + + MutableColumnPtr create_column() const override; }; } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_date_time.cpp b/be/src/vec/data_types/data_type_date_time.cpp index 5cb568b3ce..5271091f87 100644 --- a/be/src/vec/data_types/data_type_date_time.cpp +++ b/be/src/vec/data_types/data_type_date_time.cpp @@ -88,4 +88,10 @@ void DataTypeDateTime::cast_to_date_time(Int64& x) { x = binary_cast(value); } +MutableColumnPtr DataTypeDateTime::create_column() const { + auto col = DataTypeNumberBase::create_column(); + col->set_datetime_type(); + return col; +} + } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_date_time.h b/be/src/vec/data_types/data_type_date_time.h index a429e2d0f2..adf41bfc8b 100644 --- a/be/src/vec/data_types/data_type_date_time.h +++ b/be/src/vec/data_types/data_type_date_time.h @@ -65,6 +65,8 @@ public: void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; static void cast_to_date_time(Int64& x); + + MutableColumnPtr create_column() const override; }; template diff --git a/be/src/vec/exec/vunion_node.cpp b/be/src/vec/exec/vunion_node.cpp index cb410c3794..f1c3002218 100644 --- a/be/src/vec/exec/vunion_node.cpp +++ b/be/src/vec/exec/vunion_node.cpp @@ -189,7 +189,8 @@ Status VUnionNode::get_next_const(RuntimeState* state, Block* block) { int const_expr_lists_size = _const_expr_lists[_const_expr_list_idx].size(); std::vector result_list(const_expr_lists_size); for (size_t i = 0; i < const_expr_lists_size; ++i) { - _const_expr_lists[_const_expr_list_idx][i]->execute(&tmp_block, &result_list[i]); + RETURN_IF_ERROR(_const_expr_lists[_const_expr_list_idx][i]->execute(&tmp_block, + &result_list[i])); } tmp_block.erase_not_in(result_list); mblock.merge(tmp_block); diff --git a/be/src/vec/functions/array/function_array_element.h b/be/src/vec/functions/array/function_array_element.h index eae3375bca..a37913915b 100644 --- a/be/src/vec/functions/array/function_array_element.h +++ b/be/src/vec/functions/array/function_array_element.h @@ -90,9 +90,11 @@ private: ColumnPtr _execute_number(const ColumnArray::Offsets& offsets, const IColumn& nested_column, const UInt8* arr_null_map, const IColumn& indices, const UInt8* nested_null_map, UInt8* dst_null_map) { - const auto& nested_data = check_and_get_column(nested_column)->get_data(); - auto dst_column = ColumnType::create(offsets.size()); - auto& dst_data = dst_column->get_data(); + const auto& nested_data = reinterpret_cast(nested_column).get_data(); + + auto dst_column = nested_column.clone_empty(); + auto& dst_data = reinterpret_cast(*dst_column).get_data(); + dst_data.resize(offsets.size()); // process for (size_t row = 0; row < offsets.size(); ++row) { @@ -129,8 +131,10 @@ private: ColumnPtr _execute_string(const ColumnArray::Offsets& offsets, const IColumn& nested_column, const UInt8* arr_null_map, const IColumn& indices, const UInt8* nested_null_map, UInt8* dst_null_map) { - const auto& src_str_offs = check_and_get_column(nested_column)->get_offsets(); - const auto& src_str_chars = check_and_get_column(nested_column)->get_chars(); + const auto& src_str_offs = + reinterpret_cast(nested_column).get_offsets(); + const auto& src_str_chars = + reinterpret_cast(nested_column).get_chars(); // prepare return data auto dst_column = ColumnString::create(); @@ -181,23 +185,33 @@ private: const DataTypePtr& result_type, size_t input_rows_count, const UInt8* src_null_map, UInt8* dst_null_map) { // check array nested column type and get data - auto array_column = check_and_get_column(*arguments[0].column); - DCHECK(array_column != nullptr); - const auto& offsets = array_column->get_offsets(); + auto left_column = arguments[0].column->convert_to_full_column_if_const(); + const auto& array_column = reinterpret_cast(*left_column); + const auto& offsets = array_column.get_offsets(); DCHECK(offsets.size() == input_rows_count); const UInt8* nested_null_map = nullptr; ColumnPtr nested_column = nullptr; - if (is_column_nullable(array_column->get_data())) { + if (is_column_nullable(array_column.get_data())) { const auto& nested_null_column = - check_and_get_column(array_column->get_data()); - nested_null_map = nested_null_column->get_null_map_column().get_data().data(); - nested_column = nested_null_column->get_nested_column_ptr(); + reinterpret_cast(array_column.get_data()); + nested_null_map = nested_null_column.get_null_map_column().get_data().data(); + nested_column = nested_null_column.get_nested_column_ptr(); } else { - nested_column = array_column->get_data_ptr(); + nested_column = array_column.get_data_ptr(); } ColumnPtr res = nullptr; - if (check_column(*nested_column)) { + if (nested_column->is_date_type()) { + res = _execute_number(offsets, *nested_column, src_null_map, + *arguments[1].column, nested_null_map, dst_null_map); + } else if (nested_column->is_datetime_type()) { + res = _execute_number(offsets, *nested_column, src_null_map, + *arguments[1].column, nested_null_map, + dst_null_map); + } else if (check_column(*nested_column)) { + res = _execute_number(offsets, *nested_column, src_null_map, + *arguments[1].column, nested_null_map, dst_null_map); + } else if (check_column(*nested_column)) { res = _execute_number(offsets, *nested_column, src_null_map, *arguments[1].column, nested_null_map, dst_null_map); } else if (check_column(*nested_column)) { @@ -209,6 +223,10 @@ private: } else if (check_column(*nested_column)) { res = _execute_number(offsets, *nested_column, src_null_map, *arguments[1].column, nested_null_map, dst_null_map); + } else if (check_column(*nested_column)) { + res = _execute_number(offsets, *nested_column, src_null_map, + *arguments[1].column, nested_null_map, + dst_null_map); } else if (check_column(*nested_column)) { res = _execute_number(offsets, *nested_column, src_null_map, *arguments[1].column, nested_null_map, @@ -217,6 +235,10 @@ private: res = _execute_number(offsets, *nested_column, src_null_map, *arguments[1].column, nested_null_map, dst_null_map); + } else if (check_column(*nested_column)) { + res = _execute_number(offsets, *nested_column, src_null_map, + *arguments[1].column, nested_null_map, + dst_null_map); } else if (check_column(*nested_column)) { res = _execute_string(offsets, *nested_column, src_null_map, *arguments[1].column, nested_null_map, dst_null_map); diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h index 435fa7a1d5..dd46373ce5 100644 --- a/be/src/vec/functions/array/function_array_index.h +++ b/be/src/vec/functions/array/function_array_index.h @@ -67,54 +67,25 @@ public: } private: - static bool _execute_string(Block& block, const ColumnNumbers& arguments, size_t result, - size_t input_rows_count) { + ColumnPtr _execute_string(const ColumnArray::Offsets& offsets, const UInt8* nested_null_map, + const IColumn& nested_column, const IColumn& right_column) { // check array nested column type and get data - auto array_column = - check_and_get_column(*block.get_by_position(arguments[0]).column); - DCHECK(array_column != nullptr); - const ColumnString* nested_column = nullptr; - const UInt8* nested_null_map = nullptr; - auto nested_null_column = check_and_get_column(array_column->get_data()); - if (nested_null_column) { - nested_null_map = nested_null_column->get_null_map_column().get_data().data(); - nested_column = - check_and_get_column(nested_null_column->get_nested_column()); - } else { - nested_column = check_and_get_column(array_column->get_data()); - } - if (!nested_column) { - return false; - } - const auto& arr_offs = array_column->get_offsets(); - const auto& str_offs = nested_column->get_offsets(); - const auto& str_chars = nested_column->get_chars(); + const auto& str_offs = reinterpret_cast(nested_column).get_offsets(); + const auto& str_chars = reinterpret_cast(nested_column).get_chars(); - // check right column type - auto ptr = block.get_by_position(arguments[1]).column; - if (is_column_const(*ptr)) { - ptr = check_and_get_column(ptr)->get_data_column_ptr(); - } - if (!check_and_get_column(*ptr)) { - return false; - } - - // expand const column and get data - auto right_column = check_and_get_column( - *block.get_by_position(arguments[1]).column->convert_to_full_column_if_const()); - const auto& right_offs = right_column->get_offsets(); - const auto& right_chars = right_column->get_chars(); + // check right column type and get data + const auto& right_offs = reinterpret_cast(right_column).get_offsets(); + const auto& right_chars = reinterpret_cast(right_column).get_chars(); // prepare return data - auto dst = ColumnVector::create(); + auto dst = ColumnVector::create(offsets.size()); auto& dst_data = dst->get_data(); - dst_data.resize(input_rows_count); // process - for (size_t row = 0; row < input_rows_count; ++row) { + for (size_t row = 0; row < offsets.size(); ++row) { ResultType res = 0; - size_t off = arr_offs[row - 1]; - size_t len = arr_offs[row] - off; + size_t off = offsets[row - 1]; + size_t len = offsets[row] - off; size_t right_off = right_offs[row - 1]; size_t right_len = right_offs[row] - right_off; @@ -135,69 +106,25 @@ private: } dst_data[row] = res; } - block.replace_by_position(result, std::move(dst)); - return true; + return dst; } -#define NUMBER_TPL_PACK Int8, Int16, Int32, Int64, Float32, Float64 - template - static bool _execute_number(Block& block, const ColumnNumbers& arguments, size_t result, - size_t input_rows_count) { - return (_execute_number_expanded(block, arguments, result, - input_rows_count) || - ...); - } - template - static bool _execute_number_expanded(Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { - return (_execute_number_impl(block, arguments, result, input_rows_count) || ...); - } - template - static bool _execute_number_impl(Block& block, const ColumnNumbers& arguments, size_t result, - size_t input_rows_count) { + template + ColumnPtr _execute_number(const ColumnArray::Offsets& offsets, const UInt8* nested_null_map, + const IColumn& nested_column, const IColumn& right_column) { // check array nested column type and get data - auto array_column = - check_and_get_column(*block.get_by_position(arguments[0]).column); - DCHECK(array_column != nullptr); - const ColumnVector* nested_column = nullptr; - const UInt8* nested_null_map = nullptr; - auto nested_null_column = check_and_get_column(array_column->get_data()); - if (nested_null_column) { - nested_null_map = nested_null_column->get_null_map_column().get_data().data(); - nested_column = check_and_get_column>( - nested_null_column->get_nested_column()); - } else { - nested_column = - check_and_get_column>(array_column->get_data()); - } - if (!nested_column) { - return false; - } - const auto& offsets = array_column->get_offsets(); - const auto& nested_data = nested_column->get_data(); + const auto& nested_data = + reinterpret_cast(nested_column).get_data(); - // check right column type - auto ptr = block.get_by_position(arguments[1]).column; - if (is_column_const(*ptr)) { - ptr = check_and_get_column(ptr)->get_data_column_ptr(); - } - if (!check_and_get_column>(*ptr)) { - return false; - } - - // expand const column and get data - auto right_column = - block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); - const auto& right_data = - check_and_get_column>(*right_column)->get_data(); + // check right column type and get data + const auto& right_data = reinterpret_cast(right_column).get_data(); // prepare return data - auto dst = ColumnVector::create(); + auto dst = ColumnVector::create(offsets.size()); auto& dst_data = dst->get_data(); - dst_data.resize(input_rows_count); // process - for (size_t row = 0; row < input_rows_count; ++row) { + for (size_t row = 0; row < offsets.size(); ++row) { ResultType res = 0; size_t off = offsets[row - 1]; size_t len = offsets[row] - off; @@ -213,24 +140,130 @@ private: } dst_data[row] = res; } - block.replace_by_position(result, std::move(dst)); - return true; + return dst; + } + + template + ColumnPtr _execute_number_expanded(const ColumnArray::Offsets& offsets, + const UInt8* nested_null_map, const IColumn& nested_column, + const IColumn& right_column) { + if (check_column(right_column)) { + return _execute_number(offsets, nested_null_map, + nested_column, right_column); + } else if (check_column(right_column)) { + return _execute_number(offsets, nested_null_map, + nested_column, right_column); + } else if (check_column(right_column)) { + return _execute_number(offsets, nested_null_map, + nested_column, right_column); + } else if (check_column(right_column)) { + return _execute_number(offsets, nested_null_map, + nested_column, right_column); + } else if (check_column(right_column)) { + return _execute_number(offsets, nested_null_map, + nested_column, right_column); + } else if (check_column(right_column)) { + return _execute_number(offsets, nested_null_map, + nested_column, right_column); + } else if (check_column(right_column)) { + return _execute_number(offsets, nested_null_map, + nested_column, right_column); + } else if (check_column(right_column)) { + return _execute_number(offsets, nested_null_map, + nested_column, right_column); + } else if (right_column.is_date_type()) { + return _execute_number(offsets, nested_null_map, + nested_column, right_column); + } else if (right_column.is_datetime_type()) { + return _execute_number(offsets, nested_null_map, + nested_column, right_column); + } else if (check_column(right_column)) { + return _execute_number(offsets, nested_null_map, + nested_column, right_column); + } + return nullptr; } Status _execute_non_nullable(Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { - WhichDataType right_type(block.get_by_position(arguments[1]).type); - if ((right_type.is_string() && - _execute_string(block, arguments, result, input_rows_count)) || - _execute_number(block, arguments, result, input_rows_count)) { + // extract array offsets and nested data + auto left_column = + block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + const auto& array_column = reinterpret_cast(*left_column); + const auto& offsets = array_column.get_offsets(); + const UInt8* nested_null_map = nullptr; + ColumnPtr nested_column = nullptr; + if (array_column.get_data().is_nullable()) { + const auto& nested_null_column = + reinterpret_cast(array_column.get_data()); + nested_null_map = nested_null_column.get_null_map_column().get_data().data(); + nested_column = nested_null_column.get_nested_column_ptr(); + } else { + nested_column = array_column.get_data_ptr(); + } + + // get right column + auto right_column = + block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); + + // execute + auto left_element_type = remove_nullable( + assert_cast(*block.get_by_position(arguments[0]).type) + .get_nested_type()); + auto right_type = remove_nullable(block.get_by_position(arguments[1]).type); + + ColumnPtr return_column = nullptr; + if (is_string(right_type) && is_string(left_element_type)) { + return_column = + _execute_string(offsets, nested_null_map, *nested_column, *right_column); + } else if (is_number(right_type) && is_number(left_element_type)) { + if (check_column(*nested_column)) { + return_column = _execute_number_expanded( + offsets, nested_null_map, *nested_column, *right_column); + } else if (check_column(*nested_column)) { + return_column = _execute_number_expanded(offsets, nested_null_map, + *nested_column, *right_column); + } else if (check_column(*nested_column)) { + return_column = _execute_number_expanded( + offsets, nested_null_map, *nested_column, *right_column); + } else if (check_column(*nested_column)) { + return_column = _execute_number_expanded( + offsets, nested_null_map, *nested_column, *right_column); + } else if (check_column(*nested_column)) { + return_column = _execute_number_expanded( + offsets, nested_null_map, *nested_column, *right_column); + } else if (check_column(*nested_column)) { + return_column = _execute_number_expanded( + offsets, nested_null_map, *nested_column, *right_column); + } else if (check_column(*nested_column)) { + return_column = _execute_number_expanded( + offsets, nested_null_map, *nested_column, *right_column); + } else if (check_column(*nested_column)) { + return_column = _execute_number_expanded( + offsets, nested_null_map, *nested_column, *right_column); + } else if (check_column(*nested_column)) { + return_column = _execute_number_expanded( + offsets, nested_null_map, *nested_column, *right_column); + } + } else if (is_date_or_datetime(right_type) && is_date_or_datetime(left_element_type)) { + if (nested_column->is_date_type()) { + return_column = _execute_number_expanded(offsets, nested_null_map, + *nested_column, *right_column); + } else if (nested_column->is_datetime_type()) { + return_column = _execute_number_expanded( + offsets, nested_null_map, *nested_column, *right_column); + } + } + + if (return_column) { + block.replace_by_position(result, std::move(return_column)); return Status::OK(); } return Status::RuntimeError( - fmt::format("unsupported types for function {}({}, {})", get_name(), - block.get_by_position(arguments[0]).type->get_name(), + fmt::format("execute failed or unsupported types for function {}({}, {})", + get_name(), block.get_by_position(arguments[0]).type->get_name(), block.get_by_position(arguments[1]).type->get_name())); } -#undef NUMBER_TPL_PACK }; } // namespace doris::vectorized diff --git a/be/test/vec/function/function_array_element_test.cpp b/be/test/vec/function/function_array_element_test.cpp index f36c33c38a..39d9c23cab 100644 --- a/be/test/vec/function/function_array_element_test.cpp +++ b/be/test/vec/function/function_array_element_test.cpp @@ -24,6 +24,7 @@ #include "runtime/tuple_row.h" #include "util/url_coding.h" #include "vec/core/field.h" +#include "vec/data_types/data_type_decimal.h" namespace doris::vectorized { @@ -57,6 +58,91 @@ TEST(function_array_element_test, element_at) { check_function(func_name, input_types, data_set); } + // element_at(Array, Int64) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int128, TypeIndex::Int64}; + + Array vec = {Int128(1), Int128(2), Int128(3)}; + DataSet data_set = {{{vec, Int64(0)}, Null()}, {{vec, Int64(1)}, Int128(1)}, + {{vec, Int64(4)}, Null()}, {{vec, Int64(-1)}, Int128(3)}, + {{vec, Int64(-3)}, Int128(1)}, {{vec, Int64(-4)}, Null()}, + {{Null(), Int64(1)}, Null()}, {{empty_arr, Int64(0)}, Null()}, + {{empty_arr, Int64(1)}, Null()}}; + + check_function(func_name, input_types, data_set); + } + + // element_at(Array, Int64) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Float64, TypeIndex::Int64}; + + Array vec = {double(1.11), double(2.22), double(3.33)}; + DataSet data_set = {{{vec, Int64(0)}, Null()}, {{vec, Int64(1)}, double(1.11)}, + {{vec, Int64(4)}, Null()}, {{vec, Int64(-1)}, double(3.33)}, + {{vec, Int64(-3)}, double(1.11)}, {{vec, Int64(-4)}, Null()}, + {{Null(), Int64(1)}, Null()}, {{empty_arr, Int64(0)}, Null()}, + {{empty_arr, Int64(1)}, Null()}}; + + check_function(func_name, input_types, data_set); + } + + // element_at(Array, Int64) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime, TypeIndex::Int64}; + + Array vec = {str_to_data_time("2022-01-02 01:00:00"), str_to_data_time(""), + str_to_data_time("2022-07-08 03:00:00")}; + DataSet data_set = {{{vec, Int64(0)}, Null()}, + {{vec, Int64(1)}, str_to_data_time("2022-01-02 01:00:00")}, + {{vec, Int64(4)}, Null()}, + {{vec, Int64(-1)}, str_to_data_time("2022-07-08 03:00:00")}, + {{vec, Int64(-2)}, str_to_data_time("")}, + {{vec, Int64(-4)}, Null()}, + {{Null(), Int64(1)}, Null()}, + {{empty_arr, Int64(0)}, Null()}, + {{empty_arr, Int64(1)}, Null()}}; + + check_function(func_name, input_types, data_set); + } + + // element_at(Array, Int64) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date, TypeIndex::Int64}; + + Array vec = {str_to_data_time("2022-01-02"), str_to_data_time(""), + str_to_data_time("2022-07-08")}; + DataSet data_set = {{{vec, Int64(0)}, Null()}, + {{vec, Int64(1)}, str_to_data_time("2022-01-02")}, + {{vec, Int64(4)}, Null()}, + {{vec, Int64(-1)}, str_to_data_time("2022-07-08")}, + {{vec, Int64(-2)}, str_to_data_time("")}, + {{vec, Int64(-4)}, Null()}, + {{Null(), Int64(1)}, Null()}, + {{empty_arr, Int64(0)}, Null()}, + {{empty_arr, Int64(1)}, Null()}}; + + check_function(func_name, input_types, data_set); + } + + // element_at(Array, Int64) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Decimal128, TypeIndex::Int64}; + + Array vec = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67), + ut_type::DECIMALFIELD(0.0)}; + DataSet data_set = {{{vec, Int64(0)}, Null()}, + {{vec, Int64(1)}, ut_type::DECIMAL(17014116.67)}, + {{vec, Int64(4)}, Null()}, + {{vec, Int64(-1)}, ut_type::DECIMAL(0.0)}, + {{vec, Int64(-2)}, ut_type::DECIMAL(-17014116.67)}, + {{vec, Int64(-4)}, Null()}, + {{Null(), Int64(1)}, Null()}, + {{empty_arr, Int64(0)}, Null()}, + {{empty_arr, Int64(1)}, Null()}}; + + check_function, true>(func_name, input_types, data_set); + } + // element_at(Array, Int32) { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::Int32}; diff --git a/be/test/vec/function/function_array_index_test.cpp b/be/test/vec/function/function_array_index_test.cpp index eb4c27f7f6..c1a0231a7d 100644 --- a/be/test/vec/function/function_array_index_test.cpp +++ b/be/test/vec/function/function_array_index_test.cpp @@ -70,6 +70,89 @@ TEST(function_array_index_test, array_contains) { check_function(func_name, input_types, data_set); } + // array_contains(Array, Int128) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int128, TypeIndex::Int128}; + + Array vec = {Int128(11111111111LL), Int128(22222LL), Int128(333LL)}; + DataSet data_set = {{{vec, Int128(11111111111LL)}, UInt8(1)}, + {{vec, Int128(4)}, UInt8(0)}, + {{Null(), Int128(1)}, Null()}, + {{empty_arr, Int128(1)}, UInt8(0)}}; + + check_function(func_name, input_types, data_set); + } + + // array_contains(Array, Float32) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Float32, TypeIndex::Float32}; + + Array vec = {float(1.2345), float(2.222), float(3.0)}; + DataSet data_set = {{{vec, float(2.222)}, UInt8(1)}, + {{vec, float(4)}, UInt8(0)}, + {{Null(), float(1)}, Null()}, + {{empty_arr, float(1)}, UInt8(0)}}; + + check_function(func_name, input_types, data_set); + } + + // array_contains(Array, Float64) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Float64, TypeIndex::Float64}; + + Array vec = {double(1.2345), double(2.222), double(3.0)}; + DataSet data_set = {{{vec, double(2.222)}, UInt8(1)}, + {{vec, double(4)}, UInt8(0)}, + {{Null(), double(1)}, Null()}, + {{empty_arr, double(1)}, UInt8(0)}}; + + check_function(func_name, input_types, data_set); + } + + // array_contains(Array, Date) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date, TypeIndex::Date}; + + Array vec = {str_to_data_time("2022-01-02", false), str_to_data_time("", false), + str_to_data_time("2022-07-08", false)}; + DataSet data_set = {{{vec, std::string("2022-01-02")}, UInt8(1)}, + {{vec, std::string("")}, UInt8(1)}, + {{vec, std::string("2022-01-03")}, UInt8(0)}, + {{Null(), std::string("2022-01-04")}, Null()}, + {{empty_arr, std::string("2022-01-02")}, UInt8(0)}}; + + check_function(func_name, input_types, data_set); + } + + // array_contains(Array, DateTime) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime, TypeIndex::DateTime}; + + Array vec = {str_to_data_time("2022-01-02 00:00:00"), str_to_data_time(""), + str_to_data_time("2022-07-08 00:00:00")}; + DataSet data_set = {{{vec, std::string("2022-01-02 00:00:00")}, UInt8(1)}, + {{vec, std::string("")}, UInt8(1)}, + {{vec, std::string("2022-01-03 00:00:00")}, UInt8(0)}, + {{Null(), std::string("2022-01-04 00:00:00")}, Null()}, + {{empty_arr, std::string("2022-01-02 00:00:00")}, UInt8(0)}}; + + check_function(func_name, input_types, data_set); + } + + // array_contains(Array, Decimal128) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Decimal128, TypeIndex::Decimal128}; + + Array vec = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67), + ut_type::DECIMALFIELD(0.0)}; + DataSet data_set = {{{vec, ut_type::DECIMAL(-17014116.67)}, UInt8(1)}, + {{vec, ut_type::DECIMAL(0)}, UInt8(1)}, + {{Null(), ut_type::DECIMAL(0)}, Null()}, + {{empty_arr, ut_type::DECIMAL(0)}, UInt8(0)}}; + + check_function(func_name, input_types, data_set); + } + // array_contains(Array, String) { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::String}; @@ -128,6 +211,50 @@ TEST(function_array_index_test, array_position) { check_function(func_name, input_types, data_set); } + // array_position(Array, Date) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date, TypeIndex::Date}; + + Array vec = {str_to_data_time("2022-01-02", false), str_to_data_time("", false), + str_to_data_time("2022-07-08", false)}; + DataSet data_set = {{{vec, std::string("2022-01-02")}, Int64(1)}, + {{vec, std::string("")}, Int64(2)}, + {{vec, std::string("2022-01-03")}, Int64(0)}, + {{Null(), std::string("2022-01-04")}, Null()}, + {{empty_arr, std::string("2022-01-02")}, Int64(0)}}; + + check_function(func_name, input_types, data_set); + } + + // array_position(Array, DateTime) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime, TypeIndex::DateTime}; + + Array vec = {str_to_data_time("2022-01-02 00:00:00"), str_to_data_time(""), + str_to_data_time("2022-07-08 00:00:00")}; + DataSet data_set = {{{vec, std::string("2022-01-02 00:00:00")}, Int64(1)}, + {{vec, std::string("")}, Int64(2)}, + {{vec, std::string("2022-01-03 00:00:00")}, Int64(0)}, + {{Null(), std::string("2022-01-04 00:00:00")}, Null()}, + {{empty_arr, std::string("2022-01-02 00:00:00")}, Int64(0)}}; + + check_function(func_name, input_types, data_set); + } + + // array_position(Array, Decimal128) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Decimal128, TypeIndex::Decimal128}; + + Array vec = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67), + ut_type::DECIMALFIELD(0)}; + DataSet data_set = {{{vec, ut_type::DECIMAL(-17014116.67)}, Int64(2)}, + {{vec, ut_type::DECIMAL(0)}, Int64(3)}, + {{Null(), ut_type::DECIMAL(0)}, Null()}, + {{empty_arr, ut_type::DECIMAL(0)}, Int64(0)}}; + + check_function(func_name, input_types, data_set); + } + // array_position(Array, String) { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::String}; diff --git a/be/test/vec/function/function_test_util.cpp b/be/test/vec/function/function_test_util.cpp index 91fa39abed..2d8ef33fee 100644 --- a/be/test/vec/function/function_test_util.cpp +++ b/be/test/vec/function/function_test_util.cpp @@ -70,6 +70,10 @@ size_t type_index_to_data_type(const std::vector& input_types, size_t desc.type = doris_udf::FunctionContext::TYPE_LARGEINT; type = std::make_shared(); return 1; + case TypeIndex::Float32: + desc.type = doris_udf::FunctionContext::TYPE_FLOAT; + type = std::make_shared(); + return 1; case TypeIndex::Float64: desc.type = doris_udf::FunctionContext::TYPE_DOUBLE; type = std::make_shared(); @@ -84,7 +88,7 @@ size_t type_index_to_data_type(const std::vector& input_types, size_t return 1; case TypeIndex::Date: desc.type = doris_udf::FunctionContext::TYPE_DATE; - type = std::make_shared(); + type = std::make_shared(); return 1; case TypeIndex::Array: { desc.type = doris_udf::FunctionContext::TYPE_ARRAY; @@ -152,6 +156,9 @@ bool insert_cell(MutableColumnPtr& column, DataTypePtr type_ptr, const std::any& } else if (type.is_int128()) { auto value = std::any_cast(cell); column->insert_data(reinterpret_cast(&value), 0); + } else if (type.is_float32()) { + auto value = std::any_cast(cell); + column->insert_data(reinterpret_cast(&value), 0); } else if (type.is_float64()) { auto value = std::any_cast(cell); column->insert_data(reinterpret_cast(&value), 0); diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index 76457131e5..8765ec83fb 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -58,7 +58,10 @@ using STRING = std::string; using DOUBLE = double; using FLOAT = float; -inline auto DECIMAL = Decimal::double_to_decimal; +inline auto DECIMAL = Decimal128::double_to_decimal; +inline auto DECIMALFIELD = [](double v) { + return DecimalField(Decimal128::double_to_decimal(v), 9); +}; using DATETIME = std::string; @@ -179,11 +182,16 @@ void check_function(const std::string& func_name, const InputTypeSet& input_type Field field; column->get(i, field); - const auto& column_data = field.get(); const auto& expect_data = std::any_cast(data_set[i].second); - EXPECT_EQ(column_data, expect_data); + if constexpr (std::is_same_v>) { + const auto& column_data = field.get>().get_value(); + EXPECT_EQ(column_data.value, expect_data.value); + } else { + const auto& column_data = field.get(); + EXPECT_EQ(column_data, expect_data); + } }; if constexpr (nullable) { diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index d655de36fc..2d2d7d1bbf 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -117,67 +117,47 @@ visible_functions = [ [['array'], 'ARRAY', ['ARRAY', '...'], '', '', '', '', ''], [['array'], 'ARRAY', ['MAP', '...'], '', '', '', '', ''], [['array'], 'ARRAY', ['STRUCT', '...'], '', '', '', '', ''], - [['%element_extract%'], 'VARCHAR', ['MAP', 'VARCHAR'], '', '', '', '', ''], - [['%element_extract%'], 'VARCHAR', ['MAP', 'INT'], '', '', '', '', ''], - [['%element_extract%'], 'VARCHAR', ['STRUCT', 'INT'], '', '', '', '', ''], - [['%element_extract%'], 'VARCHAR', ['STRUCT', 'VARCHAR'], '', '', '', '', ''], - [['element_at', '%element_extract%'], 'TINYINT', ['ARRAY_TINYINT', 'INT'], - '_ZN5doris10vectorized20FunctionArrayElement12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', 'ALWAYS_NULLABLE'], - [['element_at', '%element_extract%'], 'SMALLINT', ['ARRAY_SMALLINT', 'INT'], - '_ZN5doris10vectorized20FunctionArrayElement12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', 'ALWAYS_NULLABLE'], - [['element_at', '%element_extract%'], 'INT', ['ARRAY_INT', 'INT'], - '_ZN5doris10vectorized20FunctionArrayElement12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', 'ALWAYS_NULLABLE'], - [['element_at', '%element_extract%'], 'BIGINT', ['ARRAY_BIGINT', 'INT'], - '_ZN5doris10vectorized20FunctionArrayElement12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', 'ALWAYS_NULLABLE'], - [['element_at', '%element_extract%'], 'VARCHAR', ['ARRAY_VARCHAR', 'INT'], - '_ZN5doris10vectorized20FunctionArrayElement12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', 'ALWAYS_NULLABLE'], - [['element_at', '%element_extract%'], 'STRING', ['ARRAY_STRING', 'INT'], - '_ZN5doris10vectorized20FunctionArrayElement12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', 'ALWAYS_NULLABLE'], + [['element_at', '%element_extract%'], 'BOOLEAN', ['ARRAY_BOOLEAN', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['element_at', '%element_extract%'], 'TINYINT', ['ARRAY_TINYINT', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['element_at', '%element_extract%'], 'SMALLINT', ['ARRAY_SMALLINT', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['element_at', '%element_extract%'], 'INT', ['ARRAY_INT', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['element_at', '%element_extract%'], 'BIGINT', ['ARRAY_BIGINT', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['element_at', '%element_extract%'], 'LARGEINT', ['ARRAY_LARGEINT', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['element_at', '%element_extract%'], 'DATETIME', ['ARRAY_DATETIME', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['element_at', '%element_extract%'], 'DATE', ['ARRAY_DATE', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['element_at', '%element_extract%'], 'FLOAT', ['ARRAY_FLOAT', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['element_at', '%element_extract%'], 'DOUBLE', ['ARRAY_DOUBLE', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['element_at', '%element_extract%'], 'DECIMALV2', ['ARRAY_DECIMALV2', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['element_at', '%element_extract%'], 'VARCHAR', ['ARRAY_VARCHAR', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['element_at', '%element_extract%'], 'STRING', ['ARRAY_STRING', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], - [['array_contains'], 'BOOLEAN', ['ARRAY', 'TINYINT'], - '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayContainsActionENS0_17NameArrayContainsEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', ''], - [['array_contains'], 'BOOLEAN', ['ARRAY', 'SMALLINT'], - '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayContainsActionENS0_17NameArrayContainsEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', ''], - [['array_contains'], 'BOOLEAN', ['ARRAY', 'INT'], - '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayContainsActionENS0_17NameArrayContainsEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', ''], - [['array_contains'], 'BOOLEAN', ['ARRAY', 'BIGINT'], - '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayContainsActionENS0_17NameArrayContainsEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', ''], - [['array_contains'], 'BOOLEAN', ['ARRAY', 'VARCHAR'], - '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayContainsActionENS0_17NameArrayContainsEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', ''], - [['array_contains'], 'BOOLEAN', ['ARRAY', 'STRING'], - '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayContainsActionENS0_17NameArrayContainsEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY_TINYINT', 'TINYINT'], '', '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY_SMALLINT', 'SMALLINT'], '', '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY_INT', 'INT'], '', '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY_BIGINT', 'BIGINT'], '', '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY_LARGEINT', 'LARGEINT'], '', '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY_DATETIME', 'DATETIME'], '', '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY_DATE', 'DATE'], '', '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY_FLOAT', 'FLOAT'], '', '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY_DOUBLE', 'DOUBLE'], '', '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY_DECIMALV2', 'DECIMALV2'], '', '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY_VARCHAR', 'VARCHAR'], '', '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY_STRING', 'STRING'], '', '', '', 'vec', ''], - [['array_position'], 'BIGINT', ['ARRAY', 'TINYINT'], - '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayPositionActionENS0_17NameArrayPositionEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', ''], - [['array_position'], 'BIGINT', ['ARRAY', 'SMALLINT'], - '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayPositionActionENS0_17NameArrayPositionEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', ''], - [['array_position'], 'BIGINT', ['ARRAY', 'INT'], - '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayPositionActionENS0_17NameArrayPositionEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', ''], - [['array_position'], 'BIGINT', ['ARRAY', 'BIGINT'], - '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayPositionActionENS0_17NameArrayPositionEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', ''], - [['array_position'], 'BIGINT', ['ARRAY', 'VARCHAR'], - '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayPositionActionENS0_17NameArrayPositionEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', ''], - [['array_position'], 'BIGINT', ['ARRAY', 'STRING'], - '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayPositionActionENS0_17NameArrayPositionEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', - '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY_BOOLEAN', 'BOOLEAN'], '', '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY_TINYINT', 'TINYINT'], '', '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY_SMALLINT', 'SMALLINT'], '', '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY_INT', 'INT'], '', '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY_BIGINT', 'BIGINT'], '', '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY_LARGEINT', 'LARGEINT'], '', '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY_DATETIME', 'DATETIME'], '', '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY_DATE', 'DATE'], '', '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY_FLOAT', 'FLOAT'], '', '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY_DOUBLE', 'DOUBLE'], '', '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY_DECIMALV2', 'DECIMALV2'], '', '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY_VARCHAR', 'VARCHAR'], '', '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY_STRING', 'STRING'], '', '', '', 'vec', ''], # Timestamp functions [['unix_timestamp'], 'INT', [],