diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index f863bea801..a4598e5c15 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -110,6 +110,8 @@ set(VEC_FILES exprs/vinfo_func.cpp exprs/table_function/vexplode_split.cpp exprs/table_function/vexplode_numbers.cpp + functions/array/function_array_index.cpp + functions/array/function_array_register.cpp functions/math.cpp functions/function_bitmap.cpp functions/function_bitmap_variadic.cpp diff --git a/be/src/vec/functions/array/function_array_index.cpp b/be/src/vec/functions/array/function_array_index.cpp new file mode 100644 index 0000000000..474500ed89 --- /dev/null +++ b/be/src/vec/functions/array/function_array_index.cpp @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/functions/array/function_array_index.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris::vectorized { + +struct NameArrayContains { static constexpr auto name = "array_contains"; }; +struct NameArrayPosition { static constexpr auto name = "array_position"; }; + +void register_function_array_index(SimpleFunctionFactory& factory) { + factory.register_function>(); + factory.register_function>(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h new file mode 100644 index 0000000000..f0948112cd --- /dev/null +++ b/be/src/vec/functions/array/function_array_index.h @@ -0,0 +1,196 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/arrayIndex.h +// and modified by Doris +#pragma once + +#include + +#include "vec/columns/column_array.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_string.h" +#include "vec/data_types/data_type_number.h" +#include "vec/functions/function.h" + +namespace doris::vectorized { + +struct ArrayContainsAction +{ + using ResultType = UInt8; + static constexpr const bool resume_execution = false; + static constexpr void apply(ResultType& current, size_t) noexcept { current = 1; } +}; + +struct ArrayPositionAction +{ + using ResultType = Int64; + static constexpr const bool resume_execution = false; + static constexpr void apply(ResultType& current, size_t j) noexcept { current = j + 1; } +}; + +template +class FunctionArrayIndex : public IFunction +{ +public: + using ResultType = typename ConcreteAction::ResultType; + + static constexpr auto name = Name::name; + static FunctionPtr create() { return std::make_shared(); } + + /// Get function name. + String get_name() const override { return name; } + + bool is_variadic() const override { return false; } + + size_t get_number_of_arguments() const override { return 2; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + DCHECK(WhichDataType(arguments[0]).is_array()); + return std::make_shared>(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + return execute_non_nullable(block, arguments, result, input_rows_count); + } + +private: + static bool execute_string(Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { + // check array nested column type and get data + auto array_column = check_and_get_column(*block.get_by_position(arguments[0]).column); + DCHECK(array_column != nullptr); + auto nested_column = check_and_get_column(array_column->get_data()); + if (!nested_column) { + return false; + } + const auto& arr_offs = array_column->get_offsets(); + const auto& str_offs = nested_column->get_offsets(); + const auto& str_chars = nested_column->get_chars(); + + // check right column type + auto ptr = block.get_by_position(arguments[1]).column; + if (is_column_const(*ptr)) { + ptr = check_and_get_column(ptr)->get_data_column_ptr(); + } + if (!check_and_get_column(*ptr)) { + return false; + } + + // expand const column and get data + auto right_column = check_and_get_column(*block.get_by_position(arguments[1]).column->convert_to_full_column_if_const()); + const auto& right_offs = right_column->get_offsets(); + const auto& right_chars = right_column->get_chars(); + + // prepare return data + auto dst = ColumnVector::create(); + auto& dst_data = dst->get_data(); + dst_data.resize(input_rows_count); + + // process + for (size_t row = 0; row < input_rows_count; ++row) { + ResultType res = 0; + size_t off = arr_offs[row - 1]; + size_t len = arr_offs[row] - off; + + size_t right_off = right_offs[row - 1]; + size_t right_len = right_offs[row] - right_off; + for (size_t pos = 0; pos < len; ++pos) { + size_t str_pos = str_offs[pos + off - 1]; + size_t str_len = str_offs[pos + off] - str_pos; + + const char* left_raw_v = reinterpret_cast(&str_chars[str_pos]); + const char* right_raw_v = reinterpret_cast(&right_chars[right_off]); + if (std::string_view(left_raw_v, str_len) == std::string_view(right_raw_v, right_len)) { + ConcreteAction::apply(res, pos); + break; + } + } + dst_data[row] = res; + } + block.replace_by_position(result, std::move(dst)); + return true; + } + +#define INTEGRAL_TPL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64 + template + static bool execute_integral(Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { + return (execute_integral_expanded(block, arguments, result, input_rows_count) || ...); + } + template + static bool execute_integral_expanded(Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { + return (execute_integral_impl(block, arguments, result, input_rows_count) || ...); + } + template + static bool execute_integral_impl(Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { + // check array nested column type and get data + auto array_column = check_and_get_column(*block.get_by_position(arguments[0]).column); + DCHECK(array_column != nullptr); + auto nested_column = check_and_get_column>(array_column->get_data()); + if (!nested_column) { + return false; + } + const auto& offsets = array_column->get_offsets(); + const auto& nested_data = nested_column->get_data(); + + // check right column type + auto ptr = block.get_by_position(arguments[1]).column; + if (is_column_const(*ptr)) { + ptr = check_and_get_column(ptr)->get_data_column_ptr(); + } + if (!check_and_get_column>(*ptr)) { + return false; + } + + // expand const column and get data + auto right_column = block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); + const auto& right_data = check_and_get_column>(*right_column)->get_data(); + + // prepare return data + auto dst = ColumnVector::create(); + auto& dst_data = dst->get_data(); + dst_data.resize(input_rows_count); + + // process + for (size_t row = 0; row < input_rows_count; ++row) { + ResultType res = 0; + size_t off = offsets[row - 1]; + size_t len = offsets[row] - off; + for (size_t pos = 0; pos < len; ++pos) { + if (nested_data[pos + off] == right_data[row]) { + ConcreteAction::apply(res, pos); + break; + } + } + dst_data[row] = res; + } + block.replace_by_position(result, std::move(dst)); + return true; + } + + Status execute_non_nullable(Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { + WhichDataType right_type(block.get_by_position(arguments[1]).type); + if ((right_type.is_string() && execute_string(block, arguments, result, input_rows_count)) || + execute_integral(block, arguments, result, input_rows_count)) { + return Status::OK(); + } + return Status::OK(); + } +#undef INTEGRAL_TPL_PACK +}; + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp new file mode 100644 index 0000000000..e9ab7630fe --- /dev/null +++ b/be/src/vec/functions/array/function_array_register.cpp @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/registerFunctionsArray.cpp +// and modified by Doris + +#include "vec/functions/simple_function_factory.h" + +namespace doris::vectorized { + +void register_function_array_index(SimpleFunctionFactory&); + +void register_function_array(SimpleFunctionFactory& factory) { + register_function_array_index(factory); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index f6fdb6c52f..46124b6a27 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -73,6 +73,7 @@ void register_function_datetime_floor_ceil(SimpleFunctionFactory& factory); void register_function_convert_tz(SimpleFunctionFactory& factory); void register_function_least_greast(SimpleFunctionFactory& factory); void register_function_fake(SimpleFunctionFactory& factory); +void register_function_array(SimpleFunctionFactory& factory); void register_function_encryption(SimpleFunctionFactory& factory); void register_function_regexp_extract(SimpleFunctionFactory& factory); @@ -200,6 +201,7 @@ public: register_function_encryption(instance); register_function_regexp_extract(instance); register_function_hex_variadic(instance); + register_function_array(instance); }); return instance; } diff --git a/be/src/vec/olap/vgeneric_iterators.cpp b/be/src/vec/olap/vgeneric_iterators.cpp index e99d0f5928..4b2607347e 100644 --- a/be/src/vec/olap/vgeneric_iterators.cpp +++ b/be/src/vec/olap/vgeneric_iterators.cpp @@ -143,9 +143,6 @@ public: if (data_type == nullptr) { return Status::RuntimeError("invalid data type"); } - if (column_desc->is_nullable()) { - data_type = std::make_shared(std::move(data_type)); - } auto column = data_type->create_column(); column->reserve(_block_row_max); _block.insert(ColumnWithTypeAndName(std::move(column), data_type, column_desc->name())); diff --git a/be/test/vec/exec/vgeneric_iterators_test.cpp b/be/test/vec/exec/vgeneric_iterators_test.cpp index a257ff73da..d38f657e08 100644 --- a/be/test/vec/exec/vgeneric_iterators_test.cpp +++ b/be/test/vec/exec/vgeneric_iterators_test.cpp @@ -55,9 +55,6 @@ static void create_block(Schema& schema, vectorized::Block& block) ASSERT_TRUE(column_desc); auto data_type = Schema::get_data_type_ptr(*column_desc); ASSERT_NE(data_type, nullptr); - if (column_desc->is_nullable()) { - data_type = std::make_shared(std::move(data_type)); - } auto column = data_type->create_column(); vectorized::ColumnWithTypeAndName ctn(std::move(column), data_type, column_desc->name()); block.insert(ctn); diff --git a/be/test/vec/function/CMakeLists.txt b/be/test/vec/function/CMakeLists.txt index c4101f551d..827bfb889e 100644 --- a/be/test/vec/function/CMakeLists.txt +++ b/be/test/vec/function/CMakeLists.txt @@ -18,6 +18,7 @@ # where to put generated libraries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/test/vec/function") +ADD_BE_TEST(function_array_index_test) ADD_BE_TEST(function_bitmap_test) ADD_BE_TEST(function_comparison_test) ADD_BE_TEST(function_hash_test) diff --git a/be/test/vec/function/function_array_index_test.cpp b/be/test/vec/function/function_array_index_test.cpp new file mode 100644 index 0000000000..7c34c3850c --- /dev/null +++ b/be/test/vec/function/function_array_index_test.cpp @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include + +#include "function_test_util.h" +#include "runtime/tuple_row.h" +#include "util/url_coding.h" +#include "vec/core/field.h" + +namespace doris::vectorized { + +TEST(function_array_index_test, array_contains) { + std::string func_name = "array_contains"; + Array empty_arr; + + // array_contains(Array, Int32) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32, TypeIndex::Int32}; + + Array vec = {Int32(1), Int32(2), Int32(3)}; + DataSet data_set = {{{vec, 2}, UInt8(1)}, {{vec, 4}, UInt8(0)}, {{Null(), 1}, Null()}, {{empty_arr, 1}, UInt8(0)}}; + + check_function(func_name, input_types, data_set); + } + + // array_contains(Array, Int8) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32, TypeIndex::Int8}; + + Array vec = {Int32(1), Int32(2), Int32(3)}; + DataSet data_set = {{{vec, Int8(2)}, UInt8(1)}, {{vec, Int8(4)}, UInt8(0)}, {{Null(), Int8(1)}, Null()}, {{empty_arr, Int8(1)}, UInt8(0)}}; + + check_function(func_name, input_types, data_set); + } + + // array_contains(Array, Int64) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int8, TypeIndex::Int64}; + + Array vec = {Int8(1), Int8(2), Int8(3)}; + DataSet data_set = {{{vec, Int64(2)}, UInt8(1)}, {{vec, Int64(4)}, UInt8(0)}, {{Null(), Int64(1)}, Null()}, {{empty_arr, Int64(1)}, UInt8(0)}}; + + check_function(func_name, input_types, data_set); + } + + // array_contains(Array, String) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::String}; + + Array vec = {Field("abc", 3), Field("", 0), Field("def",3)}; + DataSet data_set = {{{vec, std::string("abc")}, UInt8(1)}, {{vec, std::string("aaa")}, UInt8(0)}, + {{vec, std::string("")}, UInt8(1)}, {{Null(), std::string("abc")}, Null()}, {{empty_arr, std::string("")}, UInt8(0)}}; + + check_function(func_name, input_types, data_set); + } +} + +TEST(function_array_index_test, array_position) { + std::string func_name = "array_position"; + Array empty_arr; + + // array_position(Array, Int32) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32, TypeIndex::Int32}; + + Array vec = {Int32(1), Int32(2), Int32(3)}; + DataSet data_set = {{{vec, 2}, Int64(2)}, {{vec, 4}, Int64(0)}, {{Null(), 1}, Null()}, {{empty_arr, 1}, Int64(0)}}; + + check_function(func_name, input_types, data_set); + } + + // array_position(Array, Int8) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32, TypeIndex::Int8}; + + Array vec = {Int32(1), Int32(2), Int32(3)}; + DataSet data_set = {{{vec, Int8(2)}, Int64(2)}, {{vec, Int8(4)}, Int64(0)}, {{Null(), Int8(1)}, Null()}, {{empty_arr, Int8(1)}, Int64(0)}}; + + check_function(func_name, input_types, data_set); + } + + // array_position(Array, Int64) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int8, TypeIndex::Int64}; + + Array vec = {Int8(1), Int8(2), Int8(3)}; + DataSet data_set = {{{vec, Int64(2)}, Int64(2)}, {{vec, Int64(4)}, Int64(0)}, {{Null(), Int64(1)}, Null()}, {{empty_arr, Int64(1)}, Int64(0)}}; + + check_function(func_name, input_types, data_set); + } + + // array_position(Array, String) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::String}; + + Array vec = {Field("abc", 3), Field("", 0), Field("def",3)}; + DataSet data_set = {{{vec, std::string("abc")}, Int64(1)}, {{vec, std::string("aaa")}, Int64(0)}, + {{vec, std::string("")}, Int64(2)}, {{Null(), std::string("abc")}, Null()}, {{empty_arr, std::string("")}, Int64(0)}}; + + check_function(func_name, input_types, data_set); + } +} + +} // namespace doris::vectorized + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index 247c309965..c345354fbf 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -68,19 +68,114 @@ using FLOAT = float; inline auto DECIMAL = Decimal::double_to_decimal; using DATETIME = std::string; + +struct UTDataTypeDesc { + DataTypePtr data_type; + doris_udf::FunctionContext::TypeDesc type_desc; + std::string col_name; + bool is_const = false; + bool is_nullable = true; +}; +using UTDataTypeDescs = std::vector; + } // namespace ut_type -template -void insert_column_to_block(std::list& columns, ColumnsWithTypeAndName& ctn, - Column&& col, NullColumn&& null_map, Block& block, - const std::string& col_name, int i, bool is_const, int row_size) { - columns.emplace_back(ColumnNullable::create(std::move(col), std::move(null_map))); - ColumnWithTypeAndName type_and_name( - is_const ? ColumnConst::create(columns.back()->get_ptr(), row_size) - : columns.back()->get_ptr(), - make_nullable(std::make_shared()), col_name); - block.insert(i, type_and_name); - ctn.emplace_back(type_and_name); +size_t type_index_to_data_type(const std::vector& input_types, size_t index, + doris_udf::FunctionContext::TypeDesc& desc, + DataTypePtr& type) { + if(index < 0 || index >= input_types.size()) { + return -1; + } + + TypeIndex tp; + if (input_types[index].type() == typeid(Consted)) { + tp = std::any_cast(input_types[index]).tp; + } else { + tp = std::any_cast(input_types[index]); + } + + switch (tp) { + case TypeIndex::String: + desc.type = doris_udf::FunctionContext::TYPE_STRING; + type = std::make_shared(); + return 1; + case TypeIndex::BitMap: + desc.type = doris_udf::FunctionContext::TYPE_OBJECT; + type = std::make_shared(); + return 1; + case TypeIndex::Int8: + desc.type = doris_udf::FunctionContext::TYPE_TINYINT; + type = std::make_shared(); + return 1; + case TypeIndex::Int16: + desc.type = doris_udf::FunctionContext::TYPE_SMALLINT; + type = std::make_shared(); + return 1; + case TypeIndex::Int32: + desc.type = doris_udf::FunctionContext::TYPE_INT; + type = std::make_shared(); + return 1; + case TypeIndex::Int64: + desc.type = doris_udf::FunctionContext::TYPE_BIGINT; + type = std::make_shared(); + return 1; + case TypeIndex::Int128: + desc.type = doris_udf::FunctionContext::TYPE_LARGEINT; + type = std::make_shared(); + return 1; + case TypeIndex::Float64: + desc.type = doris_udf::FunctionContext::TYPE_DOUBLE; + type = std::make_shared(); + return 1; + case TypeIndex::Decimal128: + desc.type = doris_udf::FunctionContext::TYPE_DECIMALV2; + type = std::make_shared>(); + return 1; + case TypeIndex::DateTime: + desc.type = doris_udf::FunctionContext::TYPE_DATETIME; + type = std::make_shared(); + return 1; + case TypeIndex::Date: + desc.type = doris_udf::FunctionContext::TYPE_DATE; + type = std::make_shared(); + return 1; + case TypeIndex::Array: { + desc.type = doris_udf::FunctionContext::TYPE_ARRAY; + doris_udf::FunctionContext::TypeDesc sub_desc; + DataTypePtr sub_type = nullptr; + size_t ret = type_index_to_data_type(input_types, index + 1, sub_desc, sub_type); + if (ret <= 0) { + return ret; + } + desc.children.push_back(doris_udf::FunctionContext::TypeDesc()); + type = std::make_shared(std::move(sub_type)); + return ret + 1; + } + default: + LOG(WARNING) << "not supported TypeIndex:" << (int)tp; + return 0; + } +} +bool parse_ut_data_type(const std::vector& input_types, ut_type::UTDataTypeDescs& descs) { + descs.clear(); + descs.reserve(input_types.size()); + for (size_t i = 0; i < input_types.size(); ) { + ut_type::UTDataTypeDesc desc; + if (input_types[i].type() == typeid(Consted)) { + desc.is_const = true; + } + size_t res = type_index_to_data_type(input_types, i, desc.type_desc, desc.data_type); + if (res <= 0) { + return false; + } + if (desc.is_nullable) { + desc.data_type = make_nullable(std::move(desc.data_type)); + } + desc.col_name = "k" + std::to_string(i); + descs.emplace_back(desc); + i += res; + } + return true; } // Null values are represented by Null() @@ -89,226 +184,101 @@ void insert_column_to_block(std::list& columns, ColumnsWithTypeAndNam template void check_function(const std::string& func_name, const std::vector& input_types, const DataSet& data_set) { - size_t row_size = data_set.size(); - size_t column_size = input_types.size(); + // 1.0 create data type + ut_type::UTDataTypeDescs descs; + ASSERT_TRUE(parse_ut_data_type(input_types, descs)); - std::list columns; + // 1.1 insert data and create block + auto row_size = data_set.size(); Block block; - ColumnNumbers arguments; - ColumnsWithTypeAndName ctn; - std::vector> constant_col_ptrs; - std::vector constant_cols; - std::vector arg_types; - doris_udf::FunctionContext::TypeDesc arg_type; - // 1. build block and column type and names - for (int i = 0; i < column_size; i++) { - TypeIndex tp; - bool is_const; - if (input_types[i].type() == typeid(Consted)) { - tp = std::any_cast(input_types[i]).tp; - is_const = true; - } else { - tp = std::any_cast(input_types[i]); - is_const = false; - } + for (size_t i = 0; i < descs.size(); ++i) { + auto& desc = descs[i]; + auto column = desc.data_type->create_column(); + column->reserve(row_size); - std::string col_name = "k" + std::to_string(i); + auto type_ptr = desc.data_type->is_nullable() ? + ((DataTypeNullable*)(desc.data_type.get()))->get_nested_type() : desc.data_type; + WhichDataType type(type_ptr); - auto null_map = ColumnUInt8::create(row_size, false); - auto& null_map_data = null_map->get_data(); + for (int j = 0; j < row_size; j++) { + if (data_set[j].first[i].type() == typeid(Null)) { + column->insert_data(nullptr, 0); + continue; + } - if (tp == TypeIndex::String) { - auto col = ColumnString::create(); - for (int j = 0; j < row_size; j++) { - if (data_set[j].first[i].type() == typeid(Null)) { - null_map_data[j] = true; - col->insert_default(); - continue; - } + if (type.is_string()) { auto str = std::any_cast(data_set[j].first[i]); - col->insert_data(str.c_str(), str.size()); - } - insert_column_to_block(columns, ctn, std::move(col), - std::move(null_map), block, col_name, i, - is_const, row_size); - arg_type.type = doris_udf::FunctionContext::TYPE_STRING; - } else if (tp == TypeIndex::BitMap) { - auto col = ColumnBitmap::create(); - for (int j = 0; j < row_size; j++) { - if (data_set[j].first[i].type() == typeid(Null)) { - null_map_data[j] = true; - col->insert_default(); - continue; - } + column->insert_data(str.c_str(), str.size()); + } else if (type.idx == TypeIndex::BitMap) { BitmapValue* bitmap = std::any_cast(data_set[j].first[i]); - col->insert_value(*bitmap); - } - insert_column_to_block(columns, ctn, std::move(col), - std::move(null_map), block, col_name, i, - is_const, row_size); - arg_type.type = doris_udf::FunctionContext::TYPE_OBJECT; - } else if (tp == TypeIndex::Int8) { - auto col = ColumnInt8::create(); - - for (int j = 0; j < row_size; j++) { - if (data_set[j].first[i].type() == typeid(Null)) { - null_map_data[j] = true; - col->insert_default(); - continue; - } + column->insert_data((char*)bitmap, sizeof(BitmapValue)); + } else if (type.is_int8()) { auto value = std::any_cast(data_set[j].first[i]); - col->insert_data(reinterpret_cast(&value), 0); - } - insert_column_to_block(columns, ctn, std::move(col), std::move(null_map), - block, col_name, i, is_const, row_size); - arg_type.type = doris_udf::FunctionContext::TYPE_TINYINT; - } else if (tp == TypeIndex::Int16) { - auto col = ColumnInt16::create(); - - for (int j = 0; j < row_size; j++) { - if (data_set[j].first[i].type() == typeid(Null)) { - null_map_data[j] = true; - col->insert_default(); - continue; - } + column->insert_data(reinterpret_cast(&value), 0); + } else if (type.is_int16()) { auto value = std::any_cast(data_set[j].first[i]); - col->insert_data(reinterpret_cast(&value), 0); - } - insert_column_to_block(columns, ctn, std::move(col), std::move(null_map), - block, col_name, i, is_const, row_size); - arg_type.type = doris_udf::FunctionContext::TYPE_SMALLINT; - } else if (tp == TypeIndex::Int32) { - auto col = ColumnInt32::create(); - - for (int j = 0; j < row_size; j++) { - if (data_set[j].first[i].type() == typeid(Null)) { - null_map_data[j] = true; - col->insert_default(); - continue; - } + column->insert_data(reinterpret_cast(&value), 0); + } else if (type.is_int32()) { auto value = std::any_cast(data_set[j].first[i]); - col->insert_data(reinterpret_cast(&value), 0); - } - insert_column_to_block(columns, ctn, std::move(col), std::move(null_map), - block, col_name, i, is_const, row_size); - arg_type.type = doris_udf::FunctionContext::TYPE_INT; - } else if (tp == TypeIndex::Int64) { - auto col = ColumnInt64::create(); - - for (int j = 0; j < row_size; j++) { - if (data_set[j].first[i].type() == typeid(Null)) { - null_map_data[j] = true; - col->insert_default(); - continue; - } + column->insert_data(reinterpret_cast(&value), 0); + } else if (type.is_int64()) { auto value = std::any_cast(data_set[j].first[i]); - col->insert_data(reinterpret_cast(&value), 0); - } - insert_column_to_block(columns, ctn, std::move(col), std::move(null_map), - block, col_name, i, is_const, row_size); - arg_type.type = doris_udf::FunctionContext::TYPE_BIGINT; - } else if (tp == TypeIndex::Int128) { - auto col = ColumnInt128::create(); - - for (int j = 0; j < row_size; j++) { - if (data_set[j].first[i].type() == typeid(Null)) { - null_map_data[j] = true; - col->insert_default(); - continue; - } + column->insert_data(reinterpret_cast(&value), 0); + } else if (type.is_int128()) { auto value = std::any_cast(data_set[j].first[i]); - col->insert_data(reinterpret_cast(&value), 0); - } - insert_column_to_block(columns, ctn, std::move(col), - std::move(null_map), block, col_name, i, - is_const, row_size); - arg_type.type = doris_udf::FunctionContext::TYPE_LARGEINT; - } else if (tp == TypeIndex::Float64) { - auto col = ColumnFloat64::create(); - - for (int j = 0; j < row_size; j++) { - if (data_set[j].first[i].type() == typeid(Null)) { - null_map_data[j] = true; - col->insert_default(); - continue; - } + column->insert_data(reinterpret_cast(&value), 0); + } else if (type.is_float64()) { auto value = std::any_cast(data_set[j].first[i]); - col->insert_data(reinterpret_cast(&value), 0); - } - insert_column_to_block(columns, ctn, std::move(col), - std::move(null_map), block, col_name, i, - is_const, row_size); - arg_type.type = doris_udf::FunctionContext::TYPE_DOUBLE; - } else if (tp == TypeIndex::Decimal128) { - auto col = ColumnDecimal::create(0, 9); - - for (int j = 0; j < row_size; j++) { - if (data_set[j].first[i].type() == typeid(Null)) { - null_map_data[j] = true; - col->insert_default(); - continue; - } + column->insert_data(reinterpret_cast(&value), 0); + } else if (type.is_float64()) { + auto value = std::any_cast(data_set[j].first[i]); + column->insert_data(reinterpret_cast(&value), 0); + } else if (type.is_decimal128()) { auto value = std::any_cast>(data_set[j].first[i]); - col->insert_data(reinterpret_cast(&value), 0); - } - insert_column_to_block>(columns, ctn, std::move(col), - std::move(null_map), block, - col_name, i, is_const, row_size); - arg_type.type = doris_udf::FunctionContext::TYPE_DECIMALV2; - } else if (tp == TypeIndex::DateTime) { - static std::string date_time_format("%Y-%m-%d %H:%i:%s"); - auto col = ColumnInt64::create(); - - for (int j = 0; j < row_size; j++) { - if (data_set[j].first[i].type() == typeid(Null)) { - null_map_data[j] = true; - col->insert_default(); - continue; - } + column->insert_data(reinterpret_cast(&value), 0); + } else if (type.is_date_time()) { + static std::string date_time_format("%Y-%m-%d %H:%i:%s"); auto datetime_str = std::any_cast(data_set[j].first[i]); VecDateTimeValue v; v.from_date_format_str(date_time_format.c_str(), date_time_format.size(), datetime_str.c_str(), datetime_str.size()); v.to_datetime(); - col->insert_data(reinterpret_cast(&v), 0); - } - insert_column_to_block(columns, ctn, std::move(col), - std::move(null_map), block, col_name, i, - is_const, row_size); - arg_type.type = doris_udf::FunctionContext::TYPE_DATETIME; - } else if (tp == TypeIndex::Date) { - static std::string date_time_format("%Y-%m-%d"); - auto col = ColumnInt64::create(); - - for (int j = 0; j < row_size; j++) { - if (data_set[j].first[i].type() == typeid(Null)) { - null_map_data[j] = true; - col->insert_default(); - continue; - } + column->insert_data(reinterpret_cast(&v), 0); + } else if (type.is_date()) { + static std::string date_time_format("%Y-%m-%d"); auto datetime_str = std::any_cast(data_set[j].first[i]); VecDateTimeValue v; v.from_date_format_str(date_time_format.c_str(), date_time_format.size(), datetime_str.c_str(), datetime_str.size()); v.cast_to_date(); - col->insert_data(reinterpret_cast(&v), 0); + column->insert_data(reinterpret_cast(&v), 0); + } else if (type.is_array()) { + auto v = std::any_cast(data_set[j].first[i]); + column->insert(v); + } else { + LOG(WARNING) << "dataset not supported for TypeIndex:" << (int)type.idx; + ASSERT_TRUE(false); } - insert_column_to_block(columns, ctn, std::move(col), - std::move(null_map), block, col_name, i, - is_const, row_size); - arg_type.type = doris_udf::FunctionContext::TYPE_DATE; - } else { - ASSERT_TRUE(false); - arg_type.type = doris_udf::FunctionContext::INVALID_TYPE; } + + if (desc.is_const) { + column = ColumnConst::create(std::move(column), row_size); + } + block.insert({std::move(column), desc.data_type, desc.col_name}); + } + + // 1.2 parepare args for function call + ColumnNumbers arguments; + std::vector arg_types; + std::vector> constant_col_ptrs; + std::vector constant_cols; + for (size_t i = 0; i < descs.size(); ++i) { + auto& desc = descs[i]; arguments.push_back(i); - arg_types.push_back(arg_type); - if (is_const) { - const auto& column = block.get_by_position(i).column; - std::shared_ptr constant_col = - std::make_shared(column); - constant_col_ptrs.push_back(constant_col); - constant_cols.push_back(constant_col.get()); + arg_types.push_back(desc.type_desc); + if (desc.is_const) { + constant_col_ptrs.push_back(std::make_shared(block.get_by_position(i).column)); + constant_cols.push_back(constant_col_ptrs.back().get()); } else { constant_cols.push_back(nullptr); } @@ -317,7 +287,7 @@ void check_function(const std::string& func_name, const std::vector& i // 2. execute function auto return_type = nullable ? make_nullable(std::make_shared()) : std::make_shared(); - auto func = SimpleFunctionFactory::instance().get_function(func_name, ctn, return_type); + auto func = SimpleFunctionFactory::instance().get_function(func_name, block.get_columns_with_type_and_name(), return_type); ASSERT_TRUE(func != nullptr); doris_udf::FunctionContext::TypeDesc fn_ctx_return; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java index 172bb9f3c5..f3adcc67ee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java @@ -66,9 +66,13 @@ public class ArrayType extends Type { return false; } + // Array(Null) is a virtual Array type, can match any Array(...) type if (itemType.isNull()) { return true; } + if (((ArrayType) t).getItemType().isNull()) { + return true; + } return itemType.matchesType(((ArrayType) t).itemType); } diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 3a5e8d1598..a79a51350f 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -120,6 +120,43 @@ visible_functions = [ [['%element_extract%'], 'VARCHAR', ['MAP', 'INT'], '', '', '', '', ''], [['%element_extract%'], 'VARCHAR', ['STRUCT', 'INT'], '', '', '', '', ''], [['%element_extract%'], 'VARCHAR', ['STRUCT', 'VARCHAR'], '', '', '', '', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY', 'TINYINT'], + '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayContainsActionENS0_17NameArrayContainsEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', + '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY', 'SMALLINT'], + '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayContainsActionENS0_17NameArrayContainsEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', + '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY', 'INT'], + '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayContainsActionENS0_17NameArrayContainsEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', + '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY', 'BIGINT'], + '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayContainsActionENS0_17NameArrayContainsEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', + '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY', 'VARCHAR'], + '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayContainsActionENS0_17NameArrayContainsEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', + '', '', 'vec', ''], + [['array_contains'], 'BOOLEAN', ['ARRAY', 'STRING'], + '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayContainsActionENS0_17NameArrayContainsEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', + '', '', 'vec', ''], + + [['array_position'], 'BIGINT', ['ARRAY', 'TINYINT'], + '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayPositionActionENS0_17NameArrayPositionEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', + '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY', 'SMALLINT'], + '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayPositionActionENS0_17NameArrayPositionEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', + '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY', 'INT'], + '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayPositionActionENS0_17NameArrayPositionEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', + '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY', 'BIGINT'], + '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayPositionActionENS0_17NameArrayPositionEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', + '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY', 'VARCHAR'], + '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayPositionActionENS0_17NameArrayPositionEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', + '', '', 'vec', ''], + [['array_position'], 'BIGINT', ['ARRAY', 'STRING'], + '_ZN5doris10vectorized18FunctionArrayIndexINS0_19ArrayPositionActionENS0_17NameArrayPositionEE12execute_implEPN9doris_udf15FunctionContextERNS0_5BlockERKSt6vectorImSaImEEmm', + '', '', 'vec', ''], # Timestamp functions [['unix_timestamp'], 'INT', [],