diff --git a/be/src/vec/functions/array/function_arrays_overlap.h b/be/src/vec/functions/array/function_arrays_overlap.h index a4152f1503..d641b78e65 100644 --- a/be/src/vec/functions/array/function_arrays_overlap.h +++ b/be/src/vec/functions/array/function_arrays_overlap.h @@ -63,15 +63,30 @@ struct OverlapSetImpl { using ElementNativeType = typename NativeType::Type; using Set = HashSetWithStackMemory, 4>; Set set; - void insert_array(const IColumn* column, size_t start, size_t size) { + + template + void insert_array(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { const auto& vec = assert_cast(*column).get_data(); for (size_t i = start; i < start + size; ++i) { + if constexpr (nullable) { + if (nullmap[i]) { + continue; + } + } set.insert(vec[i]); } } - bool find_any(const IColumn* column, size_t start, size_t size) { + + template + bool find_any(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { const auto& vec = assert_cast(*column).get_data(); for (size_t i = start; i < start + size; ++i) { + if constexpr (nullable) { + if (nullmap[i]) { + continue; + } + } + if (set.find(vec[i])) { return true; } @@ -84,13 +99,28 @@ template <> struct OverlapSetImpl { using Set = HashSetWithStackMemory, 4>; Set set; - void insert_array(const IColumn* column, size_t start, size_t size) { + + template + void insert_array(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { for (size_t i = start; i < start + size; ++i) { + if constexpr (nullable) { + if (nullmap[i]) { + continue; + } + } set.insert(column->get_data_at(i)); } } - bool find_any(const IColumn* column, size_t start, size_t size) { + + template + bool find_any(const IColumn* column, const UInt8* nullmap, size_t start, size_t size) { for (size_t i = start; i < start + size; ++i) { + if constexpr (nullable) { + if (nullmap[i]) { + continue; + } + } + if (set.find(column->get_data_at(i))) { return true; } @@ -237,7 +267,6 @@ public: auto dst_null_map = ColumnVector::create(input_rows_count, 0); UInt8* dst_null_map_data = dst_null_map->get_data().data(); - // any array is null or any elements in array is null, return null RETURN_IF_ERROR(_execute_nullable(left_exec_data, dst_null_map_data)); RETURN_IF_ERROR(_execute_nullable(right_exec_data, dst_null_map_data)); @@ -334,7 +363,6 @@ private: continue; } - // any element inside array is NULL, return NULL if (data.nested_nullmap_data) { ssize_t start = (*data.offsets_ptr)[row - 1]; ssize_t size = (*data.offsets_ptr)[row] - start; @@ -351,14 +379,10 @@ private: template Status _execute_internal(const ColumnArrayExecutionData& left_data, - const ColumnArrayExecutionData& right_data, - const UInt8* dst_nullmap_data, UInt8* dst_data) const { + const ColumnArrayExecutionData& right_data, UInt8* dst_nullmap_data, + UInt8* dst_data) const { using ExecutorImpl = OverlapSetImpl; for (ssize_t row = 0; row < left_data.offsets_ptr->size(); ++row) { - if (dst_nullmap_data[row]) { - continue; - } - ssize_t left_start = (*left_data.offsets_ptr)[row - 1]; ssize_t left_size = (*left_data.offsets_ptr)[row] - left_start; ssize_t right_start = (*right_data.offsets_ptr)[row - 1]; @@ -368,13 +392,42 @@ private: continue; } - ExecutorImpl impl; + const auto* small_data = &left_data; + const auto* large_data = &right_data; + + ssize_t small_start = left_start; + ssize_t large_start = right_start; + ssize_t small_size = left_size; + ssize_t large_size = right_size; if (right_size < left_size) { - impl.insert_array(right_data.nested_col, right_start, right_size); - dst_data[row] = impl.find_any(left_data.nested_col, left_start, left_size); + std::swap(small_data, large_data); + std::swap(small_start, large_start); + std::swap(small_size, large_size); + } + + ExecutorImpl impl; + if (small_data->nested_nullmap_data) { + impl.template insert_array(small_data->nested_col, + small_data->nested_nullmap_data, small_start, + small_size); } else { - impl.insert_array(left_data.nested_col, left_start, left_size); - dst_data[row] = impl.find_any(right_data.nested_col, right_start, right_size); + impl.template insert_array(small_data->nested_col, + small_data->nested_nullmap_data, small_start, + small_size); + } + + if (large_data->nested_nullmap_data) { + dst_data[row] = impl.template find_any(large_data->nested_col, + large_data->nested_nullmap_data, + large_start, large_size); + } else { + dst_data[row] = impl.template find_any(large_data->nested_col, + large_data->nested_nullmap_data, + large_start, large_size); + } + + if (dst_data[row]) { + dst_nullmap_data[row] = 0; } } return Status::OK(); diff --git a/be/test/vec/function/function_arrays_overlap_test.cpp b/be/test/vec/function/function_arrays_overlap_test.cpp index 3297f5fc28..bd7a29d6f6 100644 --- a/be/test/vec/function/function_arrays_overlap_test.cpp +++ b/be/test/vec/function/function_arrays_overlap_test.cpp @@ -15,15 +15,14 @@ // specific language governing permissions and limitations // under the License. +#include + #include -#include "common/status.h" #include "function_test_util.h" -#include "gtest/gtest_pred_impl.h" #include "testutil/any_type.h" #include "vec/core/field.h" #include "vec/core/types.h" -#include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" namespace doris::vectorized { @@ -113,8 +112,15 @@ TEST(function_arrays_overlap_test, arrays_overlap) { Array vec1 = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67), ut_type::DECIMALFIELD(0.0)}; Array vec2 = {ut_type::DECIMALFIELD(17014116.67)}; - DataSet data_set = { - {{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}}; + + Array vec3 = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67), + Null()}; + Array vec4 = {ut_type::DECIMALFIELD(-17014116.67)}; + Array vec5 = {ut_type::DECIMALFIELD(-17014116.68)}; + DataSet data_set = {{{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, + {{vec1, Null()}, Null()}, {{empty_arr, vec1}, UInt8(0)}, + {{vec3, vec4}, UInt8(1)}, {{vec3, vec5}, Null()}, + {{vec4, vec3}, UInt8(1)}, {{vec5, vec3}, Null()}}; static_cast(check_function(func_name, input_types, data_set)); } @@ -127,10 +133,49 @@ TEST(function_arrays_overlap_test, arrays_overlap) { Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; Array vec2 = {Field(String("abc", 3))}; Array vec3 = {Field(String("", 0))}; - DataSet data_set = {{{vec1, vec2}, UInt8(1)}, - {{vec1, vec3}, UInt8(1)}, - {{Null(), vec1}, Null()}, - {{empty_arr, vec1}, UInt8(0)}}; + Array vec4 = {Field(String("abc", 3)), Null()}; + Array vec5 = {Field(String("abcd", 4)), Null()}; + DataSet data_set = {{{vec1, vec2}, UInt8(1)}, {{vec1, vec3}, UInt8(1)}, + {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}, + {{vec4, vec1}, UInt8(1)}, {{vec1, vec5}, Null()}, + {{vec1, vec4}, UInt8(1)}, {{vec5, vec1}, Null()}}; + + static_cast(check_function(func_name, input_types, data_set)); + } + + // arrays_overlap(Array, Array), Non-nullable + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Decimal128V2, TypeIndex::Array, + TypeIndex::Decimal128V2}; + + Array vec1 = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67), + ut_type::DECIMALFIELD(0.0)}; + Array vec2 = {ut_type::DECIMALFIELD(17014116.67)}; + + Array vec3 = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67)}; + Array vec4 = {ut_type::DECIMALFIELD(-17014116.67)}; + Array vec5 = {ut_type::DECIMALFIELD(-17014116.68)}; + DataSet data_set = {{{vec1, vec2}, UInt8(1)}, {{empty_arr, vec1}, UInt8(0)}, + {{vec3, vec4}, UInt8(1)}, {{vec3, vec5}, UInt8(0)}, + {{vec4, vec3}, UInt8(1)}, {{vec5, vec3}, UInt8(0)}}; + + static_cast(check_function(func_name, input_types, data_set)); + } + + // arrays_overlap(Array, Array), Non-nullable + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::Array, + TypeIndex::String}; + + Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; + Array vec2 = {Field(String("abc", 3))}; + Array vec3 = {Field(String("", 0))}; + Array vec4 = {Field(String("abc", 3))}; + Array vec5 = {Field(String("abcd", 4))}; + DataSet data_set = {{{vec1, vec2}, UInt8(1)}, {{vec1, vec3}, UInt8(1)}, + {{empty_arr, vec1}, UInt8(0)}, {{vec4, vec1}, UInt8(1)}, + {{vec1, vec5}, UInt8(0)}, {{vec1, vec4}, UInt8(1)}, + {{vec5, vec1}, UInt8(0)}}; static_cast(check_function(func_name, input_types, data_set)); } diff --git a/be/test/vec/function/function_test_util.cpp b/be/test/vec/function/function_test_util.cpp index 88014c7273..229396fba1 100644 --- a/be/test/vec/function/function_test_util.cpp +++ b/be/test/vec/function/function_test_util.cpp @@ -170,6 +170,9 @@ size_t type_index_to_data_type(const std::vector& input_types, size_t i return ret; } desc.children.push_back(sub_desc.type_desc); + if (sub_desc.is_nullable) { + sub_type = make_nullable(sub_type); + } type = std::make_shared(sub_type); return ret + 1; } diff --git a/regression-test/data/inverted_index_p0/test_array_contains_with_inverted_index.out b/regression-test/data/inverted_index_p0/test_array_contains_with_inverted_index.out index a93c7e2a2c..40f5192033 100644 --- a/regression-test/data/inverted_index_p0/test_array_contains_with_inverted_index.out +++ b/regression-test/data/inverted_index_p0/test_array_contains_with_inverted_index.out @@ -785,16 +785,20 @@ 2019-01-01 ee27ee1da291e46403c408e220bed6e1 ["y"] -- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] + +-- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] -- !sql -- -- !sql -- -- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] -- !sql -- - --- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] -- !sql -- 2017-01-01 021603e7dcfe65d44af0efd0e5aee154 ["n"] @@ -808,6 +812,7 @@ 2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a8 [] 2017-01-01 9fcb57ae675f0af4d613d9e6c0e8a2a2 ["o"] 2017-01-01 d93d942d985a8fb7547c72dada8d332d ["d", "e", "f", "g", "h", "i", "j", "k", "l"] +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] -- !sql -- 2017-01-01 021603e7dcfe65d44af0efd0e5aee154 ["n"] @@ -821,6 +826,7 @@ 2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a8 [] 2017-01-01 9fcb57ae675f0af4d613d9e6c0e8a2a2 ["o"] 2017-01-01 d93d942d985a8fb7547c72dada8d332d ["d", "e", "f", "g", "h", "i", "j", "k", "l"] +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] -- !sql -- @@ -965,16 +971,20 @@ 2019-01-01 ee27ee1da291e46403c408e220bed6e1 ["y"] -- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] + +-- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] -- !sql -- -- !sql -- -- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] -- !sql -- - --- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] -- !sql -- 2017-01-01 021603e7dcfe65d44af0efd0e5aee154 ["n"] @@ -988,6 +998,7 @@ 2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a8 [] 2017-01-01 9fcb57ae675f0af4d613d9e6c0e8a2a2 ["o"] 2017-01-01 d93d942d985a8fb7547c72dada8d332d ["d", "e", "f", "g", "h", "i", "j", "k", "l"] +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] -- !sql -- 2017-01-01 021603e7dcfe65d44af0efd0e5aee154 ["n"] @@ -1001,6 +1012,7 @@ 2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a8 [] 2017-01-01 9fcb57ae675f0af4d613d9e6c0e8a2a2 ["o"] 2017-01-01 d93d942d985a8fb7547c72dada8d332d ["d", "e", "f", "g", "h", "i", "j", "k", "l"] +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] -- !sql -- diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out index 3b3bfb17f9..b2a47bde97 100644 --- a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out @@ -29,7 +29,7 @@ 5 \N 6 \N 7 \N -8 \N +8 true 9 true -- !select -- diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_by_literal.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_by_literal.out index f8b42d4933..3d529463f3 100644 --- a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_by_literal.out +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_by_literal.out @@ -441,7 +441,7 @@ false true -- !sql -- -\N +true -- !sql -- false @@ -1425,7 +1425,7 @@ false true -- !sql -- -\N +true -- !sql -- false @@ -2409,7 +2409,7 @@ false true -- !sql -- -\N +true -- !sql -- false diff --git a/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out b/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out index ff409b7405..fdc35be68f 100644 --- a/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out +++ b/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out @@ -723,16 +723,20 @@ 2019-01-01 ee27ee1da291e46403c408e220bed6e1 {"inventors":["y"]} -- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde {"inventors":["p","q","r","s","t"]} + +-- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde {"inventors":["p","q","r","s","t"]} -- !sql -- -- !sql -- -- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde {"inventors":["p","q","r","s","t"]} -- !sql -- - --- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde {"inventors":["p","q","r","s","t"]} -- !sql -- 2017-01-01 021603e7dcfe65d44af0efd0e5aee154 {"inventors":["n"]} @@ -746,6 +750,7 @@ 2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a8 {} 2017-01-01 9fcb57ae675f0af4d613d9e6c0e8a2a2 {"inventors":["o"]} 2017-01-01 d93d942d985a8fb7547c72dada8d332d {"inventors":["d","e","f","g","h","i","j","k","l"]} +2019-01-01 a648a447b8f71522f11632eba4b4adde {"inventors":["p","q","r","s","t"]} -- !sql -- 2017-01-01 021603e7dcfe65d44af0efd0e5aee154 {"inventors":["n"]} @@ -759,6 +764,7 @@ 2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a8 {} 2017-01-01 9fcb57ae675f0af4d613d9e6c0e8a2a2 {"inventors":["o"]} 2017-01-01 d93d942d985a8fb7547c72dada8d332d {"inventors":["d","e","f","g","h","i","j","k","l"]} +2019-01-01 a648a447b8f71522f11632eba4b4adde {"inventors":["p","q","r","s","t"]} -- !sql -- @@ -903,16 +909,20 @@ 2019-01-01 ee27ee1da291e46403c408e220bed6e1 {"inventors":["y"]} -- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde {"inventors":["p","q","r","s","t"]} + +-- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde {"inventors":["p","q","r","s","t"]} -- !sql -- -- !sql -- -- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde {"inventors":["p","q","r","s","t"]} -- !sql -- - --- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde {"inventors":["p","q","r","s","t"]} -- !sql -- 2017-01-01 021603e7dcfe65d44af0efd0e5aee154 {"inventors":["n"]} @@ -926,6 +936,7 @@ 2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a8 {} 2017-01-01 9fcb57ae675f0af4d613d9e6c0e8a2a2 {"inventors":["o"]} 2017-01-01 d93d942d985a8fb7547c72dada8d332d {"inventors":["d","e","f","g","h","i","j","k","l"]} +2019-01-01 a648a447b8f71522f11632eba4b4adde {"inventors":["p","q","r","s","t"]} -- !sql -- 2017-01-01 021603e7dcfe65d44af0efd0e5aee154 {"inventors":["n"]} @@ -939,6 +950,7 @@ 2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a8 {} 2017-01-01 9fcb57ae675f0af4d613d9e6c0e8a2a2 {"inventors":["o"]} 2017-01-01 d93d942d985a8fb7547c72dada8d332d {"inventors":["d","e","f","g","h","i","j","k","l"]} +2019-01-01 a648a447b8f71522f11632eba4b4adde {"inventors":["p","q","r","s","t"]} -- !sql --