From 31ab569c1d9ae8d0514d20792b8f045c1d3e1076 Mon Sep 17 00:00:00 2001 From: zhangstar333 <87313068+zhangstar333@users.noreply.github.com> Date: Wed, 23 Feb 2022 11:42:16 +0800 Subject: [PATCH] [Vectorized][Feature] support some bitmap functions (#8138) --- be/src/vec/CMakeLists.txt | 1 + be/src/vec/functions/function_bitmap.cpp | 361 ++++++++++-------- .../functions/function_bitmap_variadic.cpp | 142 +++++++ be/src/vec/functions/function_const.h | 2 +- .../vec/functions/simple_function_factory.h | 2 + be/test/vec/function/function_bitmap_test.cpp | 90 +++++ gensrc/script/doris_builtins_functions.py | 22 +- 7 files changed, 451 insertions(+), 169 deletions(-) create mode 100644 be/src/vec/functions/function_bitmap_variadic.cpp diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 742d5135c9..d9ec3090ae 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -105,6 +105,7 @@ set(VEC_FILES exprs/vinfo_func.cpp functions/math.cpp functions/function_bitmap.cpp + functions/function_bitmap_variadic.cpp functions/comparison.cpp functions/comparison_less.cpp functions/comparison_equals.cpp diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index bde20ad185..5bdbfd8c96 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -18,25 +18,21 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionBitmap.h // and modified by Doris -#include "util/string_parser.hpp" -#include "vec/functions/function_totype.h" -#include "vec/functions/function_const.h" -#include "vec/functions/simple_function_factory.h" -#include "vec/functions/function_string.h" #include "gutil/strings/numbers.h" #include "gutil/strings/split.h" +#include "util/string_parser.hpp" +#include "vec/functions/function_const.h" +#include "vec/functions/function_string.h" +#include "vec/functions/function_totype.h" +#include "vec/functions/simple_function_factory.h" namespace doris::vectorized { struct BitmapEmpty { static constexpr auto name = "bitmap_empty"; using ReturnColVec = ColumnBitmap; - static DataTypePtr get_return_type() { - return std::make_shared(); - } - static auto init_value() { - return BitmapValue{}; - } + static DataTypePtr get_return_type() { return std::make_shared(); } + static auto init_value() { return BitmapValue {}; } }; struct NameToBitmap { @@ -62,12 +58,12 @@ struct ToBitmapImpl { // TODO: which where cause problem in to_bitmap(null), rethink how to slove the problem // of null -// if (UNLIKELY(parse_result != StringParser::PARSE_SUCCESS)) { -// return Status::RuntimeError( -// fmt::format("The input: {:.{}} is not valid, to_bitmap only support bigint " -// "value from 0 to 18446744073709551615 currently", -// raw_str, str_size)); -// } + // if (UNLIKELY(parse_result != StringParser::PARSE_SUCCESS)) { + // return Status::RuntimeError( + // fmt::format("The input: {:.{}} is not valid, to_bitmap only support bigint " + // "value from 0 to 18446744073709551615 currently", + // raw_str, str_size)); + // } res.emplace_back(); res.back().add(int_value); } @@ -120,7 +116,7 @@ struct BitmapHash { const char* raw_str = reinterpret_cast(&data[offsets[i - 1]]); size_t str_size = offsets[i] - offsets[i - 1] - 1; uint32_t hash_value = - HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED); + HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED); res.emplace_back(); res.back().add(hash_value); } @@ -149,69 +145,6 @@ struct BitmapCount { } }; -struct NameBitmapAnd { - static constexpr auto name = "bitmap_and"; -}; - -template -struct BitmapAnd { - using ResultDataType = DataTypeBitMap; - using T0 = typename LeftDataType::FieldType; - using T1 = typename RightDataType::FieldType; - using TData = std::vector; - - static Status vector_vector(const TData& lvec, const TData& rvec, TData& res) { - size_t size = lvec.size(); - for (size_t i = 0; i < size; ++i) { - res[i] = lvec[i]; - res[i] &= rvec[i]; - } - return Status::OK(); - } -}; - -struct NameBitmapOr { - static constexpr auto name = "bitmap_or"; -}; - -template -struct BitmapOr { - using ResultDataType = DataTypeBitMap; - using T0 = typename LeftDataType::FieldType; - using T1 = typename RightDataType::FieldType; - using TData = std::vector; - - static Status vector_vector(const TData& lvec, const TData& rvec, TData& res) { - size_t size = lvec.size(); - for (size_t i = 0; i < size; ++i) { - res[i] = lvec[i]; - res[i] |= rvec[i]; - } - return Status::OK(); - } -}; - -struct NameBitmapXor { - static constexpr auto name = "bitmap_xor"; -}; - -template -struct BitmapXor { - using ResultDataType = DataTypeBitMap; - using T0 = typename LeftDataType::FieldType; - using T1 = typename RightDataType::FieldType; - using TData = std::vector; - - static Status vector_vector(const TData& lvec, const TData& rvec, TData& res) { - size_t size = lvec.size(); - for (size_t i = 0; i < size; ++i) { - res[i] = lvec[i]; - res[i] ^= rvec[i]; - } - return Status::OK(); - } -}; - struct NameBitmapNot { static constexpr auto name = "bitmap_not"; }; @@ -233,6 +166,56 @@ struct BitmapNot { } }; +struct NameBitmapAndNot { + static constexpr auto name = "bitmap_and_not"; +}; + +template +struct BitmapAndNot { + using ResultDataType = DataTypeBitMap; + using T0 = typename LeftDataType::FieldType; + using T1 = typename RightDataType::FieldType; + using TData = std::vector; + + static Status vector_vector(const TData& lvec, const TData& rvec, TData& res) { + size_t size = lvec.size(); + BitmapValue mid_data; + for (size_t i = 0; i < size; ++i) { + mid_data = lvec[i]; + mid_data &= rvec[i]; + res[i] = lvec[i]; + res[i] -= mid_data; + mid_data.clear(); + } + return Status::OK(); + } +}; + +struct NameBitmapAndNotCount { + static constexpr auto name = "bitmap_and_not_count"; +}; + +template +struct BitmapAndNotCount { + using ResultDataType = DataTypeInt64; + using T0 = typename LeftDataType::FieldType; + using T1 = typename RightDataType::FieldType; + using TData = std::vector; + using ResTData = typename ColumnVector::Container; + + static Status vector_vector(const TData& lvec, const TData& rvec, ResTData& res) { + size_t size = lvec.size(); + BitmapValue mid_data; + for (size_t i = 0; i < size; ++i) { + mid_data = lvec[i]; + mid_data &= rvec[i]; + res[i] = lvec[i].andnot_cardinality(mid_data); + mid_data.clear(); + } + return Status::OK(); + } +}; + struct NameBitmapContains { static constexpr auto name = "bitmap_contains"; }; @@ -278,6 +261,30 @@ struct BitmapHasAny { } }; +struct NameBitmapHasAll { + static constexpr auto name = "bitmap_has_all"; +}; + +template +struct BitmapHasAll { + using ResultDataType = DataTypeUInt8; + using T0 = typename LeftDataType::FieldType; + using T1 = typename RightDataType::FieldType; + using TData = std::vector; + using ResTData = typename ColumnVector::Container; + + static Status vector_vector(const TData& lvec, const TData& rvec, ResTData& res) { + size_t size = lvec.size(); + for (size_t i = 0; i < size; ++i) { + uint64_t lhs_cardinality = lvec[i].cardinality(); + auto bitmap = const_cast(lvec[i]); + bitmap |= rvec[i]; + res[i] = bitmap.cardinality() == lhs_cardinality; + } + return Status::OK(); + } +}; + struct NameBitmapMin { static constexpr auto name = "bitmap_min"; }; @@ -345,108 +352,148 @@ struct BitmapToString { } }; -struct NameBitmapAndCount { - static constexpr auto name = "bitmap_and_count"; -}; -template -struct BitmapAndCount { - using ResultDataType = DataTypeInt64; - using T0 = typename LeftDataType::FieldType; - using T1 = typename RightDataType::FieldType; - using TData = std::vector; - using ResTData = typename ColumnVector::Container; +struct SubBitmap { + static constexpr auto name = "sub_bitmap"; + using TData1 = std::vector; + using TData2 = typename ColumnVector::Container; - static Status vector_vector(const TData& lvec, const TData& rvec, ResTData& res) { - size_t size = lvec.size(); - BitmapValue val; - for (size_t i = 0; i < size; ++i) { - val |= lvec[i]; - val &= rvec[i]; - res[i] = val.cardinality(); - val.clear(); + static Status vector_vector(const TData1& bitmap_data, const TData2& offset_data, + const TData2& limit_data, NullMap& null_map, + size_t input_rows_count, TData1& res) { + for (int i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + continue; + } + if (limit_data[i] <= 0) { + null_map[i] = 1; + continue; + } + if (const_cast(bitmap_data)[i].offset_limit(offset_data[i], limit_data[i], + &res[i]) == 0) { + null_map[i] = 1; + } } return Status::OK(); } }; -struct NameBitmapOrCount { - static constexpr auto name = "bitmap_or_count"; -}; -template -struct BitmapOrCount { - using ResultDataType = DataTypeInt64; - using T0 = typename LeftDataType::FieldType; - using T1 = typename RightDataType::FieldType; - using TData = std::vector; - using ResTData = typename ColumnVector::Container; +struct BitmapSubsetLimit { + static constexpr auto name = "bitmap_subset_limit"; + using TData1 = std::vector; + using TData2 = typename ColumnVector::Container; - static Status vector_vector(const TData& lvec, const TData& rvec, ResTData& res) { - size_t size = lvec.size(); - BitmapValue val; - for (size_t i = 0; i < size; ++i) { - val |= lvec[i]; - val |= rvec[i]; - res[i] = val.cardinality(); - val.clear(); + static Status vector_vector(const TData1& bitmap_data, const TData2& offset_data, + const TData2& limit_data, NullMap& null_map, + size_t input_rows_count, TData1& res) { + for (int i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + continue; + } + if (offset_data[i] < 0 || limit_data[i] < 0) { + null_map[i] = 1; + continue; + } + const_cast(bitmap_data)[i].sub_limit(offset_data[i], limit_data[i], &res[i]); } return Status::OK(); } }; -struct NameBitmapXorCount { - static constexpr auto name = "bitmap_xor_count"; -}; -template -struct BitmapXorCount { - using ResultDataType = DataTypeInt64; - using T0 = typename LeftDataType::FieldType; - using T1 = typename RightDataType::FieldType; - using TData = std::vector; - using ResTData = typename ColumnVector::Container; +struct BitmapSubsetInRange { + static constexpr auto name = "bitmap_subset_in_range"; + using TData1 = std::vector; + using TData2 = typename ColumnVector::Container; - static Status vector_vector(const TData& lvec, const TData& rvec, ResTData& res) { - size_t size = lvec.size(); - BitmapValue val; - for (size_t i = 0; i < size; ++i) { - val |= lvec[i]; - val ^= rvec[i]; - res[i] = val.cardinality(); - val.clear(); + static Status vector_vector(const TData1& bitmap_data, const TData2& range_start, + const TData2& range_end, NullMap& null_map, size_t input_rows_count, + TData1& res) { + for (int i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + continue; + } + if (range_start[i] >= range_end[i] || range_start[i] < 0 || range_end[i] < 0) { + null_map[i] = 1; + continue; + } + const_cast(bitmap_data)[i].sub_range(range_start[i], range_end[i], &res[i]); } return Status::OK(); } }; +template +class FunctionBitmapSubs : public IFunction { +public: + static constexpr auto name = Impl::name; + String get_name() const override { return name; } + + static FunctionPtr create() { return std::make_shared(); } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared()); + } + + size_t get_number_of_arguments() const override { return 3; } + + bool use_default_implementation_for_nulls() const override { return false; } + + bool use_default_implementation_for_constants() const override { return true; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + DCHECK_EQ(arguments.size(), 3); + auto res_null_map = ColumnUInt8::create(input_rows_count, 0); + auto res_data_column = ColumnBitmap::create(input_rows_count); + ColumnPtr argument_columns[3]; + + for (int i = 0; i < 3; ++i) { + argument_columns[i] = + block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); + if (auto* nullable = check_and_get_column(*argument_columns[i])) { + VectorizedUtils::update_null_map(res_null_map->get_data(), + nullable->get_null_map_data()); + argument_columns[i] = nullable->get_nested_column_ptr(); + } + } + + auto bitmap_column = assert_cast(argument_columns[0].get()); + auto offset_column = assert_cast*>(argument_columns[1].get()); + auto limit_column = assert_cast*>(argument_columns[2].get()); + + Impl::vector_vector(bitmap_column->get_data(), offset_column->get_data(), + limit_column->get_data(), res_null_map->get_data(), input_rows_count, + res_data_column->get_data()); + + block.get_by_position(result).column = + ColumnNullable::create(std::move(res_data_column), std::move(res_null_map)); + return Status::OK(); + } +}; + using FunctionBitmapEmpty = FunctionConst; using FunctionToBitmap = FunctionUnaryToType; -using FunctionBitmapFromString = FunctionUnaryToType; +using FunctionBitmapFromString = FunctionUnaryToType; using FunctionBitmapHash = FunctionUnaryToType; - using FunctionBitmapCount = FunctionUnaryToType; -using FunctionBitmapAndCount = - FunctionBinaryToType; -using FunctionBitmapOrCount = - FunctionBinaryToType; -using FunctionBitmapXorCount = - FunctionBinaryToType; using FunctionBitmapMin = FunctionUnaryToType; using FunctionBitmapMax = FunctionUnaryToType; using FunctionBitmapToString = FunctionUnaryToType; - -using FunctionBitmapAnd = - FunctionBinaryToType; -using FunctionBitmapOr = - FunctionBinaryToType; -using FunctionBitmapXor = - FunctionBinaryToType; using FunctionBitmapNot = FunctionBinaryToType; - +using FunctionBitmapAndNot = + FunctionBinaryToType; +using FunctionBitmapAndNotCount = FunctionBinaryToType; using FunctionBitmapContains = FunctionBinaryToType; using FunctionBitmapHasAny = FunctionBinaryToType; +using FunctionBitmapHasAll = + FunctionBinaryToType; +using FunctionSubBitmap = FunctionBitmapSubs; +using FunctionBitmapSubsetLimit = FunctionBitmapSubs; +using FunctionBitmapSubsetInRange = FunctionBitmapSubs; void register_function_bitmap(SimpleFunctionFactory& factory) { factory.register_function(); @@ -454,18 +501,18 @@ void register_function_bitmap(SimpleFunctionFactory& factory) { factory.register_function(); factory.register_function(); factory.register_function(); - factory.register_function(); - factory.register_function(); - factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); - factory.register_function(); - factory.register_function(); - factory.register_function(); factory.register_function(); + factory.register_function(); + factory.register_function(); factory.register_function(); factory.register_function(); + factory.register_function(); + factory.register_function(); + factory.register_function(); + factory.register_function(); } } // namespace doris::vectorized diff --git a/be/src/vec/functions/function_bitmap_variadic.cpp b/be/src/vec/functions/function_bitmap_variadic.cpp new file mode 100644 index 0000000000..d9679c3819 --- /dev/null +++ b/be/src/vec/functions/function_bitmap_variadic.cpp @@ -0,0 +1,142 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/columns/column_complex.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_bitmap.h" +#include "vec/functions/function_const.h" +#include "vec/functions/function_string.h" +#include "vec/functions/function_totype.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris::vectorized { + +#define BITMAP_FUNCTION_VARIADIC(CLASS, FUNCTION_NAME, OP) \ + struct CLASS { \ + static constexpr auto name = #FUNCTION_NAME; \ + using ResultDataType = DataTypeBitMap; \ + static Status vector_vector(ColumnPtr argument_columns[], size_t col_size, \ + size_t input_rows_count, std::vector& res) { \ + auto& mid_data = \ + assert_cast(argument_columns[0].get())->get_data(); \ + for (size_t row = 0; row < input_rows_count; ++row) { \ + res[row] = mid_data[row]; \ + } \ + for (size_t col = 1; col < col_size; ++col) { \ + auto& col_data = \ + assert_cast(argument_columns[col].get())->get_data(); \ + for (size_t row = 0; row < input_rows_count; ++row) { \ + res[row] OP col_data[row]; \ + } \ + } \ + return Status::OK(); \ + } \ + } + +#define BITMAP_FUNCTION_COUNT_VARIADIC(CLASS, FUNCTION_NAME, OP) \ + struct CLASS { \ + static constexpr auto name = #FUNCTION_NAME; \ + using ResultDataType = DataTypeInt64; \ + using TData = std::vector; \ + using ResTData = typename ColumnVector::Container; \ + static Status vector_vector(ColumnPtr argument_columns[], size_t col_size, \ + size_t input_rows_count, ResTData& res) { \ + TData vals = assert_cast(argument_columns[0].get())->get_data(); \ + for (size_t col = 1; col < col_size; ++col) { \ + auto& col_data = \ + assert_cast(argument_columns[col].get())->get_data(); \ + for (size_t row = 0; row < input_rows_count; ++row) { \ + vals[row] OP col_data[row]; \ + } \ + } \ + for (size_t row = 0; row < input_rows_count; ++row) { \ + res[row] = vals[row].cardinality(); \ + } \ + return Status::OK(); \ + } \ + } + +BITMAP_FUNCTION_VARIADIC(BitmapOr, bitmap_or, |=); +BITMAP_FUNCTION_VARIADIC(BitmapAnd, bitmap_and, &=); +BITMAP_FUNCTION_VARIADIC(BitmapXor, bitmap_xor, ^=); +BITMAP_FUNCTION_COUNT_VARIADIC(BitmapOrCount, bitmap_or_count, |=); +BITMAP_FUNCTION_COUNT_VARIADIC(BitmapAndCount, bitmap_and_count, &=); +BITMAP_FUNCTION_COUNT_VARIADIC(BitmapXorCount, bitmap_xor_count, ^=); + +template +class FunctionBitMapVariadic : public IFunction { +public: + static constexpr auto name = Impl::name; + + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 0; } + + bool is_variadic() const override { return true; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + using ResultDataType = typename Impl::ResultDataType; + return std::make_shared(); + } + + bool use_default_implementation_for_constants() const override { return true; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + size_t argument_size = arguments.size(); + ColumnPtr argument_columns[argument_size]; + + for (size_t i = 0; i < argument_size; ++i) { + argument_columns[i] = + block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); + } + + using ResultDataType = typename Impl::ResultDataType; //DataTypeBitMap or DataTypeInt64 + using ResultType = typename ResultDataType::FieldType; //BitmapValue or Int64 + using ColVecResult = + std::conditional_t, ColumnComplexType, + ColumnVector>; + typename ColVecResult::MutablePtr col_res = nullptr; + col_res = ColVecResult::create(); + + auto& vec_res = col_res->get_data(); + vec_res.resize(input_rows_count); + + Impl::vector_vector(argument_columns, argument_size, input_rows_count, vec_res); + block.replace_by_position(result, std::move(col_res)); + return Status::OK(); + } +}; + +using FunctionBitmapOr = FunctionBitMapVariadic; +using FunctionBitmapXor = FunctionBitMapVariadic; +using FunctionBitmapAnd = FunctionBitMapVariadic; +using FunctionBitmapOrCount = FunctionBitMapVariadic; +using FunctionBitmapAndCount = FunctionBitMapVariadic; +using FunctionBitmapXorCount = FunctionBitMapVariadic; + +void register_function_bitmap_variadic(SimpleFunctionFactory& factory) { + factory.register_function(); + factory.register_function(); + factory.register_function(); + factory.register_function(); + factory.register_function(); + factory.register_function(); +} +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/functions/function_const.h b/be/src/vec/functions/function_const.h index 46547b53eb..30f3852781 100644 --- a/be/src/vec/functions/function_const.h +++ b/be/src/vec/functions/function_const.h @@ -69,7 +69,7 @@ public: size_t result, size_t input_rows_count) override { auto column = Impl::ReturnColVec::create(); column->get_data().emplace_back(Impl::init_value()); - block.replace_by_position(result, ColumnConst::create(std::move(column), 1)); + block.replace_by_position(result, ColumnConst::create(std::move(column), input_rows_count)); return Status::OK(); } }; diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index c7c35b7232..390418d7d8 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -46,6 +46,7 @@ void register_function_bit(SimpleFunctionFactory& factory); void register_function_math(SimpleFunctionFactory& factory); void register_function_modulo(SimpleFunctionFactory& factory); void register_function_bitmap(SimpleFunctionFactory& factory); +void register_function_bitmap_variadic(SimpleFunctionFactory& factory); void register_function_is_null(SimpleFunctionFactory& factory); void register_function_is_not_null(SimpleFunctionFactory& factory); void register_function_to_time_fuction(SimpleFunctionFactory& factory); @@ -148,6 +149,7 @@ public: static SimpleFunctionFactory instance; std::call_once(oc, [&]() { register_function_bitmap(instance); + register_function_bitmap_variadic(instance); register_function_hll_cardinality(instance); register_function_hll_empty(instance); register_function_hll_hash(instance); diff --git a/be/test/vec/function/function_bitmap_test.cpp b/be/test/vec/function/function_bitmap_test.cpp index 12d10c6ccd..95cb072f7e 100644 --- a/be/test/vec/function/function_bitmap_test.cpp +++ b/be/test/vec/function/function_bitmap_test.cpp @@ -89,6 +89,22 @@ TEST(function_bitmap_test, function_bitmap_and_count) { delete bitmap1; delete bitmap2; delete empty_bitmap; + + { + InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap, TypeIndex::BitMap}; + BitmapValue bitmap1({33, 1, 2019}); + BitmapValue bitmap2({0, 33, std::numeric_limits::min()}); + BitmapValue bitmap3({33, 5, std::numeric_limits::max()}); + auto empty_bitmap = new BitmapValue(); //test empty + + DataSet data_set = {{{&bitmap1, &bitmap2, empty_bitmap}, (int64_t)0}, + {{&bitmap1, &bitmap2, &bitmap3}, (int64_t)1}, //33 + {{&bitmap1, &bitmap2, Null()}, Null()}, + {{&bitmap1, &bitmap3, &bitmap3}, (int64_t)1}}; //33 + + check_function(func_name, input_types, data_set); + delete empty_bitmap; + } } TEST(function_bitmap_test, function_bitmap_or_count) { @@ -107,6 +123,22 @@ TEST(function_bitmap_test, function_bitmap_or_count) { delete bitmap2; delete bitmap3; delete empty_bitmap; + + { + InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap, TypeIndex::BitMap}; + BitmapValue bitmap1({1024, 1, 2019}); + BitmapValue bitmap2({0, 33, std::numeric_limits::min()}); + BitmapValue bitmap3({33, 5, std::numeric_limits::max()}); //18446744073709551615 + auto empty_bitmap = new BitmapValue(); //test empty + + DataSet data_set = {{{&bitmap1, &bitmap2, empty_bitmap}, (int64_t)5}, //0,1,33,1024,2019 + {{&bitmap1, &bitmap2, &bitmap3}, (int64_t)7}, //0,1,5,33,1024,2019,18446744073709551615 + {{&bitmap1, empty_bitmap, Null()}, Null()}, + {{&bitmap1, &bitmap3, &bitmap3}, (int64_t)6}}; //1,5,33,1024,2019,18446744073709551615 + + check_function(func_name, input_types, data_set); + delete empty_bitmap; + } } TEST(function_bitmap_test, function_bitmap_xor_count) { @@ -127,7 +159,65 @@ TEST(function_bitmap_test, function_bitmap_xor_count) { delete bitmap3; delete bitmap4; delete empty_bitmap; + + { + InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap, TypeIndex::BitMap}; + BitmapValue bitmap1({1024, 1, 2019}); + BitmapValue bitmap2({0, 33, std::numeric_limits::min()}); + BitmapValue bitmap3({33, 5, std::numeric_limits::max()}); + auto empty_bitmap = new BitmapValue(); //test empty + + DataSet data_set = {{{&bitmap1, &bitmap2, empty_bitmap}, (int64_t)5}, //0,1,33,1024,2019 + {{&bitmap1, &bitmap2, &bitmap3}, (int64_t)6}, //0,1,5,1024,2019,18446744073709551615 + {{&bitmap1, empty_bitmap, Null()}, Null()}, + {{&bitmap1, &bitmap3, &bitmap3}, (int64_t)3}}; //1,1024,2019 + + check_function(func_name, input_types, data_set); + delete empty_bitmap; + } } + +TEST(function_bitmap_test, function_bitmap_and_not_count) { + std::string func_name = "bitmap_and_not_count"; + InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap}; + BitmapValue bitmap1({1, 2, 3}); + BitmapValue bitmap2({3, 4, std::numeric_limits::min()}); + BitmapValue bitmap3({33, 5, std::numeric_limits::max()}); + auto empty_bitmap = new BitmapValue(); + + DataSet data_set = {{{&bitmap1, empty_bitmap}, (int64_t)3}, //1,2,3 + {{&bitmap2, Null()}, Null()}, + {{&bitmap2, &bitmap3}, (int64_t)3}, //0,3,4 + {{&bitmap1, &bitmap2}, (int64_t)2}}; //1,2 + + check_function(func_name, input_types, data_set); + delete empty_bitmap; +} +TEST(function_bitmap_test, function_bitmap_has_all) { + std::string func_name = "bitmap_has_all"; + InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap}; + + BitmapValue bitmap1( + {1, 4, 5, std::numeric_limits::max(), std::numeric_limits::min()}); + BitmapValue bitmap2( + {4, std::numeric_limits::max(), std::numeric_limits::min()}); + BitmapValue bitmap3 = BitmapValue({0, 1, 2}); + BitmapValue bitmap4 = BitmapValue({0, 1, 2, std::numeric_limits::max()}); + BitmapValue bitmap5 = BitmapValue({0, 1, 2}); + auto empty_bitmap1 = new BitmapValue(); + auto empty_bitmap2 = new BitmapValue(); + + DataSet data_set = {{{&bitmap1, &bitmap2}, uint8(true)}, + {{empty_bitmap1, empty_bitmap2}, uint8(true)}, + {{&bitmap3, &bitmap4}, uint8(false)}, + {{&bitmap4, &bitmap5}, uint8(true)}, + {{Null(), empty_bitmap1}, Null()}}; + + check_function(func_name, input_types, data_set); + delete empty_bitmap1; + delete empty_bitmap2; +} + } // namespace doris::vectorized int main(int argc, char** argv) { diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 2e5c4bf1bd..a75a8cd9a6 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1155,25 +1155,25 @@ visible_functions = [ '', '', 'vec', ''], [['bitmap_and_not_count'], 'BIGINT', ['BITMAP','BITMAP'], '_ZN5doris15BitmapFunctions20bitmap_and_not_countEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', - '', '', '', ''], + '', '', 'vec', ''], [['bitmap_empty'], 'BITMAP', [], '_ZN5doris15BitmapFunctions12bitmap_emptyEPN9doris_udf15FunctionContextE', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], [['bitmap_or'], 'BITMAP', ['BITMAP','BITMAP','...'], '_ZN5doris15BitmapFunctions9bitmap_orEPN9doris_udf15FunctionContextERKNS1_9StringValEiPS5_', - '', '', '', ''], + '', '', 'vec', ''], [['bitmap_or'], 'BITMAP', ['BITMAP','BITMAP'], '_ZN5doris15BitmapFunctions9bitmap_orEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', ''], [['bitmap_xor'], 'BITMAP', ['BITMAP','BITMAP','...'], '_ZN5doris15BitmapFunctions10bitmap_xorEPN9doris_udf15FunctionContextERKNS1_9StringValEiPS5_', - '', '', '', ''], + '', '', 'vec', ''], [['bitmap_xor'], 'BITMAP', ['BITMAP','BITMAP'], '_ZN5doris15BitmapFunctions10bitmap_xorEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', ''], [['bitmap_xor_count'], 'BIGINT', ['BITMAP','BITMAP','...'], '_ZN5doris15BitmapFunctions16bitmap_xor_countEPN9doris_udf15FunctionContextERKNS1_9StringValEiPS5_', - '', '', '', ''], + '', '', 'vec', ''], [['bitmap_xor_count'], 'BIGINT', ['BITMAP','BITMAP'], '_ZN5doris15BitmapFunctions16bitmap_xor_countEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', ''], @@ -1182,13 +1182,13 @@ visible_functions = [ '', '', 'vec', ''], [['bitmap_and'], 'BITMAP', ['BITMAP','BITMAP','...'], '_ZN5doris15BitmapFunctions10bitmap_andEPN9doris_udf15FunctionContextERKNS1_9StringValEiPS5_', - '', '', '', ''], + '', '', 'vec', ''], [['bitmap_and'], 'BITMAP', ['BITMAP','BITMAP'], '_ZN5doris15BitmapFunctions10bitmap_andEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', ''], [['bitmap_and_not'], 'BITMAP', ['BITMAP','BITMAP'], '_ZN5doris15BitmapFunctions14bitmap_and_notEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', - '', '', '', ''], + '', '', 'vec', ''], [['bitmap_to_string'], 'STRING', ['BITMAP'], '_ZN5doris15BitmapFunctions16bitmap_to_stringEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], @@ -1215,16 +1215,16 @@ visible_functions = [ '', '', 'vec', ''], [['bitmap_subset_in_range'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], '_ZN5doris15BitmapFunctions22bitmap_subset_in_rangeEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_', - '', '', 'vec', ''], + '', '', 'vec', 'ALWAYS_NULLABLE'], [['bitmap_subset_limit'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], '_ZN5doris15BitmapFunctions19bitmap_subset_limitEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_', - '', '', 'vec', ''], + '', '', 'vec', 'ALWAYS_NULLABLE'], [['bitmap_and_count'], 'BIGINT', ['BITMAP','BITMAP','...'], '_ZN5doris15BitmapFunctions16bitmap_and_countEPN9doris_udf15FunctionContextERKNS1_9StringValEiPS5_', - '', '', '', ''], + '', '', 'vec', ''], [['bitmap_and_count'], 'BIGINT', ['BITMAP','BITMAP'], '_ZN5doris15BitmapFunctions16bitmap_and_countEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', - '', '', '', ''], + '', '', 'vec', ''], [['bitmap_or_count'], 'BIGINT', ['BITMAP','BITMAP','...'], '_ZN5doris15BitmapFunctions15bitmap_or_countEPN9doris_udf15FunctionContextERKNS1_9StringValEiPS5_', '', '', 'vec', ''], @@ -1233,7 +1233,7 @@ visible_functions = [ '', '', 'vec', ''], [['sub_bitmap'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'], '_ZN5doris15BitmapFunctions10sub_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_', - '', '', 'vec', ''], + '', '', 'vec', 'ALWAYS_NULLABLE'], # hash functions [['murmur_hash3_32'], 'INT', ['VARCHAR', '...'],