[feature](udf) make orthogonal bitmap UDAFs built-in functions (#7211)

Move the orthogonal bitmap UDAFs into the built-in functions.
Add three built-in bitmap functions:

- orthogonal_bitmap_intersect
- orthogonal_bitmap_intersect_count
- orthogonal_bitmap_union_count
This commit is contained in:
Zhengguo Yang
2021-12-07 09:57:26 +08:00
committed by GitHub
parent 8660bf69ff
commit 62d12067aa
21 changed files with 668 additions and 2323 deletions

View File

@ -193,12 +193,8 @@ public:
}
}
// calculate the intersection for _bitmaps's bitmap values
int64_t intersect_count() const {
if (_bitmaps.empty()) {
return 0;
}
// intersection
BitmapValue intersect() const {
BitmapValue result;
auto it = _bitmaps.begin();
result |= it->second;
@ -206,8 +202,15 @@ public:
for (; it != _bitmaps.end(); it++) {
result &= it->second;
}
return result;
}
return result.cardinality();
// calculate the intersection for _bitmaps's bitmap values
int64_t intersect_count() const {
if (_bitmaps.empty()) {
return 0;
}
return intersect().cardinality();
}
// the serialize size
@ -767,6 +770,123 @@ StringVal BitmapFunctions::bitmap_subset_limit(FunctionContext* ctx, const Strin
return serialize(ctx, &ret_bitmap);
}
// Init function for orthogonal_bitmap_union_count.
// The aggregate state is a freshly heap-allocated BitmapValue; its address is stored in
// dst->ptr and dst->len is set to sizeof(BitmapValue).
void BitmapFunctions::orthogonal_bitmap_union_count_init(FunctionContext* ctx, StringVal* dst) {
    dst->is_null = false;
    dst->len = sizeof(BitmapValue);
    auto* state = new BitmapValue();
    dst->ptr = reinterpret_cast<uint8_t*>(state);
}
// Serialize function for the count aggregates whose state is a BitmapValue.
// Returns `src` unchanged when it is null; otherwise writes the bitmap's cardinality as an
// int64_t into a StringVal allocated through `ctx` and deletes the BitmapValue state.
StringVal BitmapFunctions::orthogonal_bitmap_count_serialize(FunctionContext* ctx,
                                                             const StringVal& src) {
    if (src.is_null) {
        return src;
    }
    auto* bitmap = reinterpret_cast<BitmapValue*>(src.ptr);
    const int64_t count = bitmap->cardinality();
    StringVal serialized(ctx, sizeof(int64_t));
    *reinterpret_cast<int64_t*>(serialized.ptr) = count;
    // The state was allocated with new by the init function; release it here.
    delete bitmap;
    return serialized;
}
// Init function for orthogonal_bitmap_intersect.
// With more than one constant argument, the state is a heap-allocated BitmapIntersect<T>
// that has every constant argument from index 2 onwards registered as a key.
// Otherwise the state is a heap-allocated BitmapValue.
template <typename T, typename ValType>
void BitmapFunctions::orthogonal_bitmap_intersect_init(FunctionContext* ctx, StringVal* dst) {
    dst->is_null = false;
    if (ctx->get_num_constant_args() <= 1) {
        dst->len = sizeof(BitmapValue);
        dst->ptr = reinterpret_cast<uint8_t*>(new BitmapValue());
        return;
    }
    dst->len = sizeof(BitmapIntersect<T>);
    auto* state = new BitmapIntersect<T>();
    // constant args start from index 2
    int arg_idx = 2;
    while (arg_idx < ctx->get_num_constant_args()) {
        auto* key_arg = reinterpret_cast<ValType*>(ctx->get_constant_arg(arg_idx));
        state->add_key(detail::get_val<ValType, T>(*key_arg));
        ++arg_idx;
    }
    dst->ptr = reinterpret_cast<uint8_t*>(state);
}
// Init function for orthogonal_bitmap_intersect_count.
// With more than one constant argument, the state is a heap-allocated BitmapIntersect<T>
// that has every constant argument from index 2 onwards registered as a key.
// Otherwise the state is a heap-allocated int64_t counter initialised to 0.
template <typename T, typename ValType>
void BitmapFunctions::orthogonal_bitmap_intersect_count_init(FunctionContext* ctx, StringVal* dst) {
    dst->is_null = false;
    if (ctx->get_num_constant_args() <= 1) {
        dst->len = sizeof(int64_t);
        dst->ptr = reinterpret_cast<uint8_t*>(new int64_t(0));
        return;
    }
    dst->len = sizeof(BitmapIntersect<T>);
    auto* state = new BitmapIntersect<T>();
    // constant args start from index 2
    int arg_idx = 2;
    while (arg_idx < ctx->get_num_constant_args()) {
        auto* key_arg = reinterpret_cast<ValType*>(ctx->get_constant_arg(arg_idx));
        state->add_key(detail::get_val<ValType, T>(*key_arg));
        ++arg_idx;
    }
    dst->ptr = reinterpret_cast<uint8_t*>(state);
}
// Serialize function for orthogonal_bitmap_intersect.
// Treats `src` as a BitmapIntersect<T> state, computes the intersection of its bitmaps,
// serializes that bitmap and deletes the state.
template <typename T>
StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize(FunctionContext* ctx,
                                                                 const StringVal& src) {
    auto* state = reinterpret_cast<BitmapIntersect<T>*>(src.ptr);
    BitmapValue intersection = state->intersect();
    StringVal serialized = serialize(ctx, &intersection);
    delete state;
    return serialized;
}
// Finalize function that treats `src` as a BitmapIntersect<T> state, returns the
// cardinality of the intersection of its bitmaps and deletes the state.
template <typename T>
BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize(FunctionContext* ctx,
                                                                const StringVal& src) {
    auto* state = reinterpret_cast<BitmapIntersect<T>*>(src.ptr);
    BigIntVal count(state->intersect_count());
    delete state;
    return count;
}
// Merge function for the count aggregates: adds the int64_t stored in `src` to the int64_t
// counter held by `dst`.
// If dst->len is not sizeof(int64_t), the current dst state is deleted as a BitmapValue and
// replaced by a fresh zeroed int64_t counter before accumulating.
// NOTE(review): assumes any non-counter dst state is a BitmapValue — confirm against the
// init functions paired with this merge.
void BitmapFunctions::orthogonal_bitmap_count_merge(FunctionContext* context, const StringVal& src,
                                                    StringVal* dst) {
    if (dst->len != sizeof(int64_t)) {
        delete reinterpret_cast<BitmapValue*>(dst->ptr);
        dst->is_null = false;
        dst->len = sizeof(int64_t);
        dst->ptr = reinterpret_cast<uint8_t*>(new int64_t(0));
    }
    auto* total = reinterpret_cast<int64_t*>(dst->ptr);
    *total += *reinterpret_cast<int64_t*>(src.ptr);
}
// Finalize function for the count aggregates: reads the int64_t counter that `src` points
// to, deletes it and returns its value.
BigIntVal BitmapFunctions::orthogonal_bitmap_count_finalize(FunctionContext* context,
                                                            const StringVal& src) {
    auto* counter = reinterpret_cast<int64_t*>(src.ptr);
    const int64_t total = *counter;
    delete counter;
    return total;
}
// Serialize function for orthogonal_bitmap_intersect_count.
// Treats `src` as a BitmapIntersect<T> state, writes the cardinality of the intersection
// as an int64_t into a StringVal allocated through `ctx`, and deletes the state.
template <typename T>
StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize(FunctionContext* ctx,
                                                                       const StringVal& src) {
    auto* state = reinterpret_cast<BitmapIntersect<T>*>(src.ptr);
    const int64_t count = state->intersect_count();
    StringVal serialized(ctx, sizeof(int64_t));
    *reinterpret_cast<int64_t*>(serialized.ptr) = count;
    delete state;
    return serialized;
}
template void BitmapFunctions::bitmap_update_int<TinyIntVal>(FunctionContext* ctx,
const TinyIntVal& src, StringVal* dst);
template void BitmapFunctions::bitmap_update_int<SmallIntVal>(FunctionContext* ctx,
@ -903,4 +1023,78 @@ template BigIntVal BitmapFunctions::bitmap_intersect_finalize<DecimalV2Value>(Fu
template BigIntVal BitmapFunctions::bitmap_intersect_finalize<StringValue>(FunctionContext* ctx,
const StringVal& src);
// Explicit instantiations of the orthogonal bitmap aggregate templates, one per supported
// key type (int8_t, int16_t, int32_t, int64_t, float, double, StringValue).

// orthogonal_bitmap_intersect_count_init<T, ValType>
template void BitmapFunctions::orthogonal_bitmap_intersect_count_init<int8_t, TinyIntVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::orthogonal_bitmap_intersect_count_init<int16_t, SmallIntVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::orthogonal_bitmap_intersect_count_init<int32_t, IntVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::orthogonal_bitmap_intersect_count_init<int64_t, BigIntVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::orthogonal_bitmap_intersect_count_init<float, FloatVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::orthogonal_bitmap_intersect_count_init<double, DoubleVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::orthogonal_bitmap_intersect_count_init<StringValue, StringVal>(
FunctionContext* ctx, StringVal* dst);
// orthogonal_bitmap_intersect_init<T, ValType>
template void BitmapFunctions::orthogonal_bitmap_intersect_init<int8_t, TinyIntVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::orthogonal_bitmap_intersect_init<int16_t, SmallIntVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::orthogonal_bitmap_intersect_init<int32_t, IntVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::orthogonal_bitmap_intersect_init<int64_t, BigIntVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::orthogonal_bitmap_intersect_init<float, FloatVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::orthogonal_bitmap_intersect_init<double, DoubleVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::orthogonal_bitmap_intersect_init<StringValue, StringVal>(
FunctionContext* ctx, StringVal* dst);
// orthogonal_bitmap_intersect_serialize<T>
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize<int8_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize<int16_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize<int32_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize<int64_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize<float>(
FunctionContext* ctx, const StringVal& src);
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize<double>(
FunctionContext* ctx, const StringVal& src);
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize<StringValue>(
FunctionContext* ctx, const StringVal& src);
// orthogonal_bitmap_intersect_finalize<T>
template BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize<int8_t>(
FunctionContext* ctx, const StringVal& src);
template BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize<int16_t>(
FunctionContext* ctx, const StringVal& src);
template BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize<int32_t>(
FunctionContext* ctx, const StringVal& src);
template BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize<int64_t>(
FunctionContext* ctx, const StringVal& src);
template BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize<float>(
FunctionContext* ctx, const StringVal& src);
template BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize<double>(
FunctionContext* ctx, const StringVal& src);
template BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize<StringValue>(
FunctionContext* ctx, const StringVal& src);
// orthogonal_bitmap_intersect_count_serialize<T>
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize<int8_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize<int16_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize<int32_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize<int64_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize<float>(
FunctionContext* ctx, const StringVal& src);
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize<double>(
FunctionContext* ctx, const StringVal& src);
template StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize<StringValue>(
FunctionContext* ctx, const StringVal& src);
} // namespace doris

View File

@ -114,6 +114,31 @@ public:
const BigIntVal& cardinality_limit);
static StringVal sub_bitmap(FunctionContext* ctx, const StringVal& src, const BigIntVal& offset,
const BigIntVal& cardinality_limit);
// ---- orthogonal bitmap aggregate functions ----
// Init for orthogonal_bitmap_union_count: allocates a BitmapValue as the aggregate state.
static void orthogonal_bitmap_union_count_init(FunctionContext* ctx, StringVal* slot);
// Serializes a BitmapValue state as its int64_t cardinality and frees the state.
static StringVal orthogonal_bitmap_count_serialize(FunctionContext* ctx, const StringVal& src);
// Adds the int64_t count carried by `src` to the int64_t counter held in `dst`.
static void orthogonal_bitmap_count_merge(FunctionContext* context, const StringVal& src,
StringVal* dst);
// Returns the accumulated int64_t counter and frees it.
static BigIntVal orthogonal_bitmap_count_finalize(FunctionContext* context,
const StringVal& src);
// orthogonal intersect and intersect count
// T is the key type stored in the intersect state; ValType is the UDF value type of the
// constant key arguments.
template <typename T, typename ValType>
static void orthogonal_bitmap_intersect_count_init(FunctionContext* ctx, StringVal* dst);
template <typename T, typename ValType>
static void orthogonal_bitmap_intersect_init(FunctionContext* ctx, StringVal* dst);
// Serializes the intersection bitmap of a BitmapIntersect<T> state and frees the state.
template <typename T>
static StringVal orthogonal_bitmap_intersect_serialize(FunctionContext* ctx,
const StringVal& src);
// Returns the cardinality of the intersection of a BitmapIntersect<T> state and frees it.
template <typename T>
static BigIntVal orthogonal_bitmap_intersect_finalize(FunctionContext* ctx,
const StringVal& src);
// orthogonal_bitmap_intersect_count_serialize: serializes the intersection cardinality of a
// BitmapIntersect<T> state as an int64_t and frees the state.
template <typename T>
static StringVal orthogonal_bitmap_intersect_count_serialize(FunctionContext* ctx,
const StringVal& src);
};
} // namespace doris
#endif //DORIS_BE_SRC_QUERY_EXPRS_BITMAP_FUNCTION_H

View File

@ -50,6 +50,6 @@ add_library(Webserver STATIC
action/meta_action.cpp
action/compaction_action.cpp
action/config_action.cpp
action/check_rpc_channel_action
action/reset_rpc_channel_action
action/check_rpc_channel_action.cpp
action/reset_rpc_channel_action.cpp
)

View File

@ -28,7 +28,12 @@
// object serves as the interface object between the UDF/UDA and the doris process.
namespace doris {
class FunctionContextImpl;
}
struct StringValue;
struct BitmapValue;
struct DecimalV2Value;
struct DateTimeValue;
struct CollectionValue;
} // namespace doris
namespace doris_udf {

View File

@ -61,6 +61,13 @@ endif()
# Just for clang-tidy: -Wno-expansion-to-defined -Wno-deprecated-declaration
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -g -ggdb -std=c++11 -Wall -Werror -Wno-unused-variable -Wno-expansion-to-defined -Wno-deprecated-declarations -O3")
if (CMAKE_GENERATOR STREQUAL "Ninja" AND NOT DISABLE_COLORED_BUILD)
# Turn on colored output. https://github.com/ninja-build/ninja/wiki/FAQ
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always")
endif ()
message(STATUS "Compiler Flags: ${CMAKE_CXX_FLAGS}")
# Include udf
@ -72,6 +79,5 @@ set_target_properties(udf PROPERTIES IMPORTED_LOCATION $ENV{DORIS_HOME}/output/u
# Add the subdirectory of each new UDF here
add_subdirectory(${SRC_DIR}/udf_samples)
add_subdirectory(${SRC_DIR}/udaf_orthogonal_bitmap)
install(DIRECTORY DESTINATION ${OUTPUT_DIR})

View File

@ -1,92 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# where to put generated libraries
set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/udaf_orthogonal_bitmap")
# where to put generated binaries
set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/udaf_orthogonal_bitmap")
# set CMAKE_BUILD_TARGET_ARCH
# Note: `lscpu | grep 'Architecture' | awk '{print $2}'` only works on systems whose language is en_US.UTF-8, so `uname -m` is used below instead
execute_process(COMMAND bash "-c" "uname -m"
OUTPUT_VARIABLE
CMAKE_BUILD_TARGET_ARCH
OUTPUT_STRIP_TRAILING_WHITESPACE)
message(STATUS "Build target arch is ${CMAKE_BUILD_TARGET_ARCH}")
# Set dirs
set(SRC_DIR "$ENV{DORIS_HOME}/be/src/")
set(THIRDPARTY_DIR "$ENV{DORIS_THIRDPARTY}/installed/")
# Set include dirs
include_directories(./)
include_directories(${THIRDPARTY_DIR}/include/)
# message
message(STATUS "base dir is ${BASE_DIR}")
message(STATUS "doris home dir is $ENV{DORIS_HOME}")
message(STATUS "src dir is ${SRC_DIR}")
message(STATUS "libroaring dir is ${THIRDPARTY_DIR}/lib/libroaring.a")
message(STATUS "thirdparty dir is $ENV{DORIS_THIRDPARTY}")
# Set all libraries
add_library(roaring STATIC IMPORTED)
set_target_properties(roaring PROPERTIES IMPORTED_LOCATION
${THIRDPARTY_DIR}/lib/libroaring.a)
# Set FLAGS
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall -Wno-sign-compare -Wno-unknown-pragmas -pthread")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -fno-strict-aliasing -fno-omit-frame-pointer")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -std=gnu++11 -D__STDC_FORMAT_MACROS")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated -Wno-vla")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-function")
if ("${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "x86" OR "${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "x86_64")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -msse4.2")
endif()
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-attributes -DS2_USE_GFLAGS -DS2_USE_GLOG")
# For any gcc builds:
# -g: Enable symbols for profiler tools
# -Wno-unused-local-typedefs: Do not warn for local typedefs that are unused.
set(CXX_GCC_FLAGS "-g -Wno-unused-local-typedefs -O0 -gdwarf-2 -DNDEBUG")
SET(CMAKE_CXX_FLAGS ${CXX_GCC_FLAGS})
SET(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}")
message(STATUS "Compiler Flags: ${CMAKE_CXX_FLAGS}")
# static link gcc's lib
set(LINK_LIBS
-Wl,--whole-archive
roaring
udf
-Wl,--no-whole-archive
-static-libstdc++
-static-libgcc
)
set(DIR_SRCS
./orthogonal_bitmap_function.cpp
)
add_library(udaf_orthogonal_bitmap SHARED ${DIR_SRCS})
target_link_libraries(udaf_orthogonal_bitmap
${LINK_LIBS}
)

File diff suppressed because it is too large Load Diff

View File

@ -1,492 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "orthogonal_bitmap_function.h"
#include "bitmap_value.h"
#include "string_value.h"
#include <iostream>
namespace doris_udf {
namespace detail {
// Byte-size constants. NOTE(review): presumably the serialized sizes of the datetime and
// decimal key types; none of them is referenced in the visible part of this file — confirm
// whether they are still needed.
const int DATETIME_PACKED_TIME_BYTE_SIZE = 8;
const int DATETIME_TYPE_BYTE_SIZE = 4;
const int DECIMAL_BYTE_SIZE = 16;
// get_val start
// Generic case: unwraps a UDF value wrapper by returning its `val` member converted to T.
template<typename ValType, typename T>
T get_val(const ValType& x) {
    T unwrapped = x.val;
    return unwrapped;
}
// StringVal specialization: converts the UDF string wrapper into a StringValue via
// StringValue::from_string_val.
template<>
StringValue get_val(const StringVal& x) {
    StringValue converted = StringValue::from_string_val(x);
    return converted;
}
// get_val end
// serialize_size start
// Generic case: a key of type T occupies sizeof(T) bytes when serialized; the value of `v`
// itself is not inspected.
template<typename T>
int32_t serialize_size(const T& v) {
    return static_cast<int32_t>(sizeof(T));
}
// StringValue specialization: the string bytes plus a 4-byte length prefix.
template<>
int32_t serialize_size(const StringValue& v) {
    const int32_t total = v.len + 4;
    return total;
}
// serialize_size end
// write_to start
// Generic case: copies the raw bytes of `v` to `dest` and returns the position just past
// the written bytes.
template<typename T>
char* write_to(const T& v, char* dest) {
    memcpy(dest, &v, sizeof(T));
    return dest + sizeof(T);
}
// StringValue specialization: writes a 4-byte length prefix followed by the string bytes
// and returns the position just past the written data.
template<>
char* write_to(const StringValue& v, char* dest) {
    // Copy the length with memcpy rather than storing through an int32_t*: `dest` can sit
    // at an arbitrary (unaligned) offset inside the output buffer.
    const int32_t length = v.len;
    memcpy(dest, &length, sizeof(length));
    dest += sizeof(length);
    memcpy(dest, v.ptr, v.len);
    dest += v.len;
    return dest;
}
// write_to end
// read_from start
// Generic case: copies sizeof(T) raw bytes from `*src` into `*result` and advances `*src`
// past them.
template<typename T>
void read_from(const char** src, T* result) {
    const char* cursor = *src;
    memcpy(result, cursor, sizeof(T));
    *src = cursor + sizeof(T);
}
// StringValue specialization: reads a 4-byte length prefix, makes `*result` a StringValue
// that points at the following bytes inside the source buffer (no copy), and advances
// `*src` past the prefix and the string bytes.
template<>
void read_from(const char** src, StringValue* result) {
    // Load the length with memcpy rather than through an int32_t*: `*src` can sit at an
    // arbitrary (unaligned) offset inside the input buffer.
    int32_t length = 0;
    memcpy(&length, *src, sizeof(length));
    *src += sizeof(length);
    *result = StringValue((char *)*src, length);
    *src += length;
}
// read_from end
} // namespace detail
// Serializes `value` into a StringVal allocated through `ctx` and sized by
// value->getSizeInBytes().
static StringVal serialize(FunctionContext* ctx, BitmapValue* value) {
    const auto byte_size = value->getSizeInBytes();
    StringVal out(ctx, byte_size);
    value->write(reinterpret_cast<char*>(out.ptr));
    return out;
}
// Calculate the intersection of two or more bitmaps
template<typename T>
struct BitmapIntersect {
public:
BitmapIntersect() {}
explicit BitmapIntersect(const char* src) {
deserialize(src);
}
void add_key(const T key) {
BitmapValue empty_bitmap;
_bitmaps[key] = empty_bitmap;
}
void update(const T& key, const BitmapValue& bitmap) {
if (_bitmaps.find(key) != _bitmaps.end()) {
_bitmaps[key] |= bitmap;
}
}
void merge(const BitmapIntersect& other) {
for (auto& kv: other._bitmaps) {
if (_bitmaps.find(kv.first) != _bitmaps.end()) {
_bitmaps[kv.first] |= kv.second;
} else {
_bitmaps[kv.first] = kv.second;
}
}
}
// calculate the intersection for _bitmaps's bitmap values
int64_t intersect_count() const {
if (_bitmaps.empty()) {
return 0;
}
BitmapValue result;
auto it = _bitmaps.begin();
result |= it->second;
it++;
for (;it != _bitmaps.end(); it++) {
result &= it->second;
}
return result.cardinality();
}
// intersection
BitmapValue intersect() {
BitmapValue result;
auto it = _bitmaps.begin();
result |= it->second;
it++;
for (;it != _bitmaps.end(); it++) {
result &= it->second;
}
return result;
}
// the serialize size
size_t size() {
size_t size = 4;
for (auto& kv: _bitmaps) {
size += detail::serialize_size(kv.first);;
size += kv.second.getSizeInBytes();
}
return size;
}
//must call size() first
void serialize(char* dest) {
char* writer = dest;
*(int32_t*)writer = _bitmaps.size();
writer += 4;
for (auto& kv: _bitmaps) {
writer = detail::write_to(kv.first, writer);
kv.second.write(writer);
writer += kv.second.getSizeInBytes();
}
}
void deserialize(const char* src) {
const char* reader = src;
int32_t bitmaps_size = *(int32_t*)reader;
reader += 4;
for (int32_t i = 0; i < bitmaps_size; i++) {
T key;
detail::read_from(&reader, &key);
BitmapValue bitmap(reader);
reader += bitmap.getSizeInBytes();
_bitmaps[key] = bitmap;
}
}
private:
std::map<T, BitmapValue> _bitmaps;
};
// Intentionally empty: nothing is set up here.
void OrthogonalBitmapFunctions::init() {}
// Init function for the union-count aggregate: the state is a freshly heap-allocated
// BitmapValue whose address is stored in dst->ptr.
void OrthogonalBitmapFunctions::bitmap_union_count_init(FunctionContext* ctx, StringVal* dst) {
    dst->is_null = false;
    dst->len = sizeof(BitmapValue);
    auto* state = new BitmapValue();
    dst->ptr = reinterpret_cast<uint8_t*>(state);
}
// Update function: ORs the bitmap carried by `src` into the BitmapValue state held by
// `dst`. Null inputs are ignored.
void OrthogonalBitmapFunctions::bitmap_union(FunctionContext* ctx, const StringVal& src, StringVal* dst) {
    if (src.is_null) {
        return;
    }
    auto* accumulated = reinterpret_cast<BitmapValue*>(dst->ptr);
    if (src.len != 0) {
        // non-zero size: src holds serialized bitmap bytes
        *accumulated |= BitmapValue(reinterpret_cast<char*>(src.ptr));
    } else {
        // zero size means the src input is an agg object
        *accumulated |= *reinterpret_cast<BitmapValue*>(src.ptr);
    }
}
// Serialize function for a BitmapValue state: returns `src` unchanged when it is null,
// otherwise serializes the bitmap and deletes the state.
StringVal OrthogonalBitmapFunctions::bitmap_serialize(FunctionContext* ctx, const StringVal& src) {
    if (src.is_null) {
        return src;
    }
    auto* bitmap = reinterpret_cast<BitmapValue*>(src.ptr);
    StringVal serialized = serialize(ctx, bitmap);
    delete bitmap;
    return serialized;
}
// Serialize function for the count aggregates whose state is a BitmapValue: returns `src`
// unchanged when it is null, otherwise writes the bitmap's cardinality as an int64_t into a
// StringVal allocated through `ctx` and deletes the state.
StringVal OrthogonalBitmapFunctions::bitmap_count_serialize(FunctionContext* ctx, const StringVal& src) {
    if (src.is_null) {
        return src;
    }
    auto* bitmap = reinterpret_cast<BitmapValue*>(src.ptr);
    const int64_t count = bitmap->cardinality();
    StringVal serialized(ctx, sizeof(int64_t));
    *reinterpret_cast<int64_t*>(serialized.ptr) = count;
    delete bitmap;
    return serialized;
}
// Init function for bitmap_intersect.
// With more than one constant argument, the state is a heap-allocated BitmapIntersect<T>
// that has every constant argument from index 2 onwards registered as a key.
// Otherwise the state is a heap-allocated BitmapValue.
template<typename T, typename ValType>
void OrthogonalBitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, StringVal* dst) {
    dst->is_null = false;
    if (ctx->get_num_constant_args() <= 1) {
        dst->len = sizeof(BitmapValue);
        dst->ptr = reinterpret_cast<uint8_t*>(new BitmapValue());
        return;
    }
    dst->len = sizeof(BitmapIntersect<T>);
    auto* state = new BitmapIntersect<T>();
    // constant args start from index 2
    int arg_idx = 2;
    while (arg_idx < ctx->get_num_constant_args()) {
        auto* key_arg = reinterpret_cast<ValType*>(ctx->get_constant_arg(arg_idx));
        state->add_key(detail::get_val<ValType, T>(*key_arg));
        ++arg_idx;
    }
    dst->ptr = reinterpret_cast<uint8_t*>(state);
}
// Init function for intersect_count.
// With more than one constant argument, the state is a heap-allocated BitmapIntersect<T>
// that has every constant argument from index 2 onwards registered as a key.
// Otherwise the state is a heap-allocated int64_t counter initialised to 0.
template<typename T, typename ValType>
void OrthogonalBitmapFunctions::bitmap_intersect_count_init(FunctionContext* ctx, StringVal* dst) {
    dst->is_null = false;
    if (ctx->get_num_constant_args() <= 1) {
        dst->len = sizeof(int64_t);
        dst->ptr = reinterpret_cast<uint8_t*>(new int64_t(0));
        return;
    }
    dst->len = sizeof(BitmapIntersect<T>);
    auto* state = new BitmapIntersect<T>();
    // constant args start from index 2
    int arg_idx = 2;
    while (arg_idx < ctx->get_num_constant_args()) {
        auto* key_arg = reinterpret_cast<ValType*>(ctx->get_constant_arg(arg_idx));
        state->add_key(detail::get_val<ValType, T>(*key_arg));
        ++arg_idx;
    }
    dst->ptr = reinterpret_cast<uint8_t*>(state);
}
// Update function: feeds the bitmap carried by `src` into the BitmapIntersect<T> state
// held by `dst`, under the key extracted from `key`. `num_key` and `keys` are not used in
// the body.
template<typename T, typename ValType>
void OrthogonalBitmapFunctions::bitmap_intersect_update(FunctionContext* ctx, const StringVal& src, const ValType& key,
                                                        int num_key, const ValType* keys, const StringVal* dst) {
    auto* state = reinterpret_cast<BitmapIntersect<T>*>(dst->ptr);
    if (src.len != 0) {
        // non-zero size: src holds serialized bitmap bytes
        state->update(detail::get_val<ValType, T>(key), BitmapValue(reinterpret_cast<char*>(src.ptr)));
    } else {
        // zero size means the src input is an agg object
        state->update(detail::get_val<ValType, T>(key), *reinterpret_cast<BitmapValue*>(src.ptr));
    }
}
// Merge function: deserializes a BitmapIntersect<T> from the bytes in `src` and merges it
// into the BitmapIntersect<T> state held by `dst`.
template<typename T>
void OrthogonalBitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, const StringVal& src, const StringVal* dst) {
    const BitmapIntersect<T> incoming(reinterpret_cast<char*>(src.ptr));
    auto* state = reinterpret_cast<BitmapIntersect<T>*>(dst->ptr);
    state->merge(incoming);
}
// Serialize function: writes the whole BitmapIntersect<T> state (all keys and bitmaps) into
// a StringVal sized by state->size(), then deletes the state.
template<typename T>
StringVal OrthogonalBitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, const StringVal& src) {
    auto* state = reinterpret_cast<BitmapIntersect<T>*>(src.ptr);
    StringVal serialized(ctx, state->size());
    state->serialize(reinterpret_cast<char*>(serialized.ptr));
    delete state;
    return serialized;
}
// Finalize function: returns the cardinality of the intersection held by the
// BitmapIntersect<T> state and deletes the state.
template<typename T>
BigIntVal OrthogonalBitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, const StringVal& src) {
    auto* state = reinterpret_cast<BitmapIntersect<T>*>(src.ptr);
    BigIntVal count(state->intersect_count());
    delete state;
    return count;
}
// Merge function for the count aggregates: adds the int64_t stored in `src` to the int64_t
// counter held by `dst`.
// If dst->len is not sizeof(int64_t), the current dst state is deleted as a BitmapValue and
// replaced by a fresh zeroed int64_t counter before accumulating.
// NOTE(review): assumes any non-counter dst state is a BitmapValue — confirm against the
// init functions paired with this merge.
void OrthogonalBitmapFunctions::bitmap_count_merge(FunctionContext* context, const StringVal& src, StringVal* dst) {
    if (dst->len != sizeof(int64_t)) {
        delete reinterpret_cast<BitmapValue*>(dst->ptr);
        dst->is_null = false;
        dst->len = sizeof(int64_t);
        dst->ptr = reinterpret_cast<uint8_t*>(new int64_t(0));
    }
    auto* total = reinterpret_cast<int64_t*>(dst->ptr);
    *total += *reinterpret_cast<int64_t*>(src.ptr);
}
// Finalize function for the count aggregates: reads the int64_t counter that `src` points
// to, deletes it and returns its value.
BigIntVal OrthogonalBitmapFunctions::bitmap_count_finalize(FunctionContext* context, const StringVal& src) {
    auto* counter = reinterpret_cast<int64_t*>(src.ptr);
    const int64_t total = *counter;
    delete counter;
    return total;
}
// Serialize function for intersect_count: writes the cardinality of the intersection held
// by the BitmapIntersect<T> state as an int64_t into a StringVal allocated through `ctx`,
// then deletes the state.
template<typename T>
StringVal OrthogonalBitmapFunctions::bitmap_intersect_count_serialize(FunctionContext* ctx, const StringVal& src) {
    auto* state = reinterpret_cast<BitmapIntersect<T>*>(src.ptr);
    const int64_t count = state->intersect_count();
    StringVal serialized(ctx, sizeof(int64_t));
    *reinterpret_cast<int64_t*>(serialized.ptr) = count;
    delete state;
    return serialized;
}
// Serialize function that first intersects: computes the intersection of the bitmaps held
// by the BitmapIntersect<T> state, serializes that single bitmap and deletes the state.
template<typename T>
StringVal OrthogonalBitmapFunctions::bitmap_intersect_and_serialize(FunctionContext* ctx, const StringVal& src) {
    auto* state = reinterpret_cast<BitmapIntersect<T>*>(src.ptr);
    BitmapValue intersection = state->intersect();
    StringVal serialized = serialize(ctx, &intersection);
    delete state;
    return serialized;
}
// Explicit instantiations of the OrthogonalBitmapFunctions templates, one per supported key
// type (int8_t, int16_t, int32_t, int64_t, float, double, StringValue).

// bitmap_intersect_count_init<T, ValType>
template void OrthogonalBitmapFunctions::bitmap_intersect_count_init<int8_t, TinyIntVal>(
FunctionContext* ctx, StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_count_init<int16_t, SmallIntVal>(
FunctionContext* ctx, StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_count_init<int32_t, IntVal>(
FunctionContext* ctx, StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_count_init<int64_t, BigIntVal>(
FunctionContext* ctx, StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_count_init<float, FloatVal>(
FunctionContext* ctx, StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_count_init<double, DoubleVal>(
FunctionContext* ctx, StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_count_init<StringValue, StringVal>(
FunctionContext* ctx, StringVal* dst);
// bitmap_intersect_init<T, ValType>
template void OrthogonalBitmapFunctions::bitmap_intersect_init<int8_t, TinyIntVal>(
FunctionContext* ctx, StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_init<int16_t, SmallIntVal>(
FunctionContext* ctx, StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_init<int32_t, IntVal>(
FunctionContext* ctx, StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_init<int64_t, BigIntVal>(
FunctionContext* ctx, StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_init<float, FloatVal>(
FunctionContext* ctx, StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_init<double, DoubleVal>(
FunctionContext* ctx, StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_init<StringValue, StringVal>(
FunctionContext* ctx, StringVal* dst);
// bitmap_intersect_update<T, ValType>
template void OrthogonalBitmapFunctions::bitmap_intersect_update<int8_t, TinyIntVal>(
FunctionContext* ctx, const StringVal& src, const TinyIntVal& key,
int num_key, const TinyIntVal* keys, const StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_update<int16_t, SmallIntVal>(
FunctionContext* ctx, const StringVal& src, const SmallIntVal& key,
int num_key, const SmallIntVal* keys, const StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_update<int32_t, IntVal>(
FunctionContext* ctx, const StringVal& src, const IntVal& key,
int num_key, const IntVal* keys, const StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_update<int64_t, BigIntVal>(
FunctionContext* ctx, const StringVal& src, const BigIntVal& key,
int num_key, const BigIntVal* keys, const StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_update<float, FloatVal>(
FunctionContext* ctx, const StringVal& src, const FloatVal& key,
int num_key, const FloatVal* keys, const StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_update<double, DoubleVal>(
FunctionContext* ctx, const StringVal& src, const DoubleVal& key,
int num_key, const DoubleVal* keys, const StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_update<StringValue, StringVal>(
FunctionContext* ctx, const StringVal& src, const StringVal& key,
int num_key, const StringVal* keys, const StringVal* dst);
// bitmap_intersect_merge<T>
template void OrthogonalBitmapFunctions::bitmap_intersect_merge<int8_t>(
FunctionContext* ctx, const StringVal& src, const StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_merge<int16_t>(
FunctionContext* ctx, const StringVal& src, const StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_merge<int32_t>(
FunctionContext* ctx, const StringVal& src, const StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_merge<int64_t>(
FunctionContext* ctx, const StringVal& src, const StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_merge<float>(
FunctionContext* ctx, const StringVal& src, const StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_merge<double>(
FunctionContext* ctx, const StringVal& src, const StringVal* dst);
template void OrthogonalBitmapFunctions::bitmap_intersect_merge<StringValue>(
FunctionContext* ctx, const StringVal& src, const StringVal* dst);
// bitmap_intersect_serialize<T>
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_serialize<int8_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_serialize<int16_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_serialize<int32_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_serialize<int64_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_serialize<float>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_serialize<double>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_serialize<StringValue>(
FunctionContext* ctx, const StringVal& src);
// bitmap_intersect_finalize<T>
template BigIntVal OrthogonalBitmapFunctions::bitmap_intersect_finalize<int8_t>(
FunctionContext* ctx, const StringVal& src);
template BigIntVal OrthogonalBitmapFunctions::bitmap_intersect_finalize<int16_t>(
FunctionContext* ctx, const StringVal& src);
template BigIntVal OrthogonalBitmapFunctions::bitmap_intersect_finalize<int32_t>(
FunctionContext* ctx, const StringVal& src);
template BigIntVal OrthogonalBitmapFunctions::bitmap_intersect_finalize<int64_t>(
FunctionContext* ctx, const StringVal& src);
template BigIntVal OrthogonalBitmapFunctions::bitmap_intersect_finalize<float>(
FunctionContext* ctx, const StringVal& src);
template BigIntVal OrthogonalBitmapFunctions::bitmap_intersect_finalize<double>(
FunctionContext* ctx, const StringVal& src);
template BigIntVal OrthogonalBitmapFunctions::bitmap_intersect_finalize<StringValue>(
FunctionContext* ctx, const StringVal& src);
// bitmap_intersect_count_serialize<T>
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_count_serialize<int8_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_count_serialize<int16_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_count_serialize<int32_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_count_serialize<int64_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_count_serialize<float>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_count_serialize<double>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_count_serialize<StringValue>(
FunctionContext* ctx, const StringVal& src);
// bitmap_intersect_and_serialize<T>
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_and_serialize<int8_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_and_serialize<int16_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_and_serialize<int32_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_and_serialize<int64_t>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_and_serialize<float>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_and_serialize<double>(
FunctionContext* ctx, const StringVal& src);
template StringVal OrthogonalBitmapFunctions::bitmap_intersect_and_serialize<StringValue>(
FunctionContext* ctx, const StringVal& src);
}

View File

@ -1,62 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "udf.h"
namespace doris_udf {
// Entry points of the orthogonal bitmap aggregate functions
// (orthogonal_bitmap_intersect, orthogonal_bitmap_intersect_count and
// orthogonal_bitmap_union_count). Every member is static; the user manual
// registers them by mangled symbol as the init/update/serialize/merge/finalize
// steps of an aggregate, so these signatures must not change.
class OrthogonalBitmapFunctions {
public:
static void init();
// init step of orthogonal_bitmap_union_count.
static void bitmap_union_count_init(FunctionContext* ctx, StringVal* slot);
// update step of orthogonal_bitmap_union_count and merge step of
// orthogonal_bitmap_intersect.
static void bitmap_union(FunctionContext* ctx, const StringVal& src, StringVal* dst);
// finalize step of orthogonal_bitmap_intersect.
static StringVal bitmap_serialize(FunctionContext* ctx, const StringVal& src);
// serialize step of orthogonal_bitmap_union_count.
static StringVal bitmap_count_serialize(FunctionContext* ctx, const StringVal& src);
// merge step shared by orthogonal_bitmap_intersect_count and
// orthogonal_bitmap_union_count.
static void bitmap_count_merge(FunctionContext* context, const StringVal& src, StringVal* dst);
// finalize step shared by orthogonal_bitmap_intersect_count and
// orthogonal_bitmap_union_count; returns a BigIntVal.
static BigIntVal bitmap_count_finalize(FunctionContext* context, const StringVal& src);
// Intersect and intersect-count entry points. T is the native key type and
// ValType the matching UDF value type (the manual registers T = int64_t with
// ValType = BigIntVal).
// init step of orthogonal_bitmap_intersect_count.
template<typename T, typename ValType>
static void bitmap_intersect_count_init(FunctionContext* ctx, StringVal* dst);
// init step of orthogonal_bitmap_intersect.
template<typename T, typename ValType>
static void bitmap_intersect_init(FunctionContext* ctx, StringVal* dst);
// update step of both orthogonal_bitmap_intersect and
// orthogonal_bitmap_intersect_count.
// NOTE(review): dst is declared as pointer-to-const although it names the
// destination state; presumably the implementation mutates the object it
// refers to through a cast -- confirm against the definition.
template<typename T, typename ValType>
static void bitmap_intersect_update(FunctionContext* ctx, const StringVal& src, const ValType& key,
int num_key, const ValType* keys, const StringVal* dst);
// NOTE(review): bitmap_intersect_merge, bitmap_intersect_serialize and
// bitmap_intersect_finalize are not referenced by the registrations shown in
// the user manual; their intended aggregate is not visible here -- confirm.
template<typename T>
static void bitmap_intersect_merge(FunctionContext* ctx, const StringVal& src, const StringVal* dst);
template<typename T>
static StringVal bitmap_intersect_serialize(FunctionContext* ctx, const StringVal& src);
template<typename T>
static BigIntVal bitmap_intersect_finalize(FunctionContext* ctx, const StringVal& src);
// serialize step of orthogonal_bitmap_intersect_count.
template<typename T>
static StringVal bitmap_intersect_count_serialize(FunctionContext* ctx, const StringVal& src);
// serialize step of orthogonal_bitmap_intersect.
template<typename T>
static StringVal bitmap_intersect_and_serialize(FunctionContext* ctx, const StringVal& src);
};
}

View File

@ -1,148 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef DORIS_CONTRIB_UDF_SRC_UDAF_BITMAP_STRING_VALUE_H
#define DORIS_CONTRIB_UDF_SRC_UDAF_BITMAP_STRING_VALUE_H
#include <string.h>
#include "udf.h"
namespace doris_udf {
// The format of a string-typed slot.
// A StringValue is a (pointer, length) pair; none of the functions below copy
// the bytes, so every StringValue returned by a member function shares its
// buffer with the parent it was derived from.
struct StringValue {
    static const int MAX_LENGTH = (1 << 30);
    // TODO: change ptr to an offset relative to a contiguous memory block,
    // so that we can send row batches between nodes without having to swizzle
    // pointers
    // NOTE: This struct should keep the same memory layout with Slice, otherwise
    // it will lead to BE crash.
    // TODO(zc): we should unify this struct with Slice some day.
    char* ptr;
    size_t len;

    StringValue(char* ptr, int len) : ptr(ptr), len(len) {}
    StringValue() : ptr(nullptr), len(0) {}

    /// Construct a StringValue from 's'. 's' must be valid for as long as
    /// this object is valid.
    explicit StringValue(const std::string& s) : ptr(const_cast<char*>(s.c_str())), len(s.size()) {}

    // Re-points this value at another buffer; the previous buffer is left untouched.
    void replace(char* ptr, int len) {
        this->ptr = ptr;
        this->len = len;
    }

    // Compares the first 'len' bytes of s1 and s2; when they are equal the
    // result is decided by the full lengths n1 and n2 (shorter sorts first).
    // Returns a negative, zero or positive value.
    //
    // The values are length-delimited, not NUL-terminated, so the comparison
    // must be byte-wise: strncmp would stop at the first '\0' and report two
    // values that differ after an embedded NUL as equal.
    static int string_compare(const char* s1, int n1, const char* s2, int n2, int len) {
        // Skip memcmp for an empty prefix: the pointers may be null in that case.
        if (len > 0) {
            int result = memcmp(s1, s2, static_cast<size_t>(len));
            if (result != 0) {
                return result;
            }
        }
        return n1 - n2;
    }

    // Byte-by-byte comparison. Returns:
    //   this < other: a negative value
    //   this == other: 0
    //   this > other: a positive value
    int compare(const StringValue& other) const {
        int l = std::min(len, other.len);
        if (l == 0) {
            // At least one side is empty: order by length only.
            if (len == other.len) {
                return 0;
            } else if (len == 0) {
                return -1;
            } else {
                return 1;
            }
        }
        return string_compare(this->ptr, this->len, other.ptr, other.len, l);
    }

    // ==
    bool eq(const StringValue& other) const {
        if (this->len != other.len) {
            return false;
        }
        return string_compare(this->ptr, this->len, other.ptr, other.len, this->len) == 0;
    }
    bool operator==(const StringValue& other) const { return eq(other); }
    // !=
    bool ne(const StringValue& other) const { return !eq(other); }
    // <=
    bool le(const StringValue& other) const { return compare(other) <= 0; }
    // >=
    bool ge(const StringValue& other) const { return compare(other) >= 0; }
    // <
    bool lt(const StringValue& other) const { return compare(other) < 0; }
    // >
    bool gt(const StringValue& other) const { return compare(other) > 0; }
    bool operator!=(const StringValue& other) const { return ne(other); }
    bool operator<=(const StringValue& other) const { return le(other); }
    bool operator>=(const StringValue& other) const { return ge(other); }
    bool operator<(const StringValue& other) const { return lt(other); }
    bool operator>(const StringValue& other) const { return gt(other); }

    // Both return an owning copy of the bytes.
    std::string debug_string() const { return std::string(ptr, len); }
    std::string to_string() const { return std::string(ptr, len); }

    // Returns the substring starting at start_pos until the end of string.
    // No bounds check is performed on start_pos.
    StringValue substring(int start_pos) const {
        return StringValue(ptr + start_pos, len - start_pos);
    }

    // Returns the substring starting at start_pos with given length.
    // If new_len < 0 then the substring from start_pos to end of string is returned.
    StringValue substring(int start_pos, int new_len) const {
        return StringValue(ptr + start_pos, (new_len < 0) ? (len - start_pos) : new_len);
    }

    // Trims leading and trailing spaces (only ' ', no other whitespace).
    // An empty or all-space value yields a zero-length result.
    StringValue trim() const {
        // Remove leading and trailing spaces.
        int32_t begin = 0;
        while (begin < len && ptr[begin] == ' ') {
            ++begin;
        }
        int32_t end = len - 1;
        while (end > begin && ptr[end] == ' ') {
            --end;
        }
        return StringValue(ptr + begin, end - begin + 1);
    }

    // Writes a StringVal pointing at the same buffer into *sv (no copy).
    void to_string_val(doris_udf::StringVal* sv) const {
        *sv = doris_udf::StringVal(reinterpret_cast<uint8_t*>(ptr), len);
    }

    // Wraps the buffer of 'sv' in a StringValue (no copy).
    static StringValue from_string_val(const doris_udf::StringVal& sv) {
        return StringValue(reinterpret_cast<char*>(sv.ptr), sv.len);
    }
};
} // namespace doris_udf
#endif

View File

@ -212,6 +212,7 @@ module.exports = [
"variables",
"update",
"multi-tenant",
"orthogonal-bitmap-manual",
],
sidebarDepth: 1,
},
@ -243,13 +244,6 @@ module.exports = [
children: [
"contribute-udf",
"user-defined-function",
{
title: "Users contribute UDF",
directoryPath: "contrib/",
children:[
"udaf-orthogonal-bitmap-manual",
],
},
],
},
],
@ -431,6 +425,9 @@ module.exports = [
"bitmap_xor",
"to_bitmap",
"bitmap_max",
"orthogonal_bitmap_intersect",
"orthogonal_bitmap_intersect_count",
"orthogonal_bitmap_union_count",
],
},
{

View File

@ -213,6 +213,7 @@ module.exports = [
"variables",
"update",
"multi-tenant",
"orthogonal-bitmap-manual",
],
sidebarDepth: 1,
},
@ -244,13 +245,6 @@ module.exports = [
children: [
"contribute-udf",
"user-defined-function",
{
title: "用户贡献的 UDF",
directoryPath: "contrib/",
children:[
"udaf-orthogonal-bitmap-manual",
],
},
],
},
],
@ -435,6 +429,9 @@ module.exports = [
"bitmap_xor",
"to_bitmap",
"bitmap_max",
"orthogonal_bitmap_intersect",
"orthogonal_bitmap_intersect_count",
"orthogonal_bitmap_union_count",
],
},
{

View File

@ -1,6 +1,6 @@
---
{
"title": "Orthogonal BITMAP calculation UDAF",
"title": "Orthogonal BITMAP calculation",
"language": "en"
}
---
@ -24,7 +24,7 @@ specific language governing permissions and limitations
under the License.
-->
# Orthogonal BITMAP calculation UDAF
# Orthogonal BITMAP calculation
## Background
@ -35,10 +35,7 @@ The solution is to divide the bitmap column values according to the range, and t
## User guide
1. Create a table and add hid column to represent bitmap column value ID range as hash bucket column
2. Load data: when loading data, divide the bitmap column values into ranges
3. Compile UDAF and produce. So dynamic library
4. Register the UDAF in DORIS, which loads the.so library at run time
5. Usage scenarios
2. Usage scenarios
### Create table
@ -90,46 +87,6 @@ Note: the first column represents the user tags, which have been converted from
When loading data, split the users' bitmap value range vertically. For example, user IDs in the range 1-5000000 share the same hid value, and rows with the same hid value are allocated to the same bucket, so the bitmap values in each bucket are orthogonal. The bitmap UDAF implementation can use this orthogonality within each bucket to compute intersections and unions, and the results are then shuffled to the top node for aggregation.
### Source code and compilation
Source code:
```
contrib/udf/src/udaf_orthogonal_bitmap/
|-- bitmap_value.h
|-- CMakeLists.txt
|-- orthogonal_bitmap_function.cpp
|-- orthogonal_bitmap_function.h
-- string_value.h
```
Compile udaf:
```
$cd contrib/udf
$ sh build_udf.sh
```
libudaf_orthogonal_bitmap.so output directory:
```
output/contrib/udf/lib/udaf_orthogonal_bitmap/libudaf_orthogonal_bitmap.so
```
### Register the UDAF with DORIS
Setting parameters before Doris query
```
set parallel_fragment_exec_instance_num=5
```
Note: set concurrency parameters according to cluster conditions to improve concurrent computing performance
The new UDAF aggregate function is created in mysql client link Session. It is created by registering the function symbol, which is loaded as a dynamic library.
#### orthogonal_bitmap_intersect
The bitmap intersection function
@ -144,28 +101,7 @@ the first parameter is the bitmap column, the second parameter is the dimension
Explain:
On the basis of this table schema, this UDAF has two levels of aggregation in query planning. In the first level, the BE nodes (update and serialize) first hash-aggregate the rows using filter_values as the keys, and then intersect the bitmaps of all keys. The results are serialized and sent to the second-level BE nodes (merge and finalize), which iteratively compute the union of all the bitmap values received from the first-level nodes.
Create UDAF:
```
drop FUNCTION orthogonal_bitmap_intersect(BITMAP,BIGINT,BIGINT, ...);
CREATE AGGREGATE FUNCTION orthogonal_bitmap_intersect(BITMAP,BIGINT,BIGINT, ...) RETURNS BITMAP INTERMEDIATE varchar(1)
PROPERTIES (
"init_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions21bitmap_intersect_initIlNS_9BigIntValEEEvPNS_15FunctionContextEPNS_9StringValE",
"update_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions23bitmap_intersect_updateIlNS_9BigIntValEEEvPNS_15FunctionContextERKNS_9StringValERKT0_iPS9_PS6_",
"serialize_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions30bitmap_intersect_and_serializeIlEENS_9StringValEPNS_15FunctionContextERKS2_",
"merge_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions12bitmap_unionEPNS_15FunctionContextERKNS_9StringValEPS3_",
"finalize_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions16bitmap_serializeEPNS_15FunctionContextERKNS_9StringValE",
"object_file"="http://ip:port/libudaf_orthogonal_bitmap.so" );
```
Note:
1. column_to_filter, filter_values column is set to bigint type here;
2. the function symbol passes through nm /xxx/xxx/libudaf_orthogonal_bitmap.so|grep "bitmap_"
On the basis of this table schema, this function has two levels of aggregation in query planning. In the first level, the BE nodes (update and serialize) first hash-aggregate the rows using filter_values as the keys, and then intersect the bitmaps of all keys. The results are serialized and sent to the second-level BE nodes (merge and finalize), which iteratively compute the union of all the bitmap values received from the first-level nodes.
Example:
@ -190,19 +126,6 @@ Explain:
On the basis of this table schema, the query plan aggregates in two levels. In the first level, the BE nodes (update and serialize) first hash-aggregate the rows using filter_values as the keys, then intersect the bitmaps of all keys, and then count the intersection result. The count values are serialized and sent to the second-level BE nodes (merge and finalize), which iteratively sum all the count values received from the first-level nodes.
Create UDAF:
```
drop FUNCTION orthogonal_bitmap_intersect_count(BITMAP,BIGINT,BIGINT, ...);
CREATE AGGREGATE FUNCTION orthogonal_bitmap_intersect_count(BITMAP,BIGINT,BIGINT, ...) RETURNS BIGINT INTERMEDIATE varchar(1)
PROPERTIES (
"init_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions27bitmap_intersect_count_initIlNS_9BigIntValEEEvPNS_15FunctionContextEPNS_9StringValE",
"update_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions23bitmap_intersect_updateIlNS_9BigIntValEEEvPNS_15FunctionContextERKNS_9StringValERKT0_iPS9_PS6_",
"serialize_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions32bitmap_intersect_count_serializeIlEENS_9StringValEPNS_15FunctionContextERKS2_",
"merge_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions18bitmap_count_mergeEPNS_15FunctionContextERKNS_9StringValEPS3_",
"finalize_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions21bitmap_count_finalizeEPNS_15FunctionContextERKNS_9StringValE",
"object_file"="http://ip:port/libudaf_orthogonal_bitmap.so" );
```
#### orthogonal_bitmap_union_count
@ -214,21 +137,8 @@ orthogonal_bitmap_union_count(bitmap_column)
Explain:
on the basis of this table schema, this udaf is divided into two layers. In the first layer, be nodes (update and serialize) merge all the bitmaps, and then count the resulting bitmaps. The count values are serialized and sent to the second level be nodes (merge and finalize). In the second layer, the be nodes are used to calculate the sum of all the count values from the first level nodes
on the basis of this table schema, this function is divided into two layers. In the first layer, be nodes (update and serialize) merge all the bitmaps, and then count the resulting bitmaps. The count values are serialized and sent to the second level be nodes (merge and finalize). In the second layer, the be nodes are used to calculate the sum of all the count values from the first level nodes
Create UDAF:
```
drop FUNCTION orthogonal_bitmap_union_count(BITMAP);
CREATE AGGREGATE FUNCTION orthogonal_bitmap_union_count(BITMAP) RETURNS BIGINT INTERMEDIATE varchar(1)
PROPERTIES (
"init_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions23bitmap_union_count_initEPNS_15FunctionContextEPNS_9StringValE",
"update_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions12bitmap_unionEPNS_15FunctionContextERKNS_9StringValEPS3_",
"serialize_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions22bitmap_count_serializeEPNS_15FunctionContextERKNS_9StringValE",
"merge_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions18bitmap_count_mergeEPNS_15FunctionContextERKNS_9StringValEPS3_",
"finalize_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions21bitmap_count_finalizeEPNS_15FunctionContextERKNS_9StringValE",
"object_file"="http://ip:port/libudaf_orthogonal_bitmap.so" );
```
### Suitable for the scene

View File

@ -0,0 +1,47 @@
---
{
"title": "orthogonal_bitmap_intersect",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# orthogonal_bitmap_intersect
## description
### Syntax
`BITMAP ORTHOGONAL_BITMAP_INTERSECT(bitmap_column, column_to_filter, filter_values)`
The bitmap intersection function, the first parameter is the bitmap column, the second parameter is the dimension column for filtering, and the third parameter is the variable length parameter, which means different values of the filter dimension column
## example
```
mysql> select orthogonal_bitmap_intersect(members, tag_group, 1150000, 1150001, 390006) from tag_map where tag_group in ( 1150000, 1150001, 390006);
+-------------------------------------------------------------------------------+
| orthogonal_bitmap_intersect(`members`, `tag_group`, 1150000, 1150001, 390006) |
+-------------------------------------------------------------------------------+
| NULL |
+-------------------------------------------------------------------------------+
1 row in set (3.505 sec)
```
## keyword
ORTHOGONAL_BITMAP_INTERSECT,BITMAP

View File

@ -0,0 +1,46 @@
---
{
"title": "orthogonal_bitmap_intersect_count",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# orthogonal_bitmap_intersect_count
## description
### Syntax
`BIGINT ORTHOGONAL_BITMAP_INTERSECT_COUNT(bitmap_column, column_to_filter, filter_values)`
The bitmap intersection count function, the first parameter is the bitmap column, the second parameter is the dimension column for filtering, and the third parameter is the variable length parameter, which means different values of the filter dimension column
## example
```
mysql> select orthogonal_bitmap_intersect_count(members, tag_group, 1150000, 1150001, 390006) from tag_map where tag_group in ( 1150000, 1150001, 390006);
+-------------------------------------------------------------------------------------+
| orthogonal_bitmap_intersect_count(`members`, `tag_group`, 1150000, 1150001, 390006) |
+-------------------------------------------------------------------------------------+
| 0 |
+-------------------------------------------------------------------------------------+
1 row in set (3.382 sec)
```
## keyword
ORTHOGONAL_BITMAP_INTERSECT_COUNT,BITMAP

View File

@ -0,0 +1,46 @@
---
{
"title": "orthogonal_bitmap_union_count",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# orthogonal_bitmap_union_count
## description
### Syntax
`BIGINT ORTHOGONAL_BITMAP_UNION_COUNT(bitmap_column)`
The bitmap union count function: it computes the number of elements in the union of the bitmaps. The syntax is the same as the original bitmap_union_count, but the implementation is different.
## example
```
mysql> select orthogonal_bitmap_union_count(members) from tag_map where tag_group in ( 1150000, 1150001, 390006);
+------------------------------------------+
| orthogonal_bitmap_union_count(`members`) |
+------------------------------------------+
| 286957811 |
+------------------------------------------+
1 row in set (2.645 sec)
```
## keyword
ORTHOGONAL_BITMAP_UNION_COUNT,BITMAP

View File

@ -1,6 +1,6 @@
---
{
"title": "正交的BITMAP计算UDAF",
"title": "正交的BITMAP计算",
"language": "zh-CN"
}
---
@ -24,7 +24,7 @@ specific language governing permissions and limitations
under the License.
-->
# 正交的BITMAP计算UDAF
# 正交的BITMAP计算
## 背景
@ -35,10 +35,7 @@ Doris原有的Bitmap聚合函数设计比较通用,但对亿级别以上bitmap
## 使用指南
1. 建表,增加hid列,表示bitmap列值id范围, 作为hash分桶列
2. 数据灌库,load数据时,对bitmap列值range范围划分
3. 编译UDAF,产出.so动态库
4. 在DORIS中注册UDAF,DORIS在运行时加载.so库
5. 使用场景
2. 使用场景
### Create table
@ -86,38 +83,9 @@ user_id = to_bitmap(tmp_user_id)
```
注:第一列代表用户标签,由中文转换成数字
load数据时,对用户bitmap值range范围纵向切割,例如,用户id在1-5000000范围内的hid值相同,hid值相同的行会分配到一个分桶内,如此每个分桶内到的bitmap都是正交的。在bitmap的udaf实现上,可以利用桶内bitmap值正交特性,进行交并集计算,计算结果会被shuffle至top节点聚合。
load数据时,对用户bitmap值range范围纵向切割,例如,用户id在1-5000000范围内的hid值相同,hid值相同的行会分配到一个分桶内,如此每个分桶内到的bitmap都是正交的。可以利用桶内bitmap值正交特性,进行交并集计算,计算结果会被shuffle至top节点聚合。
### 编译UDAF
源代码:
```
contrib/udf/src/udaf_orthogonal_bitmap/
|-- bitmap_value.h
|-- CMakeLists.txt
|-- orthogonal_bitmap_function.cpp
|-- orthogonal_bitmap_function.h
`-- string_value.h
```
编译UDAF:
```
$cd contrib/udf
$ sh build_udf.sh
```
libudaf_orthogonal_bitmap.so产出目录:
```
output/contrib/udf/lib/udaf_orthogonal_bitmap/libudaf_orthogonal_bitmap.so
```
### 在DORIS中注册UDAF
Doris查询前设置参数
```
set parallel_fragment_exec_instance_num=5
```
注:根据集群情况设置并发参数,提高并发计算性能
新udaf聚合函数在mysql客户端链接session中创建,创建时需要注册函数符号,函数符号会以动态库.so的方式加载。
#### orthogonal_bitmap_intersect
@ -134,25 +102,6 @@ set parallel_fragment_exec_instance_num=5
说明:
查询规划上聚合分2层,在第一层be节点(update、serialize)先按filter_values为key进行hash聚合,然后对所有key的bitmap求交集,结果序列化后发送至第二层be节点(merge、finalize),在第二层be节点对所有来源于第一层节点的bitmap值循环求并集
创建UDAF:
```
drop FUNCTION orthogonal_bitmap_intersect(BITMAP,BIGINT,BIGINT, ...);
CREATE AGGREGATE FUNCTION orthogonal_bitmap_intersect(BITMAP,BIGINT,BIGINT, ...) RETURNS BITMAP INTERMEDIATE varchar(1)
PROPERTIES (
"init_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions21bitmap_intersect_initIlNS_9BigIntValEEEvPNS_15FunctionContextEPNS_9StringValE",
"update_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions23bitmap_intersect_updateIlNS_9BigIntValEEEvPNS_15FunctionContextERKNS_9StringValERKT0_iPS9_PS6_",
"serialize_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions30bitmap_intersect_and_serializeIlEENS_9StringValEPNS_15FunctionContextERKS2_",
"merge_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions12bitmap_unionEPNS_15FunctionContextERKNS_9StringValEPS3_",
"finalize_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions16bitmap_serializeEPNS_15FunctionContextERKNS_9StringValE",
"object_file"="http://ip:port/libudaf_orthogonal_bitmap.so" );
```
注意:
1.column_to_filter, filter_values列这里设置为BIGINT类型;
2.函数符号通过nm /xxx/xxx/libudaf_orthogonal_bitmap.so|grep "xxx" 查找
样例:
```
@ -175,19 +124,6 @@ select BITMAP_COUNT(orthogonal_bitmap_intersect(user_id, tag, 13080800, 11110200
说明:
查询规划聚合上分2层,在第一层be节点(update、serialize)先按filter_values为key进行hash聚合,然后对所有key的bitmap求交集,再对交集结果求count,count值序列化后发送至第二层be节点(merge、finalize),在第二层be节点对所有来源于第一层节点的count值循环求sum
创建UDAF:
```
drop FUNCTION orthogonal_bitmap_intersect_count(BITMAP,BIGINT,BIGINT, ...);
CREATE AGGREGATE FUNCTION orthogonal_bitmap_intersect_count(BITMAP,BIGINT,BIGINT, ...) RETURNS BIGINT INTERMEDIATE varchar(1)
PROPERTIES (
"init_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions27bitmap_intersect_count_initIlNS_9BigIntValEEEvPNS_15FunctionContextEPNS_9StringValE",
"update_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions23bitmap_intersect_updateIlNS_9BigIntValEEEvPNS_15FunctionContextERKNS_9StringValERKT0_iPS9_PS6_",
"serialize_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions32bitmap_intersect_count_serializeIlEENS_9StringValEPNS_15FunctionContextERKS2_",
"merge_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions18bitmap_count_mergeEPNS_15FunctionContextERKNS_9StringValEPS3_",
"finalize_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions21bitmap_count_finalizeEPNS_15FunctionContextERKNS_9StringValE",
"object_file"="http://ip:port/libudaf_orthogonal_bitmap.so" );
```
#### orthogonal_bitmap_union_count
@ -205,19 +141,6 @@ PROPERTIES (
查询规划上分2层,在第一层be节点(update、serialize)对所有bitmap求并集,再对并集的结果bitmap求count,count值序列化后发送至第二层be节点(merge、finalize),在第二层be节点对所有来源于第一层节点的count值循环求sum
创建UDAF:
```
drop FUNCTION orthogonal_bitmap_union_count(BITMAP);
CREATE AGGREGATE FUNCTION orthogonal_bitmap_union_count(BITMAP) RETURNS BIGINT INTERMEDIATE varchar(1)
PROPERTIES (
"init_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions23bitmap_union_count_initEPNS_15FunctionContextEPNS_9StringValE",
"update_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions12bitmap_unionEPNS_15FunctionContextERKNS_9StringValEPS3_",
"serialize_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions22bitmap_count_serializeEPNS_15FunctionContextERKNS_9StringValE",
"merge_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions18bitmap_count_mergeEPNS_15FunctionContextERKNS_9StringValEPS3_",
"finalize_fn"="_ZN9doris_udf25OrthogonalBitmapFunctions21bitmap_count_finalizeEPNS_15FunctionContextERKNS_9StringValE",
"object_file"="http://ip:port/libudaf_orthogonal_bitmap.so" );
```
### 使用场景
符合对bitmap进行正交计算的场景,如在用户行为分析中,计算留存,漏斗,用户画像等。

View File

@ -0,0 +1,47 @@
---
{
"title": "orthogonal_bitmap_intersect",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# orthogonal_bitmap_intersect
## description
### Syntax
`BITMAP ORTHOGONAL_BITMAP_INTERSECT(bitmap_column, column_to_filter, filter_values)`
求bitmap交集函数, 第一个参数是Bitmap列,第二个参数是用来过滤的维度列,第三个参数是变长参数,含义是过滤维度列的不同取值
## example
```
mysql> select orthogonal_bitmap_intersect(members, tag_group, 1150000, 1150001, 390006) from tag_map where tag_group in ( 1150000, 1150001, 390006);
+-------------------------------------------------------------------------------+
| orthogonal_bitmap_intersect(`members`, `tag_group`, 1150000, 1150001, 390006) |
+-------------------------------------------------------------------------------+
| NULL |
+-------------------------------------------------------------------------------+
1 row in set (3.505 sec)
```
## keyword
ORTHOGONAL_BITMAP_INTERSECT,BITMAP

View File

@ -0,0 +1,46 @@
---
{
"title": "orthogonal_bitmap_intersect_count",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# orthogonal_bitmap_intersect_count
## description
### Syntax
`BIGINT ORTHOGONAL_BITMAP_INTERSECT_COUNT(bitmap_column, column_to_filter, filter_values)`
求bitmap交集大小的函数, 第一个参数是Bitmap列,第二个参数是用来过滤的维度列,第三个参数是变长参数,含义是过滤维度列的不同取值
## example
```
mysql> select orthogonal_bitmap_intersect_count(members, tag_group, 1150000, 1150001, 390006) from tag_map where tag_group in ( 1150000, 1150001, 390006);
+-------------------------------------------------------------------------------------+
| orthogonal_bitmap_intersect_count(`members`, `tag_group`, 1150000, 1150001, 390006) |
+-------------------------------------------------------------------------------------+
| 0 |
+-------------------------------------------------------------------------------------+
1 row in set (3.382 sec)
```
## keyword
ORTHOGONAL_BITMAP_INTERSECT_COUNT,BITMAP

View File

@ -0,0 +1,47 @@
---
{
"title": "orthogonal_bitmap_union_count",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# orthogonal_bitmap_union_count
## description
### Syntax
`BIGINT ORTHOGONAL_BITMAP_UNION_COUNT(bitmap_column)`
求bitmap并集大小的函数, 参数类型是bitmap,是待求并集count的列
## example
```
mysql> select orthogonal_bitmap_union_count(members) from tag_map where tag_group in ( 1150000, 1150001, 390006);
+------------------------------------------+
| orthogonal_bitmap_union_count(`members`) |
+------------------------------------------+
| 286957811 |
+------------------------------------------+
1 row in set (2.645 sec)
```
## keyword
ORTHOGONAL_BITMAP_UNION_COUNT,BITMAP

View File

@ -756,6 +756,95 @@ public class FunctionSet<min_initIN9doris_udf12DecimalV2ValEEEvPNS2_15FunctionCo
// Names of bitmap-related functions. The three ORTHOGONAL_* names are the ones
// passed as the first argument to AggregateFunction.createBuiltin(...) further
// down in this class when the orthogonal bitmap aggregates are registered.
public static final String BITMAP_COUNT = "bitmap_count";
public static final String INTERSECT_COUNT = "intersect_count";
public static final String BITMAP_INTERSECT = "bitmap_intersect";
public static final String ORTHOGONAL_BITMAP_INTERSECT = "orthogonal_bitmap_intersect";
public static final String ORTHOGONAL_BITMAP_INTERSECT_COUNT = "orthogonal_bitmap_intersect_count";
public static final String ORTHOGONAL_BITMAP_UNION_COUNT = "orthogonal_bitmap_union_count";
// Maps a filter-column type to the mangled C++ symbol of the matching
// instantiation of
//   doris::BitmapFunctions::orthogonal_bitmap_intersect_init<T, ValType>(
//       doris_udf::FunctionContext*, doris_udf::StringVal*)
// The template arguments are encoded in the symbol itself (Itanium C++ ABI
// mangling): "Ia" = signed char / TinyIntVal, "Is" = short / SmallIntVal,
// "Ii" = int / IntVal, "Il" = long / BigIntVal, "If" = float / FloatVal,
// "Id" = double / DoubleVal.
// The strings are resolved by exact name, so they must stay byte-identical to
// what the backend build exports.
private static final Map<Type, String> ORTHOGONAL_BITMAP_INTERSECT_INIT_SYMBOL =
ImmutableMap.<Type, String>builder()
.put(Type.TINYINT,
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_intersect_initIaN9doris_udf10TinyIntValEEEvPNS2_15FunctionContextEPNS2_9StringValE")
.put(Type.SMALLINT,
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_intersect_initIsN9doris_udf11SmallIntValEEEvPNS2_15FunctionContextEPNS2_9StringValE")
.put(Type.INT,
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_intersect_initIiN9doris_udf6IntValEEEvPNS2_15FunctionContextEPNS2_9StringValE")
.put(Type.BIGINT,
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_intersect_initIlN9doris_udf9BigIntValEEEvPNS2_15FunctionContextEPNS2_9StringValE")
.put(Type.FLOAT,
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_intersect_initIfN9doris_udf8FloatValEEEvPNS2_15FunctionContextEPNS2_9StringValE")
.put(Type.DOUBLE,
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_intersect_initIdN9doris_udf9DoubleValEEEvPNS2_15FunctionContextEPNS2_9StringValE")
// CHAR, VARCHAR and STRING all share the <doris::StringValue, doris_udf::StringVal> instantiation.
.put(Type.CHAR,
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_intersect_initINS_11StringValueEN9doris_udf9StringValEEEvPNS3_15FunctionContextEPS4_")
.put(Type.VARCHAR,
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_intersect_initINS_11StringValueEN9doris_udf9StringValEEEvPNS3_15FunctionContextEPS4_")
.put(Type.STRING,
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_intersect_initINS_11StringValueEN9doris_udf9StringValEEEvPNS3_15FunctionContextEPS4_")
.build();
// Maps a filter-column type to the mangled C++ symbol of the matching
// instantiation of
//   doris_udf::StringVal doris::BitmapFunctions::orthogonal_bitmap_intersect_serialize<T>(
//       doris_udf::FunctionContext*, const doris_udf::StringVal&)
// Only the element type T is a template argument here ("Ia", "Is", "Ii",
// "Il", "If", "Id" for the numeric types).
// The strings are resolved by exact name and must not be reformatted.
private static final Map<Type, String> ORTHOGONAL_BITMAP_INTERSECT_SERIALIZE_SYMBOL =
ImmutableMap.<Type, String>builder()
.put(Type.TINYINT,
"_ZN5doris15BitmapFunctions37orthogonal_bitmap_intersect_serializeIaEEN9doris_udf9StringValEPNS2_15FunctionContextERKS3_")
.put(Type.SMALLINT,
"_ZN5doris15BitmapFunctions37orthogonal_bitmap_intersect_serializeIsEEN9doris_udf9StringValEPNS2_15FunctionContextERKS3_")
.put(Type.INT,
"_ZN5doris15BitmapFunctions37orthogonal_bitmap_intersect_serializeIiEEN9doris_udf9StringValEPNS2_15FunctionContextERKS3_")
.put(Type.BIGINT,
"_ZN5doris15BitmapFunctions37orthogonal_bitmap_intersect_serializeIlEEN9doris_udf9StringValEPNS2_15FunctionContextERKS3_")
.put(Type.FLOAT,
"_ZN5doris15BitmapFunctions37orthogonal_bitmap_intersect_serializeIfEEN9doris_udf9StringValEPNS2_15FunctionContextERKS3_")
.put(Type.DOUBLE,
"_ZN5doris15BitmapFunctions37orthogonal_bitmap_intersect_serializeIdEEN9doris_udf9StringValEPNS2_15FunctionContextERKS3_")
// CHAR, VARCHAR and STRING all share the <doris::StringValue> instantiation.
.put(Type.CHAR,
"_ZN5doris15BitmapFunctions37orthogonal_bitmap_intersect_serializeINS_11StringValueEEEN9doris_udf9StringValEPNS3_15FunctionContextERKS4_")
.put(Type.VARCHAR,
"_ZN5doris15BitmapFunctions37orthogonal_bitmap_intersect_serializeINS_11StringValueEEEN9doris_udf9StringValEPNS3_15FunctionContextERKS4_")
.put(Type.STRING,
"_ZN5doris15BitmapFunctions37orthogonal_bitmap_intersect_serializeINS_11StringValueEEEN9doris_udf9StringValEPNS3_15FunctionContextERKS4_")
.build();
// Maps a filter-column type to the mangled C++ symbol of the matching
// instantiation of
//   doris::BitmapFunctions::orthogonal_bitmap_intersect_count_init<T, ValType>(
//       doris_udf::FunctionContext*, doris_udf::StringVal*)
// This is the init function registered for orthogonal_bitmap_intersect_count.
// The key set and the template-argument encoding mirror
// ORTHOGONAL_BITMAP_INTERSECT_INIT_SYMBOL; only the function name differs.
// The strings are resolved by exact name and must not be reformatted.
private static final Map<Type, String> ORTHOGONAL_BITMAP_INTERSECT_COUNT_INIT_SYMBOL =
ImmutableMap.<Type, String>builder()
.put(Type.TINYINT,
"_ZN5doris15BitmapFunctions38orthogonal_bitmap_intersect_count_initIaN9doris_udf10TinyIntValEEEvPNS2_15FunctionContextEPNS2_9StringValE")
.put(Type.SMALLINT,
"_ZN5doris15BitmapFunctions38orthogonal_bitmap_intersect_count_initIsN9doris_udf11SmallIntValEEEvPNS2_15FunctionContextEPNS2_9StringValE")
.put(Type.INT,
"_ZN5doris15BitmapFunctions38orthogonal_bitmap_intersect_count_initIiN9doris_udf6IntValEEEvPNS2_15FunctionContextEPNS2_9StringValE")
.put(Type.BIGINT,
"_ZN5doris15BitmapFunctions38orthogonal_bitmap_intersect_count_initIlN9doris_udf9BigIntValEEEvPNS2_15FunctionContextEPNS2_9StringValE")
.put(Type.FLOAT,
"_ZN5doris15BitmapFunctions38orthogonal_bitmap_intersect_count_initIfN9doris_udf8FloatValEEEvPNS2_15FunctionContextEPNS2_9StringValE")
.put(Type.DOUBLE,
"_ZN5doris15BitmapFunctions38orthogonal_bitmap_intersect_count_initIdN9doris_udf9DoubleValEEEvPNS2_15FunctionContextEPNS2_9StringValE")
// CHAR, VARCHAR and STRING all share the <doris::StringValue, doris_udf::StringVal> instantiation.
.put(Type.CHAR,
"_ZN5doris15BitmapFunctions38orthogonal_bitmap_intersect_count_initINS_11StringValueEN9doris_udf9StringValEEEvPNS3_15FunctionContextEPS4_")
.put(Type.VARCHAR,
"_ZN5doris15BitmapFunctions38orthogonal_bitmap_intersect_count_initINS_11StringValueEN9doris_udf9StringValEEEvPNS3_15FunctionContextEPS4_")
.put(Type.STRING,
"_ZN5doris15BitmapFunctions38orthogonal_bitmap_intersect_count_initINS_11StringValueEN9doris_udf9StringValEEEvPNS3_15FunctionContextEPS4_")
.build();
// Maps a filter-column type to the mangled C++ symbol of the matching
// instantiation of
//   doris_udf::StringVal doris::BitmapFunctions::orthogonal_bitmap_intersect_count_serialize<T>(
//       doris_udf::FunctionContext*, const doris_udf::StringVal&)
// This is the serialize function registered for orthogonal_bitmap_intersect_count.
// The key set mirrors ORTHOGONAL_BITMAP_INTERSECT_SERIALIZE_SYMBOL; only the
// function name differs.
// The strings are resolved by exact name and must not be reformatted.
private static final Map<Type, String> ORTHOGONAL_BITMAP_INTERSECT_COUNT_SERIALIZE_SYMBOL =
ImmutableMap.<Type, String>builder()
.put(Type.TINYINT,
"_ZN5doris15BitmapFunctions43orthogonal_bitmap_intersect_count_serializeIaEEN9doris_udf9StringValEPNS2_15FunctionContextERKS3_")
.put(Type.SMALLINT,
"_ZN5doris15BitmapFunctions43orthogonal_bitmap_intersect_count_serializeIsEEN9doris_udf9StringValEPNS2_15FunctionContextERKS3_")
.put(Type.INT,
"_ZN5doris15BitmapFunctions43orthogonal_bitmap_intersect_count_serializeIiEEN9doris_udf9StringValEPNS2_15FunctionContextERKS3_")
.put(Type.BIGINT,
"_ZN5doris15BitmapFunctions43orthogonal_bitmap_intersect_count_serializeIlEEN9doris_udf9StringValEPNS2_15FunctionContextERKS3_")
.put(Type.FLOAT,
"_ZN5doris15BitmapFunctions43orthogonal_bitmap_intersect_count_serializeIfEEN9doris_udf9StringValEPNS2_15FunctionContextERKS3_")
.put(Type.DOUBLE,
"_ZN5doris15BitmapFunctions43orthogonal_bitmap_intersect_count_serializeIdEEN9doris_udf9StringValEPNS2_15FunctionContextERKS3_")
// CHAR, VARCHAR and STRING all share the <doris::StringValue> instantiation.
.put(Type.CHAR,
"_ZN5doris15BitmapFunctions43orthogonal_bitmap_intersect_count_serializeINS_11StringValueEEEN9doris_udf9StringValEPNS3_15FunctionContextERKS4_")
.put(Type.VARCHAR,
"_ZN5doris15BitmapFunctions43orthogonal_bitmap_intersect_count_serializeINS_11StringValueEEEN9doris_udf9StringValEPNS3_15FunctionContextERKS4_")
.put(Type.STRING,
"_ZN5doris15BitmapFunctions43orthogonal_bitmap_intersect_count_serializeINS_11StringValueEEEN9doris_udf9StringValEPNS3_15FunctionContextERKS4_")
.build();
private static final Map<Type, String> BITMAP_UNION_INT_SYMBOL =
ImmutableMap.<Type, String>builder()
@ -1713,6 +1802,36 @@ public class FunctionSet<min_initIN9doris_udf12DecimalV2ValEEEvPNS2_15FunctionCo
null, false, true, false, true));
}
// Register orthogonal_bitmap_intersect and orthogonal_bitmap_intersect_count
// once per supported filter-column type. Every type listed here has an entry
// in each of the ORTHOGONAL_BITMAP_INTERSECT_*_SYMBOL maps.
// NOTE(review): BITMAP_INTERSECT_UPDATE_SYMBOL is declared outside this hunk;
// confirm it also has an entry for every type below, otherwise get(t) yields null.
Type[] types = {Type.SMALLINT, Type.TINYINT, Type.INT, Type.BIGINT, Type.FLOAT, Type.DOUBLE, Type.CHAR,
Type.VARCHAR, Type.STRING};
for (Type t : types) {
// orthogonal_bitmap_intersect: declared argument types (BITMAP, t, t), returns BITMAP.
// The symbol arguments are positional; judging by the names they are
// init / update / merge / serialize / "" / "" / finalize.
// TODO confirm the parameter order and the meaning of the boolean flags
// against AggregateFunction.createBuiltin.
addBuiltin(AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_INTERSECT,
Lists.newArrayList(Type.BITMAP, t,t),
Type.BITMAP,
Type.VARCHAR,
true,
ORTHOGONAL_BITMAP_INTERSECT_INIT_SYMBOL.get(t),
BITMAP_INTERSECT_UPDATE_SYMBOL.get(t),
// doris::BitmapFunctions::bitmap_union(FunctionContext*, const StringVal&, StringVal*)
"_ZN5doris15BitmapFunctions12bitmap_unionEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_",
ORTHOGONAL_BITMAP_INTERSECT_SERIALIZE_SYMBOL.get(t),
"",
"",
// doris::BitmapFunctions::bitmap_serialize(FunctionContext*, const StringVal&)
"_ZN5doris15BitmapFunctions16bitmap_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
true, false, true));
// orthogonal_bitmap_intersect_count: same argument types and the same
// per-type update symbol as above, but returns BIGINT and uses the
// orthogonal_bitmap_count_merge / orthogonal_bitmap_count_finalize symbols.
addBuiltin(AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_INTERSECT_COUNT,
Lists.newArrayList(Type.BITMAP, t,t),
Type.BIGINT,
Type.VARCHAR,
true,
ORTHOGONAL_BITMAP_INTERSECT_COUNT_INIT_SYMBOL.get(t),
BITMAP_INTERSECT_UPDATE_SYMBOL.get(t),
// doris::BitmapFunctions::orthogonal_bitmap_count_merge(FunctionContext*, const StringVal&, StringVal*)
"_ZN5doris15BitmapFunctions29orthogonal_bitmap_count_mergeEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_",
ORTHOGONAL_BITMAP_INTERSECT_COUNT_SERIALIZE_SYMBOL.get(t),
"",
"",
// doris::BitmapFunctions::orthogonal_bitmap_count_finalize(FunctionContext*, const StringVal&)
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_count_finalizeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
true, false, true));
}
// bitmap
addBuiltin(AggregateFunction.createBuiltin(BITMAP_UNION, Lists.newArrayList(Type.BITMAP),
Type.BITMAP,
@ -1758,7 +1877,18 @@ public class FunctionSet<min_initIN9doris_udf12DecimalV2ValEEEvPNS2_15FunctionCo
null,
"",
true, true, true, true));
// orthogonal_bitmap_union_count: takes a single BITMAP argument and returns BIGINT.
// The symbol arguments are positional; judging by the names they are
// init / update (bitmap_union) / merge / serialize / null / null / finalize.
// TODO confirm the parameter order and the meaning of the boolean flags
// against AggregateFunction.createBuiltin.
// Unlike the orthogonal_bitmap_intersect* registrations, the two unused slots
// are passed as null here rather than "".
addBuiltin(AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_UNION_COUNT, Lists.newArrayList(Type.BITMAP),
Type.BIGINT,
Type.VARCHAR,
// doris::BitmapFunctions::orthogonal_bitmap_union_count_init(FunctionContext*, StringVal*)
"_ZN5doris15BitmapFunctions34orthogonal_bitmap_union_count_initEPN9doris_udf15FunctionContextEPNS1_9StringValE",
// doris::BitmapFunctions::bitmap_union(FunctionContext*, const StringVal&, StringVal*)
"_ZN5doris15BitmapFunctions12bitmap_unionEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_",
// doris::BitmapFunctions::orthogonal_bitmap_count_merge(FunctionContext*, const StringVal&, StringVal*)
"_ZN5doris15BitmapFunctions29orthogonal_bitmap_count_mergeEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_",
// doris::BitmapFunctions::orthogonal_bitmap_count_serialize(FunctionContext*, const StringVal&)
"_ZN5doris15BitmapFunctions33orthogonal_bitmap_count_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
null,
null,
// doris::BitmapFunctions::orthogonal_bitmap_count_finalize(FunctionContext*, const StringVal&)
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_count_finalizeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
true, true, true));
// TODO(ml): supply function symbol
addBuiltin(AggregateFunction.createBuiltin(BITMAP_INTERSECT, Lists.newArrayList(Type.BITMAP),
Type.BITMAP, Type.VARCHAR,