131 lines
4.6 KiB
C++
131 lines
4.6 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#include "exprs/hll_function.h"
|
|
|
|
#include "exprs/anyval_util.h"
|
|
#include "util/hash_util.hpp"
|
|
#include "util/slice.h"
|
|
|
|
namespace doris {
|
|
|
|
using doris_udf::BigIntVal;
|
|
using doris_udf::StringVal;
|
|
|
|
// Initialization hook for the HLL functions. It performs no work.
void HllFunctions::init() {
    // Intentionally empty.
}
|
|
|
|
// UDF entry point: computes the serialized HLL for `input` with the
// std::string overload of hll_hash and wraps the bytes into a StringVal via
// AnyValUtil::from_string_temp, using `ctx`.
StringVal HllFunctions::hll_hash(FunctionContext* ctx, const StringVal& input) {
    const std::string serialized = hll_hash(input);
    return AnyValUtil::from_string_temp(ctx, serialized);
}
|
|
|
|
std::string HllFunctions::hll_hash(const StringVal& input) {
|
|
HyperLogLog hll;
|
|
if (!input.is_null) {
|
|
uint64_t hash_value = HashUtil::murmur_hash64A(input.ptr, input.len, HashUtil::MURMUR_SEED);
|
|
hll.update(hash_value);
|
|
}
|
|
std::string buf;
|
|
buf.resize(hll.max_serialized_size());
|
|
buf.resize(hll.serialize((uint8_t*)buf.c_str()));
|
|
|
|
return buf;
|
|
}
|
|
|
|
// Aggregate init step: marks `dst` as non-null, sets its length to
// sizeof(HyperLogLog) and points it at a newly heap-allocated HyperLogLog.
//
// The object is allocated with `new`; in this file hll_finalize and
// hll_serialize `delete` the HyperLogLog they are handed.
void HllFunctions::hll_init(FunctionContext*, StringVal* dst) {
    dst->is_null = false;
    dst->len = sizeof(HyperLogLog);

    auto* state = new HyperLogLog();
    dst->ptr = reinterpret_cast<uint8_t*>(state);
}
|
|
|
|
// Returns the value of HyperLogLog::empty() wrapped into a StringVal via
// AnyValUtil::from_string_temp, using `ctx`.
StringVal HllFunctions::hll_empty(FunctionContext* ctx) {
    const auto empty_hll = HyperLogLog::empty();
    return AnyValUtil::from_string_temp(ctx, empty_hll);
}
|
|
|
|
template <typename T>
|
|
void HllFunctions::hll_update(FunctionContext*, const T& src, StringVal* dst) {
|
|
if (src.is_null) {
|
|
return;
|
|
}
|
|
|
|
uint64_t hash_value = AnyValUtil::hash64_murmur(src, HashUtil::MURMUR_SEED);
|
|
if (hash_value != 0) {
|
|
auto* dst_hll = reinterpret_cast<HyperLogLog*>(dst->ptr);
|
|
dst_hll->update(hash_value);
|
|
}
|
|
}
|
|
|
|
// Aggregate merge step: merges `src` into the HyperLogLog that `dst->ptr`
// points to. A null `src` is ignored.
//
// `src` is interpreted in one of two ways, selected by its length:
//   - len != 0: `src` holds serialized bytes; a temporary HyperLogLog is
//     constructed from Slice(src.ptr, src.len) and merged.
//   - len == 0: `src.ptr` is treated as a pointer to a live HyperLogLog (an
//     agg object) and that object is merged directly.
// NOTE(review): the len == 0 branch dereferences src.ptr as a HyperLogLog*;
// confirm callers never pass a non-null, zero-length plain string here.
void HllFunctions::hll_merge(FunctionContext*, const StringVal& src, StringVal* dst) {
    if (src.is_null) {
        return;
    }

    HyperLogLog& accumulator = *reinterpret_cast<HyperLogLog*>(dst->ptr);

    const bool src_is_agg_object = (src.len == 0);
    if (!src_is_agg_object) {
        accumulator.merge(HyperLogLog(Slice(src.ptr, src.len)));
        return;
    }
    accumulator.merge(*reinterpret_cast<HyperLogLog*>(src.ptr));
}
|
|
|
|
// Aggregate finalize step: returns the estimated cardinality of the
// HyperLogLog that `src.ptr` points to, then deletes that object.
//
// `src.ptr` is reinterpreted as a HyperLogLog* without checking src.is_null,
// and the pointer must not be used after this call because the object is
// destroyed here.
BigIntVal HllFunctions::hll_finalize(FunctionContext*, const StringVal& src) {
    auto* hll_state = reinterpret_cast<HyperLogLog*>(src.ptr);
    const auto cardinality = hll_state->estimate_cardinality();
    delete hll_state;
    return BigIntVal(cardinality);
}
|
|
|
|
// Returns the estimated cardinality of the HyperLogLog that `src.ptr` points
// to, or BigIntVal::null() when `src` is null.
//
// Unlike hll_finalize, this function does not delete the HyperLogLog.
BigIntVal HllFunctions::hll_get_value(FunctionContext*, const StringVal& src) {
    if (!src.is_null) {
        auto* hll_state = reinterpret_cast<HyperLogLog*>(src.ptr);
        return BigIntVal(hll_state->estimate_cardinality());
    }
    return BigIntVal::null();
}
|
|
|
|
// Scalar helper: estimates the cardinality of a single HLL value by running
// it through the aggregate steps hll_init -> hll_merge -> hll_finalize on a
// temporary state. hll_finalize deletes the HyperLogLog that hll_init
// allocated.
//
// A null `input` yields a default-constructed BigIntVal.
// NOTE(review): hll_get_value returns BigIntVal::null() for null input, while
// this function returns BigIntVal(); confirm the difference is intended.
BigIntVal HllFunctions::hll_cardinality(FunctionContext* ctx, const StringVal& input) {
    if (!input.is_null) {
        StringVal state;
        hll_init(ctx, &state);
        hll_merge(ctx, input, &state);
        return hll_finalize(ctx, state);
    }
    return BigIntVal();
}
|
|
|
|
// Aggregate serialize step: writes the HyperLogLog that `src.ptr` points to
// into a StringVal allocated through `ctx`, then deletes the HyperLogLog.
//
// The output is first sized to max_serialized_size() and afterwards resized
// to the byte count returned by serialize(). `src.ptr` must not be used after
// this call because the object is destroyed here.
StringVal HllFunctions::hll_serialize(FunctionContext* ctx, const StringVal& src) {
    auto* hll_state = reinterpret_cast<HyperLogLog*>(src.ptr);

    StringVal out(ctx, hll_state->max_serialized_size());
    const int written = hll_state->serialize(reinterpret_cast<uint8_t*>(out.ptr));
    out.resize(ctx, written);

    delete hll_state;
    return out;
}
|
|
|
|
template void HllFunctions::hll_update(FunctionContext*, const BooleanVal&, StringVal*);
|
|
template void HllFunctions::hll_update(FunctionContext*, const TinyIntVal&, StringVal*);
|
|
template void HllFunctions::hll_update(FunctionContext*, const SmallIntVal&, StringVal*);
|
|
template void HllFunctions::hll_update(FunctionContext*, const IntVal&, StringVal*);
|
|
template void HllFunctions::hll_update(FunctionContext*, const BigIntVal&, StringVal*);
|
|
template void HllFunctions::hll_update(FunctionContext*, const FloatVal&, StringVal*);
|
|
template void HllFunctions::hll_update(FunctionContext*, const DoubleVal&, StringVal*);
|
|
template void HllFunctions::hll_update(FunctionContext*, const StringVal&, StringVal*);
|
|
template void HllFunctions::hll_update(FunctionContext*, const DateTimeVal&, StringVal*);
|
|
template void HllFunctions::hll_update(FunctionContext*, const LargeIntVal&, StringVal*);
|
|
template void HllFunctions::hll_update(FunctionContext*, const DecimalV2Val&, StringVal*);
|
|
} // namespace doris
|