Encapsulate HLL logic (#1756)
This commit is contained in:
@ -15,62 +15,34 @@
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "exprs/hll_hash_function.h"
|
||||
|
||||
#include "exprs/expr.h"
|
||||
#include "runtime/tuple_row.h"
|
||||
#include "runtime/datetime_value.h"
|
||||
#include "util/path_builder.h"
|
||||
#include "runtime/string_value.hpp"
|
||||
#include "exprs/aggregate_functions.h"
|
||||
#include "exprs/cast_functions.h"
|
||||
#include "olap/olap_common.h"
|
||||
#include "olap/utils.h"
|
||||
#include "exprs/hll_hash_function.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
using doris_udf::BigIntVal;
|
||||
using doris_udf::StringVal;
|
||||
|
||||
const int HllHashFunctions::HLL_INIT_EXPLICT_SET_SIZE = 10;
|
||||
const int HllHashFunctions::HLL_EMPTY_SET_SIZE = 1;
|
||||
|
||||
void HllHashFunctions::init() {
|
||||
}
|
||||
|
||||
StringVal HllHashFunctions::create_string_result(doris_udf::FunctionContext* ctx,
|
||||
const StringVal& val, const bool is_null) {
|
||||
StringVal result;
|
||||
if (is_null) {
|
||||
// HLL_DATA_EMPTY
|
||||
char buf[HLL_EMPTY_SET_SIZE];
|
||||
buf[0] = HLL_DATA_EMPTY;
|
||||
result = AnyValUtil::from_buffer_temp(ctx, buf, sizeof(buf));
|
||||
StringVal HllHashFunctions::hll_hash(FunctionContext* ctx, const StringVal& input) {
|
||||
const int HLL_SINGLE_VALUE_SIZE = 10;
|
||||
const int HLL_EMPTY_SIZE = 1;
|
||||
std::string buf;
|
||||
std::unique_ptr<HyperLogLog> hll {new HyperLogLog()};
|
||||
if (!input.is_null) {
|
||||
uint64_t hash_value = HashUtil::murmur_hash64A(input.ptr, input.len, HashUtil::MURMUR_SEED);
|
||||
hll.reset(new HyperLogLog(hash_value));
|
||||
buf.resize(HLL_SINGLE_VALUE_SIZE);
|
||||
} else {
|
||||
// HLL_DATA_EXPLHLL_DATA_EXPLICIT
|
||||
uint64_t hash = HashUtil::murmur_hash64A(val.ptr, val.len, HashUtil::MURMUR_SEED);
|
||||
char buf[HLL_INIT_EXPLICT_SET_SIZE];
|
||||
buf[0] = HLL_DATA_EXPLICIT;
|
||||
buf[1] = 1;
|
||||
*((uint64_t*)(buf + 2)) = hash;
|
||||
result = AnyValUtil::from_buffer_temp(ctx, buf, sizeof(buf));
|
||||
}
|
||||
return result;
|
||||
buf.resize(HLL_EMPTY_SIZE);
|
||||
}
|
||||
hll->serialize((char*)buf.c_str());
|
||||
return AnyValUtil::from_string_temp(ctx, buf);
|
||||
}
|
||||
|
||||
StringVal HllHashFunctions::hll_hash(doris_udf::FunctionContext* ctx,
|
||||
const StringVal& input) {
|
||||
return create_string_result(ctx, input, input.is_null);
|
||||
}
|
||||
|
||||
StringVal HllHashFunctions::hll_cardinality(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::StringVal& dest_base) {
|
||||
BigIntVal intVal = hll_cardinality(ctx, static_cast<const HllVal&> (dest_base));
|
||||
return AnyValUtil::from_string_temp(ctx, std::to_string(intVal.val));
|
||||
}
|
||||
|
||||
BigIntVal HllHashFunctions::hll_cardinality(doris_udf::FunctionContext* ctx,
|
||||
const HllVal& input) {
|
||||
BigIntVal HllHashFunctions::hll_cardinality(FunctionContext* ctx, const HllVal& input) {
|
||||
if (input.is_null) {
|
||||
return BigIntVal::null();
|
||||
}
|
||||
@ -79,4 +51,5 @@ BigIntVal HllHashFunctions::hll_cardinality(doris_udf::FunctionContext* ctx,
|
||||
AggregateFunctions::hll_union_agg_update(ctx, input, &dst);
|
||||
return AggregateFunctions::hll_union_agg_finalize(ctx, dst);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user