Encapsulate HLL logic (#1756)

This commit is contained in:
kangkaisen
2019-09-09 15:52:10 +08:00
committed by ZHAO Chun
parent a349409838
commit cd5cfea5cc
19 changed files with 706 additions and 372 deletions

View File

@@ -15,62 +15,34 @@
// specific language governing permissions and limitations
// under the License.
#include "exprs/hll_hash_function.h"
#include "exprs/expr.h"
#include "runtime/tuple_row.h"
#include "runtime/datetime_value.h"
#include "util/path_builder.h"
#include "runtime/string_value.hpp"
#include "exprs/aggregate_functions.h"
#include "exprs/cast_functions.h"
#include "olap/olap_common.h"
#include "olap/utils.h"
#include "exprs/hll_hash_function.h"
namespace doris {
// Bring the UDF value types used by the definitions below into scope.
using doris_udf::BigIntVal;
using doris_udf::StringVal;
// Buffer size, in bytes, used by create_string_result() for a non-null input:
// one type byte, one count byte, then an 8-byte hash stored at offset 2.
const int HllHashFunctions::HLL_INIT_EXPLICT_SET_SIZE = 10;
// Buffer size, in bytes, used by create_string_result() for a null input:
// only the single type byte is written.
const int HllHashFunctions::HLL_EMPTY_SET_SIZE = 1;
// No-op: the body is empty, so calling init() has no effect.
void HllHashFunctions::init() {
}
// NOTE(review): this span looks like a rendered diff whose +/- markers were
// stripped. Lines of create_string_result() and of a second definition of
// hll_hash() are interleaved, so the text below does not form valid C++ as
// written. Confirm against the repository before relying on any single line.
//
// create_string_result(): builds a small byte buffer describing `val` and
// returns it through AnyValUtil::from_buffer_temp(). When `is_null` is true the
// buffer holds only the HLL_DATA_EMPTY type byte; otherwise it holds the
// HLL_DATA_EXPLICIT type byte, a count of 1 and the 64-bit murmur hash of `val`.
StringVal HllHashFunctions::create_string_result(doris_udf::FunctionContext* ctx,
const StringVal& val, const bool is_null) {
StringVal result;
if (is_null) {
// Null input: emit the one-byte HLL_DATA_EMPTY form.
char buf[HLL_EMPTY_SET_SIZE];
buf[0] = HLL_DATA_EMPTY;
result = AnyValUtil::from_buffer_temp(ctx, buf, sizeof(buf));
// hll_hash(): hashes a non-null `input` with murmur_hash64A, constructs a
// HyperLogLog from that hash (or a default-constructed one for a null input),
// serializes it into a std::string sized 10 or 1 bytes respectively, and
// returns that string through AnyValUtil::from_string_temp().
StringVal HllHashFunctions::hll_hash(FunctionContext* ctx, const StringVal& input) {
const int HLL_SINGLE_VALUE_SIZE = 10;
const int HLL_EMPTY_SIZE = 1;
std::string buf;
// Starts as a default-constructed HyperLogLog; replaced below when input is non-null.
std::unique_ptr<HyperLogLog> hll {new HyperLogLog()};
if (!input.is_null) {
uint64_t hash_value = HashUtil::murmur_hash64A(input.ptr, input.len, HashUtil::MURMUR_SEED);
hll.reset(new HyperLogLog(hash_value));
buf.resize(HLL_SINGLE_VALUE_SIZE);
} else {
// Non-null input: emit the HLL_DATA_EXPLICIT form (type byte, count, hash).
uint64_t hash = HashUtil::murmur_hash64A(val.ptr, val.len, HashUtil::MURMUR_SEED);
char buf[HLL_INIT_EXPLICT_SET_SIZE];
buf[0] = HLL_DATA_EXPLICIT;
// Exactly one hash value follows the two header bytes.
buf[1] = 1;
// NOTE(review): stores the 8-byte hash at byte offset 2 through a casted
// pointer; that address is not 8-byte aligned relative to buf — confirm this
// is acceptable on every target platform.
*((uint64_t*)(buf + 2)) = hash;
result = AnyValUtil::from_buffer_temp(ctx, buf, sizeof(buf));
}
return result;
buf.resize(HLL_EMPTY_SIZE);
}
// NOTE(review): serialize() is handed the pointer returned by c_str() with its
// const qualifier cast away, so it presumably writes into the string's own
// storage; the buffer was sized above by resize(). Verify serialize() never
// writes more bytes than that size.
hll->serialize((char*)buf.c_str());
return AnyValUtil::from_string_temp(ctx, buf);
}
// Thin wrapper: returns whatever create_string_result() produces for `input`,
// passing the input's own null flag as the `is_null` argument.
StringVal HllHashFunctions::hll_hash(doris_udf::FunctionContext* ctx,
        const StringVal& input) {
    const bool input_is_null = input.is_null;
    return create_string_result(ctx, input, input_is_null);
}
// String-returning overload of hll_cardinality(): views `dest_base` as an
// HllVal, runs the BigIntVal overload on it, and returns that result's `val`
// formatted as decimal text via AnyValUtil::from_string_temp().
//
// NOTE(review): the is_null flag of the BigIntVal result is not inspected, so a
// null result is formatted from its `val` field rather than propagated as a
// null StringVal — confirm this is the intended behavior.
StringVal HllHashFunctions::hll_cardinality(doris_udf::FunctionContext* ctx,
        const doris_udf::StringVal& dest_base) {
    const HllVal& hll_input = static_cast<const HllVal&>(dest_base);
    const BigIntVal cardinality = hll_cardinality(ctx, hll_input);
    const std::string text = std::to_string(cardinality.val);
    return AnyValUtil::from_string_temp(ctx, text);
}
BigIntVal HllHashFunctions::hll_cardinality(doris_udf::FunctionContext* ctx,
const HllVal& input) {
BigIntVal HllHashFunctions::hll_cardinality(FunctionContext* ctx, const HllVal& input) {
if (input.is_null) {
return BigIntVal::null();
}
@@ -79,4 +51,5 @@ BigIntVal HllHashFunctions::hll_cardinality(doris_udf::FunctionContext* ctx,
AggregateFunctions::hll_union_agg_update(ctx, input, &dst);
return AggregateFunctions::hll_union_agg_finalize(ctx, dst);
}
}