// Modifications copyright (C) 2017, Baidu.com, Inc.
// Copyright 2017 The Apache Software Foundation

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include "exprs/aggregate_functions.h"

#include <math.h>
#include <string.h>

#include <algorithm>
#include <sstream>
#include <string>
#include <type_traits>
#include <unordered_set>

#include "common/logging.h"
#include "runtime/string_value.h"
#include "runtime/datetime_value.h"
#include "exprs/anyval_util.h"
#include "exprs/hybird_set.h"
#include "util/debug_util.h"

// TODO: this file should be cross compiled and then all of the builtin
// aggregate functions will have a codegen enabled path. Then we can remove
// the custom code in aggregation node.
namespace palo {
using palo_udf::FunctionContext;
using palo_udf::BooleanVal;
using palo_udf::TinyIntVal;
using palo_udf::SmallIntVal;
using palo_udf::IntVal;
using palo_udf::BigIntVal;
using palo_udf::LargeIntVal;
using palo_udf::FloatVal;
using palo_udf::DoubleVal;
using palo_udf::DecimalVal;
using palo_udf::DateTimeVal;
using palo_udf::StringVal;
using palo_udf::AnyVal;

// Delimiter to use if the separator is NULL.
static const StringVal DEFAULT_STRING_CONCAT_DELIM((uint8_t*)", ", 2);

// Hyperloglog precision. Default taken from paper.
Doesn't seem to matter very // much when between [6,12] const int HLL_PRECISION = 14; const int HLL_SETS_BYTES_NUM = 16384; void AggregateFunctions::init_null(FunctionContext*, AnyVal* dst) { dst->is_null = true; } template void AggregateFunctions::init_zero(FunctionContext*, T* dst) { dst->is_null = false; dst->val = 0; } template<> void AggregateFunctions::init_zero(FunctionContext*, DecimalVal* dst) { dst->set_to_zero(); } template void AggregateFunctions::sum_remove(FunctionContext* ctx, const SRC_VAL& src, DST_VAL* dst) { // Do not count null values towards the number of removes if (src.is_null) { ctx->impl()->increment_num_removes(-1); } if (ctx->impl()->num_removes() >= ctx->impl()->num_updates()) { *dst = DST_VAL::null(); return; } if (src.is_null) { return; } if (dst->is_null) { init_zero(ctx, dst); } dst->val -= src.val; } template<> void AggregateFunctions::sum_remove(FunctionContext* ctx, const DecimalVal& src, DecimalVal* dst) { if (ctx->impl()->num_removes() >= ctx->impl()->num_updates()) { *dst = DecimalVal::null(); return; } if (src.is_null) { return; } if (dst->is_null) { init_zero(ctx, dst); } DecimalValue new_src = DecimalValue::from_decimal_val(src); DecimalValue new_dst = DecimalValue::from_decimal_val(*dst); new_dst = new_dst - new_src; new_dst.to_decimal_val(dst); } StringVal AggregateFunctions::string_val_get_value( FunctionContext* ctx, const StringVal& src) { if (src.is_null) { return src; } StringVal result(ctx, src.len); memcpy(result.ptr, src.ptr, src.len); return result; } StringVal AggregateFunctions::string_val_serialize_or_finalize( FunctionContext* ctx, const StringVal& src) { StringVal result = string_val_get_value(ctx, src); if (!src.is_null) { ctx->free(src.ptr); } return result; } void AggregateFunctions::count_update( FunctionContext*, const AnyVal& src, BigIntVal* dst) { DCHECK(!dst->is_null); if (!src.is_null) { ++dst->val; } } void AggregateFunctions::count_merge(FunctionContext*, const BigIntVal& src, BigIntVal* dst) { 
DCHECK(!dst->is_null); DCHECK(!src.is_null); dst->val += src.val; } void AggregateFunctions::count_remove( FunctionContext*, const AnyVal& src, BigIntVal* dst) { DCHECK(!dst->is_null); if (!src.is_null) { --dst->val; DCHECK_GE(dst->val, 0); } } struct AvgState { double sum; int64_t count; }; struct DecimalAvgState { DecimalVal sum; int64_t count; }; void AggregateFunctions::avg_init(FunctionContext* ctx, StringVal* dst) { dst->is_null = false; dst->len = sizeof(AvgState); dst->ptr = ctx->allocate(dst->len); memset(dst->ptr, 0, sizeof(AvgState)); } void AggregateFunctions::decimal_avg_init(FunctionContext* ctx, StringVal* dst) { dst->is_null = false; dst->len = sizeof(DecimalAvgState); dst->ptr = ctx->allocate(dst->len); // memset(dst->ptr, 0, sizeof(DecimalAvgState)); DecimalAvgState* avg = reinterpret_cast(dst->ptr); avg->count = 0; avg->sum.set_to_zero(); } template void AggregateFunctions::avg_update(FunctionContext* ctx, const T& src, StringVal* dst) { if (src.is_null) { return; } DCHECK(dst->ptr != NULL); DCHECK_EQ(sizeof(AvgState), dst->len); AvgState* avg = reinterpret_cast(dst->ptr); avg->sum += src.val; ++avg->count; } void AggregateFunctions::decimal_avg_update(FunctionContext* ctx, const DecimalVal& src, StringVal* dst) { if (src.is_null) { return; } DCHECK(dst->ptr != NULL); DCHECK_EQ(sizeof(DecimalAvgState), dst->len); DecimalAvgState* avg = reinterpret_cast(dst->ptr); DecimalValue v1 = DecimalValue::from_decimal_val(avg->sum); DecimalValue v2 = DecimalValue::from_decimal_val(src); DecimalValue v = v1 + v2; v.to_decimal_val(&avg->sum); ++avg->count; } template void AggregateFunctions::avg_remove(FunctionContext* ctx, const T& src, StringVal* dst) { // Remove doesn't need to explicitly check the number of calls to Update() or Remove() // because Finalize() returns NULL if count is 0. 
if (src.is_null) { return; } DCHECK(dst->ptr != NULL); DCHECK_EQ(sizeof(AvgState), dst->len); AvgState* avg = reinterpret_cast(dst->ptr); avg->sum -= src.val; --avg->count; DCHECK_GE(avg->count, 0); } void AggregateFunctions::decimal_avg_remove(palo_udf::FunctionContext* ctx, const DecimalVal& src, StringVal* dst) { // Remove doesn't need to explicitly check the number of calls to Update() or Remove() // because Finalize() returns NULL if count is 0. if (src.is_null) { return; } DCHECK(dst->ptr != NULL); DCHECK_EQ(sizeof(DecimalAvgState), dst->len); DecimalAvgState* avg = reinterpret_cast(dst->ptr); DecimalValue v1 = DecimalValue::from_decimal_val(avg->sum); DecimalValue v2 = DecimalValue::from_decimal_val(src); DecimalValue v = v1 - v2; v.to_decimal_val(&avg->sum); --avg->count; DCHECK_GE(avg->count, 0); } void AggregateFunctions::avg_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst) { const AvgState* src_struct = reinterpret_cast(src.ptr); DCHECK(dst->ptr != NULL); DCHECK_EQ(sizeof(AvgState), dst->len); AvgState* dst_struct = reinterpret_cast(dst->ptr); dst_struct->sum += src_struct->sum; dst_struct->count += src_struct->count; } void AggregateFunctions::decimal_avg_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst) { const DecimalAvgState* src_struct = reinterpret_cast(src.ptr); DCHECK(dst->ptr != NULL); DCHECK_EQ(sizeof(DecimalAvgState), dst->len); DecimalAvgState* dst_struct = reinterpret_cast(dst->ptr); DecimalValue v1 = DecimalValue::from_decimal_val(dst_struct->sum); DecimalValue v2 = DecimalValue::from_decimal_val(src_struct->sum); DecimalValue v = v1 + v2; v.to_decimal_val(&dst_struct->sum); dst_struct->count += src_struct->count; } DoubleVal AggregateFunctions::avg_get_value(FunctionContext* ctx, const StringVal& src) { AvgState* val_struct = reinterpret_cast(src.ptr); if (val_struct->count == 0) { return DoubleVal::null(); } return DoubleVal(val_struct->sum / val_struct->count); } DecimalVal 
AggregateFunctions::decimal_avg_get_value(FunctionContext* ctx, const StringVal& src) { DecimalAvgState* val_struct = reinterpret_cast(src.ptr); if (val_struct->count == 0) { return DecimalVal::null(); } DecimalValue v1 = DecimalValue::from_decimal_val(val_struct->sum); DecimalValue v = v1 / DecimalValue(val_struct->count); DecimalVal res; v.to_decimal_val(&res); return res; } DoubleVal AggregateFunctions::avg_finalize(FunctionContext* ctx, const StringVal& src) { if (src.is_null) { return DoubleVal::null(); } DoubleVal result = avg_get_value(ctx, src); ctx->free(src.ptr); return result; } DecimalVal AggregateFunctions::decimal_avg_finalize(FunctionContext* ctx, const StringVal& src) { if (src.is_null) { return DecimalVal::null(); } DecimalVal result = decimal_avg_get_value(ctx, src); ctx->free(src.ptr); return result; } void AggregateFunctions::timestamp_avg_update(FunctionContext* ctx, const DateTimeVal& src, StringVal* dst) { if (src.is_null) { return; } DCHECK(dst->ptr != NULL); DCHECK_EQ(sizeof(AvgState), dst->len); AvgState* avg = reinterpret_cast(dst->ptr); double val = DateTimeValue::from_datetime_val(src); avg->sum += val; ++avg->count; } void AggregateFunctions::timestamp_avg_remove(FunctionContext* ctx, const DateTimeVal& src, StringVal* dst) { if (src.is_null) { return; } DCHECK(dst->ptr != NULL); DCHECK_EQ(sizeof(AvgState), dst->len); AvgState* avg = reinterpret_cast(dst->ptr); double val = DateTimeValue::from_datetime_val(src); avg->sum -= val; --avg->count; DCHECK_GE(avg->count, 0); } DateTimeVal AggregateFunctions::timestamp_avg_get_value(FunctionContext* ctx, const StringVal& src) { AvgState* val_struct = reinterpret_cast(src.ptr); if (val_struct->count == 0) { return DateTimeVal::null(); } DateTimeValue tv(val_struct->sum / val_struct->count); DateTimeVal result; tv.to_datetime_val(&result); return result; } DateTimeVal AggregateFunctions::timestamp_avg_finalize(FunctionContext* ctx, const StringVal& src) { if (src.is_null) { return 
DateTimeVal::null(); } DateTimeVal result = timestamp_avg_get_value(ctx, src); ctx->free(src.ptr); return result; } void AggregateFunctions::count_star_update(FunctionContext*, BigIntVal* dst) { DCHECK(!dst->is_null); ++dst->val; } void AggregateFunctions::count_star_remove(FunctionContext*, BigIntVal* dst) { DCHECK(!dst->is_null); --dst->val; DCHECK_GE(dst->val, 0); } template void AggregateFunctions::sum(FunctionContext* ctx, const SRC_VAL& src, DST_VAL* dst) { if (src.is_null) { return; } if (dst->is_null) { init_zero(ctx, dst); } dst->val += src.val; } template<> void AggregateFunctions::sum(FunctionContext* ctx, const DecimalVal& src, DecimalVal* dst) { if (src.is_null) { return; } if (dst->is_null) { dst->is_null = false; dst->set_to_zero(); } DecimalValue new_src = DecimalValue::from_decimal_val(src); DecimalValue new_dst = DecimalValue::from_decimal_val(*dst); new_dst = new_dst + new_src; new_dst.to_decimal_val(dst); } template<> void AggregateFunctions::sum(FunctionContext* ctx, const LargeIntVal& src, LargeIntVal* dst) { if (src.is_null) { return; } if (dst->is_null) { dst->is_null = false; dst->val = 0; } dst->val += src.val; } template void AggregateFunctions::min(FunctionContext*, const T& src, T* dst) { if (src.is_null) { return; } if (dst->is_null || src.val < dst->val) { *dst = src; } } template void AggregateFunctions::max(FunctionContext*, const T& src, T* dst) { if (src.is_null) { return; } if (dst->is_null || src.val > dst->val) { *dst = src; } } template<> void AggregateFunctions::min(FunctionContext*, const DecimalVal& src, DecimalVal* dst) { if (src.is_null) { return; } if (dst->is_null) { *dst = src; } else { DecimalValue new_src = DecimalValue::from_decimal_val(src); DecimalValue new_dst = DecimalValue::from_decimal_val(*dst); if (new_src < new_dst) { *dst = src; } } } template<> void AggregateFunctions::min(FunctionContext*, const LargeIntVal& src, LargeIntVal* dst) { if (src.is_null) { return; } if (dst->is_null) { *dst = src; return; } 
if (src.val < dst->val) { dst->val = src.val; } } template<> void AggregateFunctions::max(FunctionContext*, const DecimalVal& src, DecimalVal* dst) { if (src.is_null) { return; } if (dst->is_null) { *dst = src; } else { DecimalValue new_src = DecimalValue::from_decimal_val(src); DecimalValue new_dst = DecimalValue::from_decimal_val(*dst); if (new_src > new_dst) { *dst = src; } } } template<> void AggregateFunctions::max(FunctionContext*, const LargeIntVal& src, LargeIntVal* dst) { if (src.is_null) { return; } if (dst->is_null) { *dst = src; return; } if (src.val > dst->val) { dst->val = src.val; } } void AggregateFunctions::init_null_string(FunctionContext* c, StringVal* dst) { dst->is_null = true; dst->ptr = NULL; dst->len = 0; } template<> void AggregateFunctions::min(FunctionContext* ctx, const StringVal& src, StringVal* dst) { if (src.is_null) { return; } if (dst->is_null || StringValue::from_string_val(src) < StringValue::from_string_val(*dst)) { if (!dst->is_null) { ctx->free(dst->ptr); } uint8_t* copy = ctx->allocate(src.len); memcpy(copy, src.ptr, src.len); *dst = StringVal(copy, src.len); } } template<> void AggregateFunctions::max(FunctionContext* ctx, const StringVal& src, StringVal* dst) { if (src.is_null) { return; } if (dst->is_null || StringValue::from_string_val(src) > StringValue::from_string_val(*dst)) { if (!dst->is_null) { ctx->free(dst->ptr); } uint8_t* copy = ctx->allocate(src.len); memcpy(copy, src.ptr, src.len); *dst = StringVal(copy, src.len); } } template<> void AggregateFunctions::min(FunctionContext*, const DateTimeVal& src, DateTimeVal* dst) { if (src.is_null) { return; } if (dst->is_null) { *dst = src; return; } DateTimeValue src_tv = DateTimeValue::from_datetime_val(src); DateTimeValue dst_tv = DateTimeValue::from_datetime_val(*dst); if (src_tv < dst_tv) { *dst = src; } } template<> void AggregateFunctions::max(FunctionContext*, const DateTimeVal& src, DateTimeVal* dst) { if (src.is_null) { return; } if (dst->is_null) { *dst = src; 
return; } DateTimeValue src_tv = DateTimeValue::from_datetime_val(src); DateTimeValue dst_tv = DateTimeValue::from_datetime_val(*dst); if (src_tv > dst_tv) { *dst = src; } } void AggregateFunctions::string_concat(FunctionContext* ctx, const StringVal& src, const StringVal& separator, StringVal* result) { if (src.is_null) { return; } if (result->is_null) { uint8_t* copy = ctx->allocate(src.len); memcpy(copy, src.ptr, src.len); *result = StringVal(copy, src.len); return; } const StringVal* sep_ptr = separator.is_null ? &DEFAULT_STRING_CONCAT_DELIM : &separator; int new_size = result->len + sep_ptr->len + src.len; result->ptr = ctx->reallocate(result->ptr, new_size); memcpy(result->ptr + result->len, sep_ptr->ptr, sep_ptr->len); result->len += sep_ptr->len; memcpy(result->ptr + result->len, src.ptr, src.len); result->len += src.len; } // StringConcat intermediate state starts with the length of the first // separator, followed by the accumulated string. The accumulated // string starts with the separator of the first value that arrived in // StringConcatUpdate(). typedef int StringConcatHeader; // Delimiter to use if the separator is NULL. void AggregateFunctions::string_concat_update(FunctionContext* ctx, const StringVal& src, StringVal* result) { string_concat_update(ctx, src, DEFAULT_STRING_CONCAT_DELIM, result); } void AggregateFunctions::string_concat_update(FunctionContext* ctx, const StringVal& src, const StringVal& separator, StringVal* result) { if (src.is_null) { return; } const StringVal* sep = separator.is_null ? &DEFAULT_STRING_CONCAT_DELIM : &separator; if (result->is_null) { // Header of the intermediate state holds the length of the first separator. 
const int header_len = sizeof(StringConcatHeader); DCHECK(header_len == sizeof(sep->len)); *result = StringVal(ctx->allocate(header_len), header_len); if (result->is_null) { return; } *reinterpret_cast(result->ptr) = sep->len; } result->append(ctx, sep->ptr, sep->len, src.ptr, src.len); } void AggregateFunctions::string_concat_merge(FunctionContext* ctx, const StringVal& src, StringVal* result) { if (src.is_null) { return; } const int header_len = sizeof(StringConcatHeader); if (result->is_null) { // Copy the header from the first intermediate value. *result = StringVal(ctx->allocate(header_len), header_len); if (result->is_null) { return; } *reinterpret_cast(result->ptr) = *reinterpret_cast(src.ptr); } // Append the string portion of the intermediate src to result (omit src's header). result->append(ctx, src.ptr + header_len, src.len - header_len); } StringVal AggregateFunctions::string_concat_finalize(FunctionContext* ctx, const StringVal& src) { if (src.is_null) { return src; } const int header_len = sizeof(StringConcatHeader); DCHECK(src.len >= header_len); int sep_len = *reinterpret_cast(src.ptr); DCHECK(src.len >= header_len + sep_len); // Remove the header and the first separator. StringVal result = StringVal::copy_from(ctx, src.ptr + header_len + sep_len, src.len - header_len - sep_len); ctx->free(src.ptr); return result; } // Compute distinctpc and distinctpcsa using Flajolet and Martin's algorithm // (Probabilistic Counting Algorithms for Data Base Applications) // We have implemented two variants here: one with stochastic averaging (with PCSA // postfix) and one without. // There are 4 phases to compute the aggregate: // 1. allocate a bitmap, stored in the aggregation tuple's output string slot // 2. update the bitmap per row (UpdateDistinctEstimateSlot) // 3. for distributed plan, merge the bitmaps from all the nodes // (UpdateMergeEstimateSlot) // 4. 
compute the estimate using the bitmaps when all the rows are processed // (FinalizeEstimateSlot) const static int NUM_PC_BITMAPS = 64; // number of bitmaps const static int PC_BITMAP_LENGTH = 32; // the length of each bit map const static float PC_THETA = 0.77351f; // the magic number to compute the final result void AggregateFunctions::pc_init(FunctionContext* c, StringVal* dst) { // Initialize the distinct estimate bit map - Probabilistic Counting Algorithms for Data // Base Applications (Flajolet and Martin) // // The bitmap is a 64bit(1st index) x 32bit(2nd index) matrix. // So, the string length of 256 byte is enough. // The layout is: // row 1: 8bit 8bit 8bit 8bit // row 2: 8bit 8bit 8bit 8bit // ... .. // ... .. // row 64: 8bit 8bit 8bit 8bit // // Using 32bit length, we can count up to 10^8. This will not be enough for Fact table // primary key, but once we approach the limit, we could interpret the result as // "every row is distinct". // // We use "string" type for DISTINCT_PC function so that we can use the string // slot to hold the bitmaps. dst->is_null = false; int str_len = NUM_PC_BITMAPS * PC_BITMAP_LENGTH / 8; dst->ptr = c->allocate(str_len); dst->len = str_len; memset(dst->ptr, 0, str_len); } static inline void set_distinct_estimate_bit(uint8_t* bitmap, uint32_t row_index, uint32_t bit_index) { // We need to convert Bitmap[alpha,index] into the index of the string. // alpha tells which of the 32bit we've to jump to. // index then lead us to the byte and bit. uint32_t* int_bitmap = reinterpret_cast(bitmap); int_bitmap[row_index] |= (1 << bit_index); } static inline bool get_distinct_estimate_bit(uint8_t* bitmap, uint32_t row_index, uint32_t bit_index) { uint32_t* int_bitmap = reinterpret_cast(bitmap); return ((int_bitmap[row_index] & (1 << bit_index)) > 0); } template void AggregateFunctions::pc_update(FunctionContext* c, const T& input, StringVal* dst) { if (input.is_null) { return; } // Core of the algorithm. 
This is a direct translation of the code in the paper. // Please see the paper for details. For simple averaging, we need to compute hash // values NUM_PC_BITMAPS times using NUM_PC_BITMAPS different hash functions (by using a // different seed). for (int i = 0; i < NUM_PC_BITMAPS; ++i) { uint32_t hash_value = AnyValUtil::hash(input, i); int bit_index = __builtin_ctz(hash_value); if (UNLIKELY(hash_value == 0)) { bit_index = PC_BITMAP_LENGTH - 1; } // Set bitmap[i, bit_index] to 1 set_distinct_estimate_bit(dst->ptr, i, bit_index); } } template void AggregateFunctions::pcsa_update(FunctionContext* c, const T& input, StringVal* dst) { if (input.is_null) { return; } // Core of the algorithm. This is a direct translation of the code in the paper. // Please see the paper for details. Using stochastic averaging, we only need to // the hash value once for each row. uint32_t hash_value = AnyValUtil::hash(input, 0); uint32_t row_index = hash_value % NUM_PC_BITMAPS; // We want the zero-based position of the least significant 1-bit in binary // representation of hash_value. __builtin_ctz does exactly this because it returns // the number of trailing 0-bits in x (or undefined if x is zero). 
int bit_index = __builtin_ctz(hash_value / NUM_PC_BITMAPS); if (UNLIKELY(hash_value == 0)) { bit_index = PC_BITMAP_LENGTH - 1; } // Set bitmap[row_index, bit_index] to 1 set_distinct_estimate_bit(dst->ptr, row_index, bit_index); } std::string distinct_estimate_bitmap_to_string(uint8_t* v) { std::stringstream debugstr; for (int i = 0; i < NUM_PC_BITMAPS; ++i) { for (int j = 0; j < PC_BITMAP_LENGTH; ++j) { // print bitmap[i][j] debugstr << get_distinct_estimate_bit(v, i, j); } debugstr << "\n"; } debugstr << "\n"; return debugstr.str(); } void AggregateFunctions::pc_merge(FunctionContext* c, const StringVal& src, StringVal* dst) { DCHECK(!src.is_null); DCHECK(!dst->is_null); DCHECK_EQ(src.len, NUM_PC_BITMAPS * PC_BITMAP_LENGTH / 8); // Merge the bits // I think _mm_or_ps can do it, but perf doesn't really matter here. We call this only // once group per node. for (int i = 0; i < NUM_PC_BITMAPS * PC_BITMAP_LENGTH / 8; ++i) { *(dst->ptr + i) |= *(src.ptr + i); } VLOG_ROW << "UpdateMergeEstimateSlot Src Bit map:\n" << distinct_estimate_bitmap_to_string(src.ptr); VLOG_ROW << "UpdateMergeEstimateSlot Dst Bit map:\n" << distinct_estimate_bitmap_to_string(dst->ptr); } double distince_estimate_finalize(const StringVal& src) { DCHECK(!src.is_null); DCHECK_EQ(src.len, NUM_PC_BITMAPS * PC_BITMAP_LENGTH / 8); VLOG_ROW << "FinalizeEstimateSlot Bit map:\n" << distinct_estimate_bitmap_to_string(src.ptr); // We haven't processed any rows if none of the bits are set. Therefore, we have zero // distinct rows. We're overwriting the result in the same string buffer we've // allocated. bool is_empty = true; for (int i = 0; i < NUM_PC_BITMAPS * PC_BITMAP_LENGTH / 8; ++i) { if (src.ptr[i] != 0) { is_empty = false; break; } } if (is_empty) { return 0; } // Convert the bitmap to a number, please see the paper for details // In short, we count the average number of leading 1s (per row) in the bit map. 
// The number is proportional to the log2(1/NUM_PC_BITMAPS of the actual number of // distinct). // To get the actual number of distinct, we'll do 2^avg / PC_THETA. // PC_THETA is a magic number. int sum = 0; for (int i = 0; i < NUM_PC_BITMAPS; ++i) { int row_bit_count = 0; // Count the number of leading ones for each row in the bitmap // We could have used the build in __builtin_clz to count of number of leading zeros // but we first need to invert the 1 and 0. while (get_distinct_estimate_bit(src.ptr, i, row_bit_count) && row_bit_count < PC_BITMAP_LENGTH) { ++row_bit_count; } sum += row_bit_count; } double avg = static_cast(sum) / static_cast(NUM_PC_BITMAPS); double result = std::pow(static_cast(2), avg) / PC_THETA; return result; } StringVal AggregateFunctions::pc_finalize(FunctionContext* c, const StringVal& src) { double estimate = distince_estimate_finalize(src); int64_t result = estimate; // TODO: this should return bigint. this is a hack std::stringstream ss; ss << result; std::string str = ss.str(); StringVal dst = src; memcpy(dst.ptr, str.c_str(), str.length()); dst.len = str.length(); return dst; } StringVal AggregateFunctions::pcsa_finalize(FunctionContext* c, const StringVal& src) { // When using stochastic averaging, the result has to be multiplied by NUM_PC_BITMAPS. double estimate = distince_estimate_finalize(src) * NUM_PC_BITMAPS; int64_t result = estimate; // TODO: this should return bigint. 
this is a hack std::stringstream ss; ss << result; std::string str = ss.str(); StringVal dst = src; memcpy(dst.ptr, str.c_str(), str.length()); dst.len = str.length(); return dst; } void AggregateFunctions::hll_init(FunctionContext* ctx, StringVal* dst) { int str_len = std::pow(2, HLL_PRECISION); dst->is_null = false; dst->ptr = ctx->allocate(str_len); dst->len = str_len; memset(dst->ptr, 0, str_len); } template void AggregateFunctions::hll_update(FunctionContext* ctx, const T& src, StringVal* dst) { if (src.is_null) { return; } DCHECK(!dst->is_null); DCHECK_EQ(dst->len, std::pow(2, HLL_PRECISION)); uint64_t hash_value = AnyValUtil::hash64_murmur(src, HashUtil::MURMUR_SEED); if (hash_value != 0) { // Use the lower bits to index into the number of streams and then // find the first 1 bit after the index bits. int idx = hash_value % dst->len; // uint8_t first_one_bit = __buiHLL_LENltin_ctzl(hash_value >> HLL_PRECISION) + 1; uint8_t first_one_bit = __builtin_ctzl(hash_value >> HLL_PRECISION) + 1; dst->ptr[idx] = std::max(dst->ptr[idx], first_one_bit); } } void AggregateFunctions::hll_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst) { DCHECK(!dst->is_null); DCHECK(!src.is_null); DCHECK_EQ(dst->len, std::pow(2, HLL_PRECISION)); DCHECK_EQ(src.len, std::pow(2, HLL_PRECISION)); for (int i = 0; i < src.len; ++i) { dst->ptr[i] = std::max(dst->ptr[i], src.ptr[i]); } } StringVal AggregateFunctions::hll_finalize(FunctionContext* ctx, const StringVal& src) { double estimate = hll_algorithm(src); // Output the estimate as ascii string std::stringstream out; out << (int64_t)estimate; std::string out_str = out.str(); StringVal result_str(ctx, out_str.size()); memcpy(result_str.ptr, out_str.c_str(), result_str.len); return result_str; } void AggregateFunctions::hll_union_agg_init(FunctionContext* ctx, StringVal* dst) { int str_len = std::pow(2, HLL_PRECISION); dst->is_null = false; dst->ptr = ctx->allocate(str_len); dst->len = str_len; memset(dst->ptr, 0, str_len); 
} void AggregateFunctions::hll_union_parse_and_cal(HllSetResolver& resolver, StringVal* dst) { if (resolver.get_hll_data_type() == HLL_DATA_EMPTY) { return; } if (resolver.get_hll_data_type() == HLL_DATA_EXPLICIT) { for (int i = 0; i < resolver.get_expliclit_count(); i++) { uint64_t hash_value = resolver.get_expliclit_value(i); int idx = hash_value % dst->len; uint8_t first_one_bit = __builtin_ctzl(hash_value >> HLL_PRECISION) + 1; dst->ptr[idx] = std::max(dst->ptr[idx], first_one_bit); } } else if (resolver.get_hll_data_type() == HLL_DATA_SPRASE) { std::map& sparse_map = resolver.get_sparse_map(); for (std::map::iterator iter = sparse_map.begin(); iter != sparse_map.end(); iter++) { dst->ptr[iter->first] = std::max(dst->ptr[iter->first], (uint8_t)iter->second); } } else if (resolver.get_hll_data_type() == HLL_DATA_FULL) { char* full_value = resolver.get_full_value(); for (int i = 0; i < HLL_SETS_BYTES_NUM; i++) { dst->ptr[i] = std::max(dst->ptr[i], (uint8_t)full_value[i]); } } return ; } void AggregateFunctions::hll_union_agg_update(FunctionContext* ctx, const StringVal& src, StringVal* dst) { if (src.is_null) { return; } DCHECK(!dst->is_null); DCHECK_EQ(dst->len, std::pow(2, HLL_PRECISION)); HllSetResolver resolver; resolver.init((char*)src.ptr, src.len); resolver.parse(); hll_union_parse_and_cal(resolver, dst); return ; } void AggregateFunctions::hll_union_agg_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst) { DCHECK(!dst->is_null); DCHECK(!src.is_null); DCHECK_EQ(dst->len, HLL_SETS_BYTES_NUM); DCHECK_EQ(src.len, HLL_SETS_BYTES_NUM); for (int i = 0; i < src.len; ++i) { dst->ptr[i] = std::max(dst->ptr[i], src.ptr[i]); } } palo_udf::StringVal AggregateFunctions::hll_union_agg_finalize(palo_udf::FunctionContext* ctx, const StringVal& src) { double estimate = hll_algorithm(src); std::stringstream out; out << (int64_t)estimate; std::string out_str = out.str(); StringVal result_str(ctx, out_str.size()); memcpy(result_str.ptr, out_str.c_str(), 
result_str.len); return result_str; } int64_t AggregateFunctions::hll_algorithm(const palo_udf::StringVal& src) { DCHECK(!src.is_null); DCHECK_EQ(src.len, HLL_SETS_BYTES_NUM); const int num_streams = HLL_SETS_BYTES_NUM; // Empirical constants for the algorithm. float alpha = 0; if (num_streams == 16) { alpha = 0.673f; } else if (num_streams == 32) { alpha = 0.697f; } else if (num_streams == 64) { alpha = 0.709f; } else { alpha = 0.7213f / (1 + 1.079f / num_streams); } float harmonic_mean = 0; int num_zero_registers = 0; for (int i = 0; i < src.len; ++i) { harmonic_mean += powf(2.0f, -src.ptr[i]); if (src.ptr[i] == 0) { ++num_zero_registers; } } harmonic_mean = 1.0f / harmonic_mean; double estimate = alpha * num_streams * num_streams * harmonic_mean; // according to HerperLogLog current correction, if E is cardinal // E =< num_streams * 2.5 , LC has higher accuracy. // num_streams * 2.5 < E , HerperLogLog has higher accuracy. // Generally , we can use HerperLogLog to produce value as E. if (estimate <= num_streams * 2.5 && num_zero_registers != 0) { // Estimated cardinality is too low. Hll is too inaccurate here, instead use // linear counting. estimate = num_streams * log(static_cast(num_streams) / num_zero_registers); } else if (num_streams == 16384 && estimate < 72000) { // when Linear Couint change to HerperLoglog according to HerperLogLog Correction, // there are relatively large fluctuations, we fixed the problem refer to redis. double bias = 5.9119 * 1.0e-18 * (estimate * estimate * estimate * estimate) - 1.4253 * 1.0e-12 * (estimate * estimate * estimate) + 1.2940 * 1.0e-7 * (estimate * estimate) - 5.2921 * 1.0e-3 * estimate + 83.3216; estimate -= estimate * (bias / 100); } return (int64_t)(estimate + 0.5); } // TODO chenhao , reduce memory copy // multi distinct state for numertic // serialize order type:value:value:value ... 
template class MultiDistinctNumericState { public: static void create(StringVal* dst) { dst->is_null = false; const int state_size = sizeof(MultiDistinctNumericState); MultiDistinctNumericState* state = new MultiDistinctNumericState(); if (std::is_same::value) { state->_type = FunctionContext::TYPE_TINYINT; } else if (std::is_same::value) { state->_type = FunctionContext::TYPE_SMALLINT; } else if (std::is_same::value) { state->_type = FunctionContext::TYPE_INT; } else if (std::is_same::value) { state->_type = FunctionContext::TYPE_BIGINT; } else if (std::is_same::value) { state->_type = FunctionContext::TYPE_LARGEINT; } else if (std::is_same::value) { state->_type = FunctionContext::TYPE_DOUBLE; } else if (std::is_same::value) { state->_type = FunctionContext::TYPE_FLOAT; } else { DCHECK(false); } dst->len = state_size; dst->ptr = (uint8_t*)state; } static void destory(const StringVal& dst) { delete (MultiDistinctNumericState*)dst.ptr; } void update(T& t) { _set.insert(t); } // type:one byte value:sizeof(T) StringVal serialize(FunctionContext* ctx) { size_t type_size = sizeof(((T*)0)->val); const size_t serialized_set_length = sizeof(uint8_t) + type_size * _set.size(); StringVal result(ctx, serialized_set_length); uint8_t* type_writer = result.ptr; // type *type_writer = (uint8_t)_type; type_writer++; // value for (auto& value : _set) { memcpy(type_writer, &value.val, type_size); type_writer += type_size; } return result; } void unserialize(StringVal& src) { size_t type_size = sizeof(((T*)0)->val); const uint8_t* type_reader = src.ptr; const uint8_t* end = src.ptr + src.len; // type _type = (FunctionContext::Type)*type_reader; type_reader++; // value while (type_reader < end) { T value; value.is_null = false; memcpy(&value.val, type_reader, type_size); _set.insert(value); type_reader += type_size; } } // merge set void merge(MultiDistinctNumericState& state) { _set.insert(state._set.begin(), state._set.end()); } // count BigIntVal count_finalize() { return 
BigIntVal(_set.size()); } // sum for double, decimal DoubleVal sum_finalize_double() { double sum = 0; for (auto& value : _set) { sum += value.val; } return DoubleVal(sum); } // sum for largeint LargeIntVal sum_finalize_largeint() { __int128 sum = 0; for (auto& value : _set) { sum += value.val; } return LargeIntVal(sum); } // sum for tinyint, smallint, int, bigint BigIntVal sum_finalize_bigint() { int64_t sum = 0; for (auto& value : _set) { sum += value.val; } return BigIntVal(sum); } FunctionContext::Type set_type() { return _type; } private: class NumericHashHelper { public: size_t operator()(const T& obj) const { size_t result = AnyValUtil::hash64_murmur(obj, HashUtil::MURMUR_SEED); return result; } }; std::unordered_set _set; // Because Anyval does not provide the hash function, in order // to adopt the type different from the template, the pointer is used // HybirdSetBase* _set; // _type is serialized into buffer by one byte FunctionContext::Type _type; }; // multi distinct state for string // // serialize order type:len:value:len:value ... 
class MultiDistinctStringCountState { public: static void create(StringVal* dst) { dst->is_null = false; const int state_size = sizeof(MultiDistinctStringCountState); MultiDistinctStringCountState* state = new MultiDistinctStringCountState(); // type length state->_type = FunctionContext::TYPE_STRING; dst->len = state_size; dst->ptr = (uint8_t*)state; } static void destory(const StringVal& dst) { delete (MultiDistinctStringCountState*)dst.ptr; } inline void update(StringValue* sv) { _set.insert(sv); } StringVal serialize(FunctionContext* ctx) { // calculate total serialize buffer length int total_serialized_set_length = 1; HybirdSetBase::IteratorBase* iterator = _set.begin(); while (iterator->has_next()) { const StringValue* value = reinterpret_cast(iterator->get_value()); total_serialized_set_length += STRING_LENGTH_RECORD_LENGTH + value->len; iterator->next(); } StringVal result(ctx, total_serialized_set_length); uint8_t* writer = result.ptr; // type *writer = _type; writer ++; iterator = _set.begin(); while (iterator->has_next()) { const StringValue* value = reinterpret_cast (iterator->get_value()); // length, it is unnecessary to consider little or big endian for // all running in little-endian. 
*(int*)writer = value->len; writer += STRING_LENGTH_RECORD_LENGTH; // value memcpy(writer, value->ptr, value->len); writer += value->len; iterator->next(); } return result; } void unserialize(StringVal& src) { uint8_t* reader = src.ptr; // skip type ,no used now _type = (FunctionContext::Type)*reader; DCHECK(_type == FunctionContext::TYPE_STRING); reader ++; const uint8_t* end = src.ptr + src.len; while (reader < end) { const int length = *(int*)reader; reader += STRING_LENGTH_RECORD_LENGTH; StringValue value((char*)reader, length); _set.insert(&value); reader += length; } DCHECK(reader == end); } // merge set void merge(MultiDistinctStringCountState& state) { _set.insert(&(state._set)); } BigIntVal finalize() { return BigIntVal(_set.size()); } FunctionContext::Type set_type() { return _type; } static const int STRING_LENGTH_RECORD_LENGTH = 4; private: StringValueSet _set; // _type is serialized into buffer by one byte FunctionContext::Type _type; }; // multi distinct state for decimal // // serialize order type:int_len:frac_len:sign:int_len ... class MultiDistinctDecimalState { public: static void create(StringVal* dst) { dst->is_null = false; const int state_size = sizeof(MultiDistinctDecimalState); MultiDistinctDecimalState* state = new MultiDistinctDecimalState(); state->_type = FunctionContext::TYPE_DECIMAL; dst->len = state_size; dst->ptr = (uint8_t*)state; } static void destory(const StringVal& dst) { delete (MultiDistinctDecimalState*)dst.ptr; } void update(DecimalVal& t) { _set.insert(DecimalValue::from_decimal_val(t)); } // type:one byte value:sizeof(T) StringVal serialize(FunctionContext* ctx) { const int serialized_set_length = sizeof(uint8_t) + (DECIMAL_INT_LEN_BYTE_SIZE + DECIMAL_FRAC_BYTE_SIZE + DECIMAL_SIGN_BYTE_SIZE + DECIMAL_BUFFER_BYTE_SIZE) * _set.size(); StringVal result(ctx, serialized_set_length); uint8_t* writer = result.ptr; *writer = (uint8_t)_type; writer++; // for int_length and frac_length, uint8_t will not overflow. 
for (auto& value : _set) { *writer = value._int_length; writer += DECIMAL_INT_LEN_BYTE_SIZE; *writer = value._frac_length; writer += DECIMAL_FRAC_BYTE_SIZE; *writer = value._sign; writer += DECIMAL_SIGN_BYTE_SIZE; memcpy(writer, value._buffer, DECIMAL_BUFFER_BYTE_SIZE); writer += DECIMAL_BUFFER_BYTE_SIZE; } return result; } void unserialize(StringVal& src) { const uint8_t* reader = src.ptr; // type _type = (FunctionContext::Type)*reader; reader++; const uint8_t* end = src.ptr + src.len; // value while (reader < end) { DecimalValue value; value._int_length = *reader; reader += DECIMAL_INT_LEN_BYTE_SIZE; value._frac_length = *reader; reader += DECIMAL_FRAC_BYTE_SIZE; value._sign = *reader; reader += DECIMAL_SIGN_BYTE_SIZE; value._buffer_length = DECIMAL_BUFF_LENGTH; memcpy(value._buffer, reader, DECIMAL_BUFFER_BYTE_SIZE); reader += DECIMAL_BUFFER_BYTE_SIZE; _set.insert(value); } } FunctionContext::Type set_type() { return _type; } // merge set void merge(MultiDistinctDecimalState& state) { _set.insert(state._set.begin(), state._set.end()); } // count BigIntVal count_finalize() { return BigIntVal(_set.size()); } DecimalVal sum_finalize() { DecimalValue sum; for (auto& value : _set) { sum += value; } DecimalVal result; sum.to_decimal_val(&result); return result; } private: const int DECIMAL_INT_LEN_BYTE_SIZE = 1; const int DECIMAL_FRAC_BYTE_SIZE = 1; const int DECIMAL_SIGN_BYTE_SIZE = 1; const int DECIMAL_BUFFER_BYTE_SIZE = 36; std::unordered_set _set; FunctionContext::Type _type; }; // multi distinct state for date // // serialize order type:packed_time:type:packed_time:type ... 
class MultiDistinctCountDateState { public: static void create(StringVal* dst) { dst->is_null = false; const int state_size = sizeof(MultiDistinctCountDateState); MultiDistinctCountDateState* state = new MultiDistinctCountDateState(); state->_type = FunctionContext::TYPE_DATETIME; dst->len = state_size; dst->ptr = (uint8_t*)state; } static void destory(const StringVal& dst) { delete (MultiDistinctCountDateState*)dst.ptr; } void update(DateTimeVal& t) { _set.insert(t); } // type:one byte value:sizeof(T) StringVal serialize(FunctionContext* ctx) { const int serialized_set_length = sizeof(uint8_t) + (DATETIME_PACKED_TIME_BYTE_SIZE + DATETIME_TYPE_BYTE_SIZE) * _set.size(); StringVal result(ctx, serialized_set_length); uint8_t* writer = result.ptr; // type *writer = (uint8_t)_type; writer++; // value for (auto& value : _set) { int64_t* packed_time_writer = (int64_t*)writer; *packed_time_writer = value.packed_time; writer += DATETIME_PACKED_TIME_BYTE_SIZE; int* type_writer = (int*)writer; *type_writer = value.type; writer += DATETIME_TYPE_BYTE_SIZE; } return result; } void unserialize(StringVal& src) { const uint8_t* reader = src.ptr; // type _type = (FunctionContext::Type)*reader; reader++; const uint8_t* end = src.ptr + src.len; // value while (reader < end) { DateTimeVal value; value.is_null = false; int64_t* packed_time_reader = (int64_t*)reader; value.packed_time = *packed_time_reader; reader += DATETIME_PACKED_TIME_BYTE_SIZE; int* type_reader = (int*)reader; value.type = *type_reader; reader += DATETIME_TYPE_BYTE_SIZE; _set.insert(value); } } // merge set void merge(MultiDistinctCountDateState& state) { _set.insert(state._set.begin(), state._set.end()); } // count BigIntVal count_finalize() { return BigIntVal(_set.size()); } FunctionContext::Type set_type() { return _type; } private: class DateTimeHashHelper { public: size_t operator()(const DateTimeVal& obj) const { size_t result = AnyValUtil::hash64_murmur(obj, HashUtil::MURMUR_SEED); return result; } }; const 
int DATETIME_PACKED_TIME_BYTE_SIZE = 8; const int DATETIME_TYPE_BYTE_SIZE = 4; std::unordered_set _set; FunctionContext::Type _type; }; template void AggregateFunctions::count_or_sum_distinct_numeric_init(FunctionContext* ctx, StringVal* dst) { MultiDistinctNumericState::create(dst); } void AggregateFunctions::count_distinct_string_init(FunctionContext* ctx, StringVal* dst) { MultiDistinctStringCountState::create(dst); } void AggregateFunctions::count_or_sum_distinct_decimal_init(FunctionContext* ctx, StringVal* dst) { MultiDistinctDecimalState::create(dst); } void AggregateFunctions::count_distinct_date_init(FunctionContext* ctx, StringVal* dst) { MultiDistinctCountDateState::create(dst); } template void AggregateFunctions::count_or_sum_distinct_numeric_update(FunctionContext* ctx, T& src, StringVal* dst) { DCHECK(!dst->is_null); if (src.is_null) return; MultiDistinctNumericState* state = reinterpret_cast*>(dst->ptr); state->update(src); } void AggregateFunctions::count_distinct_string_update(FunctionContext* ctx, StringVal& src, StringVal* dst) { DCHECK(!dst->is_null); if (src.is_null) return; MultiDistinctStringCountState* state = reinterpret_cast(dst->ptr); StringValue sv = StringValue::from_string_val(src); state->update(&sv); } void AggregateFunctions::count_or_sum_distinct_decimal_update(FunctionContext* ctx, DecimalVal& src, StringVal* dst) { DCHECK(!dst->is_null); if (src.is_null) return; MultiDistinctDecimalState* state = reinterpret_cast(dst->ptr); state->update(src); } void AggregateFunctions::count_distinct_date_update(FunctionContext* ctx, DateTimeVal& src, StringVal* dst) { DCHECK(!dst->is_null); if (src.is_null) return; MultiDistinctCountDateState* state = reinterpret_cast(dst->ptr); state->update(src); } template void AggregateFunctions::count_or_sum_distinct_numeric_merge(FunctionContext* ctx, StringVal& src, StringVal* dst) { DCHECK(!dst->is_null); DCHECK(!src.is_null); MultiDistinctNumericState* dst_state = reinterpret_cast*>(dst->ptr); // 
unserialize src StringVal src_state_val; MultiDistinctNumericState::create(&src_state_val); MultiDistinctNumericState* src_state = reinterpret_cast*>(src_state_val.ptr); src_state->unserialize(src); DCHECK(dst_state->set_type() == src_state->set_type()); dst_state->merge(*src_state); MultiDistinctNumericState::destory(src_state_val); } void AggregateFunctions::count_distinct_string_merge(FunctionContext* ctx, StringVal& src, StringVal* dst) { DCHECK(!dst->is_null); DCHECK(!src.is_null); MultiDistinctStringCountState* dst_state = reinterpret_cast(dst->ptr); // unserialize src StringVal src_state_val; MultiDistinctStringCountState::create(&src_state_val); MultiDistinctStringCountState* src_state = reinterpret_cast(src_state_val.ptr); src_state->unserialize(src); DCHECK(dst_state->set_type() == src_state->set_type()); dst_state->merge(*src_state); MultiDistinctStringCountState::destory(src_state_val); } void AggregateFunctions::count_or_sum_distinct_decimal_merge(FunctionContext* ctx, StringVal& src, StringVal* dst) { DCHECK(!dst->is_null); DCHECK(!src.is_null); MultiDistinctDecimalState* dst_state = reinterpret_cast(dst->ptr); // unserialize src StringVal src_state_val; MultiDistinctDecimalState::create(&src_state_val); MultiDistinctDecimalState* src_state = reinterpret_cast(src_state_val.ptr); src_state->unserialize(src); DCHECK(dst_state->set_type() == src_state->set_type()); dst_state->merge(*src_state); MultiDistinctDecimalState::destory(src_state_val); } void AggregateFunctions::count_distinct_date_merge(FunctionContext* ctx, StringVal& src, StringVal* dst) { DCHECK(!dst->is_null); DCHECK(!src.is_null); MultiDistinctCountDateState* dst_state = reinterpret_cast(dst->ptr); // unserialize src StringVal src_state_val; MultiDistinctCountDateState::create(&src_state_val); MultiDistinctCountDateState* src_state = reinterpret_cast(src_state_val.ptr); src_state->unserialize(src); DCHECK(dst_state->set_type() == src_state->set_type()); dst_state->merge(*src_state); 
MultiDistinctCountDateState::destory(src_state_val); } template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctNumericState* state = reinterpret_cast*>(state_sv.ptr); StringVal result = state->serialize(ctx); // release original object MultiDistinctNumericState::destory(state_sv); return result; } StringVal AggregateFunctions::count_distinct_string_serialize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctStringCountState* state = reinterpret_cast(state_sv.ptr); StringVal result = state->serialize(ctx); // release original object MultiDistinctStringCountState::destory(state_sv); return result; } StringVal AggregateFunctions::count_or_sum_distinct_decimal_serialize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctDecimalState* state = reinterpret_cast(state_sv.ptr); StringVal result = state->serialize(ctx); // release original object MultiDistinctDecimalState::destory(state_sv); return result; } StringVal AggregateFunctions::count_distinct_date_serialize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctCountDateState* state = reinterpret_cast(state_sv.ptr); StringVal result = state->serialize(ctx); // release original object MultiDistinctCountDateState::destory(state_sv); return result; } template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctNumericState* state = reinterpret_cast*>(state_sv.ptr); BigIntVal result = state->count_finalize(); MultiDistinctNumericState::destory(state_sv); return result; } BigIntVal AggregateFunctions::count_distinct_string_finalize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctStringCountState* state = reinterpret_cast(state_sv.ptr); BigIntVal 
result = state->finalize(); MultiDistinctStringCountState::destory(state_sv); return result; } template DoubleVal AggregateFunctions::sum_distinct_double_finalize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctNumericState* state = reinterpret_cast*>(state_sv.ptr); DoubleVal result = state->sum_finalize_double(); MultiDistinctNumericState::destory(state_sv); return result; } template LargeIntVal AggregateFunctions::sum_distinct_largeint_finalize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctNumericState* state = reinterpret_cast*>(state_sv.ptr); LargeIntVal result = state->sum_finalize_largeint(); MultiDistinctNumericState::destory(state_sv); return result; } template BigIntVal AggregateFunctions::sum_distinct_bigint_finalize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctNumericState* state = reinterpret_cast*>(state_sv.ptr); BigIntVal result = state->sum_finalize_bigint(); MultiDistinctNumericState::destory(state_sv); return result; } BigIntVal AggregateFunctions::count_distinct_decimal_finalize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctDecimalState* state = reinterpret_cast(state_sv.ptr); BigIntVal result = state->count_finalize(); MultiDistinctDecimalState::destory(state_sv); return result; } DecimalVal AggregateFunctions::sum_distinct_decimal_finalize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctDecimalState* state = reinterpret_cast(state_sv.ptr); DecimalVal result = state->sum_finalize(); MultiDistinctDecimalState::destory(state_sv); return result; } BigIntVal AggregateFunctions::count_distinct_date_finalize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); MultiDistinctCountDateState* state = reinterpret_cast(state_sv.ptr); BigIntVal result = state->count_finalize(); 
MultiDistinctCountDateState::destory(state_sv); return result; } // An implementation of a simple single pass variance algorithm. A standard UDA must // be single pass (i.e. does not scan the table more than once), so the most canonical // two pass approach is not practical. struct KnuthVarianceState { double mean; double m2; int64_t count; }; // Set pop=true for population variance, false for sample variance static double compute_knuth_variance(const KnuthVarianceState& state, bool pop) { // Return zero for 1 tuple specified by // http://docs.oracle.com/cd/B19306_01/server.102/b14200/functions212.htm if (state.count == 1) return 0.0; if (pop) return state.m2 / state.count; return state.m2 / (state.count - 1); } void AggregateFunctions::knuth_var_init(FunctionContext* ctx, StringVal* dst) { dst->is_null = false; // TODO(zc) dst->len = sizeof(KnuthVarianceState); dst->ptr = ctx->allocate(dst->len); DCHECK_EQ(dst->len, sizeof(KnuthVarianceState)); memset(dst->ptr, 0, dst->len); } template void AggregateFunctions::knuth_var_update(FunctionContext* ctx, const T& src, StringVal* dst) { DCHECK(!dst->is_null); DCHECK_EQ(dst->len, sizeof(KnuthVarianceState)); if (src.is_null) return; KnuthVarianceState* state = reinterpret_cast(dst->ptr); double temp = 1 + state->count; double delta = src.val - state->mean; double r = delta / temp; state->mean += r; state->m2 += state->count * delta * r; state->count = temp; } void AggregateFunctions::knuth_var_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst) { DCHECK(!dst->is_null); DCHECK_EQ(dst->len, sizeof(KnuthVarianceState)); DCHECK(!src.is_null); DCHECK_EQ(src.len, sizeof(KnuthVarianceState)); // Reference implementation: // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm KnuthVarianceState* src_state = reinterpret_cast(src.ptr); KnuthVarianceState* dst_state = reinterpret_cast(dst->ptr); if (src_state->count == 0) return; double delta = dst_state->mean - src_state->mean; double 
sum_count = dst_state->count + src_state->count; dst_state->mean = src_state->mean + delta * (dst_state->count / sum_count); dst_state->m2 = (src_state->m2) + dst_state->m2 + (delta * delta) * (src_state->count * dst_state->count / sum_count); dst_state->count = sum_count; } DoubleVal AggregateFunctions::knuth_var_finalize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); KnuthVarianceState* state = reinterpret_cast(state_sv.ptr); if (state->count == 0 || state->count == 1) return DoubleVal::null(); double variance = compute_knuth_variance(*state, false); ctx->free(state_sv.ptr); return DoubleVal(variance); } DoubleVal AggregateFunctions::knuth_var_pop_finalize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); DCHECK_EQ(state_sv.len, sizeof(KnuthVarianceState)); KnuthVarianceState* state = reinterpret_cast(state_sv.ptr); if (state->count == 0) return DoubleVal::null(); double variance = compute_knuth_variance(*state, true); ctx->free(state_sv.ptr); return DoubleVal(variance); } DoubleVal AggregateFunctions::knuth_stddev_finalize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); DCHECK_EQ(state_sv.len, sizeof(KnuthVarianceState)); KnuthVarianceState* state = reinterpret_cast(state_sv.ptr); if (state->count == 0 || state->count == 1) return DoubleVal::null(); double variance = sqrt(compute_knuth_variance(*state, false)); ctx->free(state_sv.ptr); return DoubleVal(variance); } DoubleVal AggregateFunctions::knuth_stddev_pop_finalize(FunctionContext* ctx, const StringVal& state_sv) { DCHECK(!state_sv.is_null); DCHECK_EQ(state_sv.len, sizeof(KnuthVarianceState)); KnuthVarianceState* state = reinterpret_cast(state_sv.ptr); if (state->count == 0) return DoubleVal::null(); double variance = sqrt(compute_knuth_variance(*state, true)); ctx->free(state_sv.ptr); return DoubleVal(variance); } struct RankState { int64_t rank; int64_t count; RankState() : rank(1), count(0) { } }; void 
AggregateFunctions::rank_init(FunctionContext* ctx, StringVal* dst) { int str_len = sizeof(RankState); dst->is_null = false; dst->ptr = ctx->allocate(str_len); dst->len = str_len; *reinterpret_cast(dst->ptr) = RankState(); } void AggregateFunctions::rank_update(FunctionContext* ctx, StringVal* dst) { DCHECK(!dst->is_null); DCHECK_EQ(dst->len, sizeof(RankState)); RankState* state = reinterpret_cast(dst->ptr); ++state->count; } void AggregateFunctions::dense_rank_update(FunctionContext* ctx, StringVal* dst) { } BigIntVal AggregateFunctions::rank_get_value(FunctionContext* ctx, StringVal& src_val) { DCHECK(!src_val.is_null); DCHECK_EQ(src_val.len, sizeof(RankState)); RankState* state = reinterpret_cast(src_val.ptr); DCHECK_GT(state->count, 0); DCHECK_GT(state->rank, 0); int64_t result = state->rank; // Prepares future calls for the next rank state->rank += state->count; state->count = 0; return BigIntVal(result); } BigIntVal AggregateFunctions::dense_rank_get_value(FunctionContext* ctx, StringVal& src_val) { DCHECK(!src_val.is_null); DCHECK_EQ(src_val.len, sizeof(RankState)); RankState* state = reinterpret_cast(src_val.ptr); DCHECK_EQ(state->count, 0); DCHECK_GT(state->rank, 0); int64_t result = state->rank; // Prepares future calls for the next rank ++state->rank; return BigIntVal(result); } BigIntVal AggregateFunctions::rank_finalize(FunctionContext* ctx, StringVal& src_val) { DCHECK(!src_val.is_null); DCHECK_EQ(src_val.len, sizeof(RankState)); RankState* state = reinterpret_cast(src_val.ptr); int64_t result = state->rank; ctx->free(src_val.ptr); return BigIntVal(result); } template void AggregateFunctions::last_val_update(FunctionContext* ctx, const T& src, T* dst) { *dst = src; } template <> void AggregateFunctions::last_val_update(FunctionContext* ctx, const StringVal& src, StringVal* dst) { if (src.is_null) { if (!dst->is_null) { ctx->free(dst->ptr); } *dst = StringVal::null(); return; } if (dst->is_null) { dst->ptr = ctx->allocate(src.len); dst->is_null = 
false; } else { dst->ptr = ctx->reallocate(dst->ptr, src.len); } memcpy(dst->ptr, src.ptr, src.len); dst->len = src.len; } template void AggregateFunctions::last_val_remove(FunctionContext* ctx, const T& src, T* dst) { if (ctx->impl()->num_removes() >= ctx->impl()->num_updates()) { *dst = T::null(); } } template <> void AggregateFunctions::last_val_remove(FunctionContext* ctx, const StringVal& src, StringVal* dst) { if (ctx->impl()->num_removes() >= ctx->impl()->num_updates()) { if (!dst->is_null) { ctx->free(dst->ptr); } *dst = StringVal::null(); } } template void AggregateFunctions::first_val_update(FunctionContext* ctx, const T& src, T* dst) { // The first call to first_val_update sets the value of dst. if (ctx->impl()->num_updates() > 1) { return; } // num_updates is incremented before calling Update(), so it should never be 0. // Remove() should never be called for FIRST_VALUE. DCHECK_GT(ctx->impl()->num_updates(), 0); DCHECK_EQ(ctx->impl()->num_removes(), 0); *dst = src; } template <> void AggregateFunctions::first_val_update(FunctionContext* ctx, const IntVal& src, IntVal* dst) { // The first call to FirstValUpdate sets the value of dst. if (ctx->impl()->num_updates() > 1) { return; } // num_updates is incremented before calling Update(), so it should never be 0. // Remove() should never be called for FIRST_VALUE. 
DCHECK_GT(ctx->impl()->num_updates(), 0); DCHECK_EQ(ctx->impl()->num_removes(), 0); *dst = src; } template <> void AggregateFunctions::first_val_update(FunctionContext* ctx, const StringVal& src, StringVal* dst) { if (ctx->impl()->num_updates() > 1) { return; } DCHECK_GT(ctx->impl()->num_updates(), 0); DCHECK_EQ(ctx->impl()->num_removes(), 0); if (src.is_null) { *dst = StringVal::null(); return; } *dst = StringVal(ctx->allocate(src.len), src.len); memcpy(dst->ptr, src.ptr, src.len); } template void AggregateFunctions::first_val_rewrite_update(FunctionContext* ctx, const T& src, const BigIntVal&, T* dst) { last_val_update(ctx, src, dst); } template void AggregateFunctions::offset_fn_init(FunctionContext* ctx, T* dst) { DCHECK_EQ(ctx->get_num_args(), 3); DCHECK(ctx->is_arg_constant(1)); DCHECK(ctx->is_arg_constant(2)); DCHECK_EQ(ctx->get_arg_type(0)->type, ctx->get_arg_type(2)->type); *dst = *static_cast(ctx->get_constant_arg(2)); } /* template <> void AggregateFunctions::offset_fn_init(FunctionContext* ctx, IntVal* dst) { DCHECK_EQ(ctx->get_num_args(), 3); DCHECK(ctx->is_arg_constant(1)); DCHECK(ctx->is_arg_constant(2)); // DCHECK_EQ(*ctx->GetArgType(0), *ctx->GetArgType(2)); *dst = *static_cast(ctx->get_constant_arg(2)); } */ template void AggregateFunctions::offset_fn_update(FunctionContext* ctx, const T& src, const BigIntVal&, const T& default_value, T* dst) { *dst = src; } template <> void AggregateFunctions::offset_fn_update(FunctionContext* ctx, const IntVal& src, const BigIntVal&, const IntVal& default_value, IntVal* dst) { *dst = src; } // Stamp out the templates for the types we need. 
template void AggregateFunctions::init_zero(FunctionContext*, BigIntVal* dst); template void AggregateFunctions::sum_remove( FunctionContext*, const BooleanVal& src, BigIntVal* dst); template void AggregateFunctions::sum_remove( FunctionContext*, const TinyIntVal& src, BigIntVal* dst); template void AggregateFunctions::sum_remove( FunctionContext*, const SmallIntVal& src, BigIntVal* dst); template void AggregateFunctions::sum_remove( FunctionContext*, const IntVal& src, BigIntVal* dst); template void AggregateFunctions::sum_remove( FunctionContext*, const BigIntVal& src, BigIntVal* dst); template void AggregateFunctions::sum_remove( FunctionContext*, const FloatVal& src, DoubleVal* dst); template void AggregateFunctions::sum_remove( FunctionContext*, const DoubleVal& src, DoubleVal* dst); template void AggregateFunctions::sum_remove( FunctionContext*, const DecimalVal& src, DecimalVal* dst); template void AggregateFunctions::sum_remove( FunctionContext*, const LargeIntVal& src, LargeIntVal* dst); template void AggregateFunctions::avg_update( palo_udf::FunctionContext*, palo_udf::BooleanVal const&, palo_udf::StringVal*); template void AggregateFunctions::avg_update( palo_udf::FunctionContext*, palo_udf::IntVal const&, palo_udf::StringVal*); template void AggregateFunctions::avg_remove( palo_udf::FunctionContext*, palo_udf::IntVal const&, palo_udf::StringVal*); template void AggregateFunctions::avg_update( palo_udf::FunctionContext*, palo_udf::BigIntVal const&, palo_udf::StringVal*); template void AggregateFunctions::avg_remove( palo_udf::FunctionContext*, palo_udf::BigIntVal const&, palo_udf::StringVal*); template void AggregateFunctions::avg_update( palo_udf::FunctionContext*, palo_udf::FloatVal const&, palo_udf::StringVal*); template void AggregateFunctions::avg_remove( palo_udf::FunctionContext*, palo_udf::FloatVal const&, palo_udf::StringVal*); template void AggregateFunctions::avg_update( palo_udf::FunctionContext*, palo_udf::DoubleVal const&, 
palo_udf::StringVal*); template void AggregateFunctions::avg_remove( palo_udf::FunctionContext*, palo_udf::DoubleVal const&, palo_udf::StringVal*); //template void AggregateFunctions::AvgUpdate( //palo_udf::FunctionContext*, palo_udf::LargeIntVal const&, palo_udf::StringVal*); //template void AggregateFunctions::AvgRemove( //palo_udf::FunctionContext*, palo_udf::LargeIntVal const&, palo_udf::StringVal*); template void AggregateFunctions::sum( FunctionContext*, const BooleanVal& src, BigIntVal* dst); template void AggregateFunctions::sum( FunctionContext*, const TinyIntVal& src, BigIntVal* dst); template void AggregateFunctions::sum( FunctionContext*, const SmallIntVal& src, BigIntVal* dst); template void AggregateFunctions::sum( FunctionContext*, const IntVal& src, BigIntVal* dst); template void AggregateFunctions::sum( FunctionContext*, const BigIntVal& src, BigIntVal* dst); template void AggregateFunctions::sum( FunctionContext*, const FloatVal& src, DoubleVal* dst); template void AggregateFunctions::sum( FunctionContext*, const DoubleVal& src, DoubleVal* dst); template void AggregateFunctions::min( FunctionContext*, const BooleanVal& src, BooleanVal* dst); template void AggregateFunctions::min( FunctionContext*, const TinyIntVal& src, TinyIntVal* dst); template void AggregateFunctions::min( FunctionContext*, const SmallIntVal& src, SmallIntVal* dst); template void AggregateFunctions::min( FunctionContext*, const IntVal& src, IntVal* dst); template void AggregateFunctions::min( FunctionContext*, const BigIntVal& src, BigIntVal* dst); template void AggregateFunctions::min( FunctionContext*, const FloatVal& src, FloatVal* dst); template void AggregateFunctions::min( FunctionContext*, const DoubleVal& src, DoubleVal* dst); template void AggregateFunctions::min( FunctionContext*, const StringVal& src, StringVal* dst); template void AggregateFunctions::avg_remove( palo_udf::FunctionContext*, palo_udf::BooleanVal const&, palo_udf::StringVal*); template void 
AggregateFunctions::avg_update( palo_udf::FunctionContext*, palo_udf::TinyIntVal const&, palo_udf::StringVal*); template void AggregateFunctions::avg_remove( palo_udf::FunctionContext*, palo_udf::TinyIntVal const&, palo_udf::StringVal*); template void AggregateFunctions::avg_update( palo_udf::FunctionContext*, palo_udf::SmallIntVal const&, palo_udf::StringVal*); template void AggregateFunctions::avg_remove( palo_udf::FunctionContext*, palo_udf::SmallIntVal const&, palo_udf::StringVal*); template void AggregateFunctions::max( FunctionContext*, const BooleanVal& src, BooleanVal* dst); template void AggregateFunctions::max( FunctionContext*, const TinyIntVal& src, TinyIntVal* dst); template void AggregateFunctions::max( FunctionContext*, const SmallIntVal& src, SmallIntVal* dst); template void AggregateFunctions::max( FunctionContext*, const IntVal& src, IntVal* dst); template void AggregateFunctions::max( FunctionContext*, const BigIntVal& src, BigIntVal* dst); template void AggregateFunctions::max( FunctionContext*, const FloatVal& src, FloatVal* dst); template void AggregateFunctions::max( FunctionContext*, const DoubleVal& src, DoubleVal* dst); template void AggregateFunctions::max( FunctionContext*, const StringVal& src, StringVal* dst); template void AggregateFunctions::pc_update( FunctionContext*, const BooleanVal&, StringVal*); template void AggregateFunctions::pc_update( FunctionContext*, const TinyIntVal&, StringVal*); template void AggregateFunctions::pc_update( FunctionContext*, const SmallIntVal&, StringVal*); template void AggregateFunctions::pc_update( FunctionContext*, const IntVal&, StringVal*); template void AggregateFunctions::pc_update( FunctionContext*, const BigIntVal&, StringVal*); template void AggregateFunctions::pc_update( FunctionContext*, const FloatVal&, StringVal*); template void AggregateFunctions::pc_update( FunctionContext*, const DoubleVal&, StringVal*); template void AggregateFunctions::pc_update( FunctionContext*, const StringVal&, 
StringVal*); template void AggregateFunctions::pc_update( FunctionContext*, const DateTimeVal&, StringVal*); template void AggregateFunctions::pcsa_update( FunctionContext*, const BooleanVal&, StringVal*); template void AggregateFunctions::pcsa_update( FunctionContext*, const TinyIntVal&, StringVal*); template void AggregateFunctions::pcsa_update( FunctionContext*, const SmallIntVal&, StringVal*); template void AggregateFunctions::pcsa_update( FunctionContext*, const IntVal&, StringVal*); template void AggregateFunctions::pcsa_update( FunctionContext*, const BigIntVal&, StringVal*); template void AggregateFunctions::pcsa_update( FunctionContext*, const FloatVal&, StringVal*); template void AggregateFunctions::pcsa_update( FunctionContext*, const DoubleVal&, StringVal*); template void AggregateFunctions::pcsa_update( FunctionContext*, const StringVal&, StringVal*); template void AggregateFunctions::pcsa_update( FunctionContext*, const DateTimeVal&, StringVal*); template void AggregateFunctions::hll_update( FunctionContext*, const BooleanVal&, StringVal*); template void AggregateFunctions::hll_update( FunctionContext*, const TinyIntVal&, StringVal*); template void AggregateFunctions::hll_update( FunctionContext*, const SmallIntVal&, StringVal*); template void AggregateFunctions::hll_update( FunctionContext*, const IntVal&, StringVal*); template void AggregateFunctions::hll_update( FunctionContext*, const BigIntVal&, StringVal*); template void AggregateFunctions::hll_update( FunctionContext*, const FloatVal&, StringVal*); template void AggregateFunctions::hll_update( FunctionContext*, const DoubleVal&, StringVal*); template void AggregateFunctions::hll_update( FunctionContext*, const StringVal&, StringVal*); template void AggregateFunctions::hll_update( FunctionContext*, const DateTimeVal&, StringVal*); template void AggregateFunctions::hll_update( FunctionContext*, const LargeIntVal&, StringVal*); template void AggregateFunctions::hll_update( FunctionContext*, const 
DecimalVal&, StringVal*); template void AggregateFunctions::count_or_sum_distinct_numeric_init( FunctionContext* ctx, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_init( FunctionContext* ctx, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_init( FunctionContext* ctx, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_init( FunctionContext* ctx, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_init( FunctionContext* ctx, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_init( FunctionContext* ctx, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_init( FunctionContext* ctx, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_update( FunctionContext* ctx, TinyIntVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_update( FunctionContext* ctx, SmallIntVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_update( FunctionContext* ctx, IntVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_update( FunctionContext* ctx, BigIntVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_update( FunctionContext* ctx, FloatVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_update( FunctionContext* ctx, DoubleVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_update( FunctionContext* ctx, LargeIntVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_merge( FunctionContext* ctx, StringVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_merge( FunctionContext* ctx, StringVal& src, StringVal* dst); template void 
AggregateFunctions::count_or_sum_distinct_numeric_merge( FunctionContext* ctx, StringVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_merge( FunctionContext* ctx, StringVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_merge( FunctionContext* ctx, StringVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_merge( FunctionContext* ctx, StringVal& src, StringVal* dst); template void AggregateFunctions::count_or_sum_distinct_numeric_merge( FunctionContext* ctx, StringVal& src, StringVal* dst); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( FunctionContext* ctx, const StringVal& state_sv); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( FunctionContext* ctx, const StringVal& state_sv); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( FunctionContext* ctx, const StringVal& state_sv); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( FunctionContext* ctx, const StringVal& state_sv); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( FunctionContext* ctx, const StringVal& state_sv); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( FunctionContext* ctx, const StringVal& state_sv); template StringVal AggregateFunctions::count_or_sum_distinct_numeric_serialize( FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( 
FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::count_or_sum_distinct_numeric_finalize( FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::sum_distinct_bigint_finalize( FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::sum_distinct_bigint_finalize( FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::sum_distinct_bigint_finalize( FunctionContext* ctx, const StringVal& state_sv); template BigIntVal AggregateFunctions::sum_distinct_bigint_finalize( FunctionContext* ctx, const StringVal& state_sv); template DoubleVal AggregateFunctions::sum_distinct_double_finalize( FunctionContext* ctx, const StringVal& state_sv); template LargeIntVal AggregateFunctions::sum_distinct_largeint_finalize( FunctionContext* ctx, const StringVal& state_sv); template void AggregateFunctions::knuth_var_update( FunctionContext*, const TinyIntVal&, StringVal*); template void AggregateFunctions::knuth_var_update( FunctionContext*, const SmallIntVal&, StringVal*); template void AggregateFunctions::knuth_var_update( FunctionContext*, const IntVal&, StringVal*); template void AggregateFunctions::knuth_var_update( FunctionContext*, const BigIntVal&, StringVal*); template void AggregateFunctions::knuth_var_update( FunctionContext*, const FloatVal&, StringVal*); template void AggregateFunctions::knuth_var_update( FunctionContext*, const DoubleVal&, StringVal*); template void AggregateFunctions::first_val_update( FunctionContext*, const BooleanVal& src, BooleanVal* dst); template void AggregateFunctions::first_val_update( FunctionContext*, const TinyIntVal& src, TinyIntVal* dst); template void 
AggregateFunctions::first_val_update( FunctionContext*, const SmallIntVal& src, SmallIntVal* dst); template void AggregateFunctions::first_val_update( FunctionContext*, const IntVal& src, IntVal* dst); template void AggregateFunctions::first_val_update( FunctionContext*, const BigIntVal& src, BigIntVal* dst); template void AggregateFunctions::first_val_update( FunctionContext*, const FloatVal& src, FloatVal* dst); template void AggregateFunctions::first_val_update( FunctionContext*, const DoubleVal& src, DoubleVal* dst); template void AggregateFunctions::first_val_update( FunctionContext*, const StringVal& src, StringVal* dst); template void AggregateFunctions::first_val_update( FunctionContext*, const DateTimeVal& src, DateTimeVal* dst); template void AggregateFunctions::first_val_rewrite_update( FunctionContext*, const BooleanVal& src, const BigIntVal&, BooleanVal* dst); template void AggregateFunctions::first_val_rewrite_update( FunctionContext*, const TinyIntVal& src, const BigIntVal&, TinyIntVal* dst); template void AggregateFunctions::first_val_rewrite_update( FunctionContext*, const SmallIntVal& src, const BigIntVal&, SmallIntVal* dst); template void AggregateFunctions::first_val_rewrite_update( FunctionContext*, const IntVal& src, const BigIntVal&, IntVal* dst); template void AggregateFunctions::first_val_rewrite_update( FunctionContext*, const BigIntVal& src, const BigIntVal&, BigIntVal* dst); template void AggregateFunctions::first_val_rewrite_update( FunctionContext*, const FloatVal& src, const BigIntVal&, FloatVal* dst); template void AggregateFunctions::first_val_rewrite_update( FunctionContext*, const DoubleVal& src, const BigIntVal&, DoubleVal* dst); template void AggregateFunctions::first_val_rewrite_update( FunctionContext*, const StringVal& src, const BigIntVal&, StringVal* dst); template void AggregateFunctions::first_val_rewrite_update( FunctionContext*, const DateTimeVal& src, const BigIntVal&, DateTimeVal* dst); template void 
AggregateFunctions::first_val_rewrite_update( FunctionContext*, const DecimalVal& src, const BigIntVal&, DecimalVal* dst); //template void AggregateFunctions::FirstValUpdate( // palo_udf::FunctionContext*, impala::StringValue const&, impala::StringValue*); template void AggregateFunctions::first_val_update( palo_udf::FunctionContext*, palo_udf::DecimalVal const&, palo_udf::DecimalVal*); template void AggregateFunctions::last_val_update( FunctionContext*, const BooleanVal& src, BooleanVal* dst); template void AggregateFunctions::last_val_update( FunctionContext*, const TinyIntVal& src, TinyIntVal* dst); template void AggregateFunctions::last_val_update( FunctionContext*, const SmallIntVal& src, SmallIntVal* dst); template void AggregateFunctions::last_val_update( FunctionContext*, const IntVal& src, IntVal* dst); template void AggregateFunctions::last_val_update( FunctionContext*, const BigIntVal& src, BigIntVal* dst); template void AggregateFunctions::last_val_update( FunctionContext*, const FloatVal& src, FloatVal* dst); template void AggregateFunctions::last_val_update( FunctionContext*, const DoubleVal& src, DoubleVal* dst); template void AggregateFunctions::last_val_update( FunctionContext*, const StringVal& src, StringVal* dst); template void AggregateFunctions::last_val_update( FunctionContext*, const DateTimeVal& src, DateTimeVal* dst); template void AggregateFunctions::last_val_update( FunctionContext*, const DecimalVal& src, DecimalVal* dst); template void AggregateFunctions::last_val_remove( FunctionContext*, const BooleanVal& src, BooleanVal* dst); template void AggregateFunctions::last_val_remove( FunctionContext*, const TinyIntVal& src, TinyIntVal* dst); template void AggregateFunctions::last_val_remove( FunctionContext*, const SmallIntVal& src, SmallIntVal* dst); template void AggregateFunctions::last_val_remove( FunctionContext*, const IntVal& src, IntVal* dst); template void AggregateFunctions::last_val_remove( FunctionContext*, const BigIntVal& 
src, BigIntVal* dst); template void AggregateFunctions::last_val_remove( FunctionContext*, const FloatVal& src, FloatVal* dst); template void AggregateFunctions::last_val_remove( FunctionContext*, const DoubleVal& src, DoubleVal* dst); template void AggregateFunctions::last_val_remove( FunctionContext*, const StringVal& src, StringVal* dst); template void AggregateFunctions::last_val_remove( FunctionContext*, const DateTimeVal& src, DateTimeVal* dst); template void AggregateFunctions::last_val_remove( FunctionContext*, const DecimalVal& src, DecimalVal* dst); template void AggregateFunctions::offset_fn_init( FunctionContext*, BooleanVal*); template void AggregateFunctions::offset_fn_init( FunctionContext*, TinyIntVal*); template void AggregateFunctions::offset_fn_init( FunctionContext*, SmallIntVal*); template void AggregateFunctions::offset_fn_init( FunctionContext*, IntVal*); template void AggregateFunctions::offset_fn_init( FunctionContext*, BigIntVal*); template void AggregateFunctions::offset_fn_init( FunctionContext*, FloatVal*); template void AggregateFunctions::offset_fn_init( FunctionContext*, DoubleVal*); template void AggregateFunctions::offset_fn_init( FunctionContext*, StringVal*); template void AggregateFunctions::offset_fn_init( FunctionContext*, DateTimeVal*); template void AggregateFunctions::offset_fn_init( FunctionContext*, DecimalVal*); template void AggregateFunctions::offset_fn_update( FunctionContext*, const BooleanVal& src, const BigIntVal&, const BooleanVal&, BooleanVal* dst); template void AggregateFunctions::offset_fn_update( FunctionContext*, const TinyIntVal& src, const BigIntVal&, const TinyIntVal&, TinyIntVal* dst); template void AggregateFunctions::offset_fn_update( FunctionContext*, const SmallIntVal& src, const BigIntVal&, const SmallIntVal&, SmallIntVal* dst); template void AggregateFunctions::offset_fn_update( FunctionContext*, const IntVal& src, const BigIntVal&, const IntVal&, IntVal* dst); template void 
AggregateFunctions::offset_fn_update( FunctionContext*, const BigIntVal& src, const BigIntVal&, const BigIntVal&, BigIntVal* dst); template void AggregateFunctions::offset_fn_update( FunctionContext*, const FloatVal& src, const BigIntVal&, const FloatVal&, FloatVal* dst); template void AggregateFunctions::offset_fn_update( FunctionContext*, const DoubleVal& src, const BigIntVal&, const DoubleVal&, DoubleVal* dst); template void AggregateFunctions::offset_fn_update( FunctionContext*, const StringVal& src, const BigIntVal&, const StringVal&, StringVal* dst); template void AggregateFunctions::offset_fn_update( FunctionContext*, const DateTimeVal& src, const BigIntVal&, const DateTimeVal&, DateTimeVal* dst); template void AggregateFunctions::offset_fn_update( FunctionContext*, const DecimalVal& src, const BigIntVal&, const DecimalVal&, DecimalVal* dst); }