cherry pick from #38608
This commit is contained in:
@ -28,6 +28,8 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/exception.h"
|
||||
#include "common/status.h"
|
||||
#include "vec/aggregate_functions/aggregate_function.h"
|
||||
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
|
||||
#include "vec/columns/column.h"
|
||||
@ -57,12 +59,10 @@ template <typename T>
|
||||
struct AggregateFunctionHistogramData {
|
||||
using ColVecType =
|
||||
std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<Decimal128V2>, ColumnVector<T>>;
|
||||
const static size_t DEFAULT_BUCKET_NUM = 128;
|
||||
const static size_t BUCKET_NUM_INIT_VALUE = 0;
|
||||
|
||||
void set_parameters(int input_max_num_buckets) {
|
||||
if (input_max_num_buckets > 0) {
|
||||
max_num_buckets = (size_t)input_max_num_buckets;
|
||||
}
|
||||
}
|
||||
void set_parameters(size_t input_max_num_buckets) { max_num_buckets = input_max_num_buckets; }
|
||||
|
||||
void reset() { ordered_map.clear(); }
|
||||
|
||||
@ -86,6 +86,8 @@ struct AggregateFunctionHistogramData {
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionHistogramData& rhs) {
|
||||
// if rhs.max_num_buckets == 0, it means the input block for serialization is all null
|
||||
// we should discard this data, because histogram only focuses on the non-null data
|
||||
if (!rhs.max_num_buckets) {
|
||||
return;
|
||||
}
|
||||
@ -104,7 +106,6 @@ struct AggregateFunctionHistogramData {
|
||||
|
||||
void write(BufferWritable& buf) const {
|
||||
write_binary(max_num_buckets, buf);
|
||||
|
||||
size_t element_number = (size_t)ordered_map.size();
|
||||
write_binary(element_number, buf);
|
||||
|
||||
@ -148,7 +149,13 @@ struct AggregateFunctionHistogramData {
|
||||
std::string get(const DataTypePtr& data_type) const {
|
||||
std::vector<Bucket<T>> buckets;
|
||||
rapidjson::StringBuffer buffer;
|
||||
build_histogram(buckets, ordered_map, max_num_buckets);
|
||||
// NOTE: We need an extra branch to handle max_num_buckets == 0,
|
||||
// when target column is nullable, and input block is all null,
|
||||
// set_parameters will not be called because of the short-circuit in
|
||||
// AggregateFunctionNullVariadicInline, so max_num_buckets will be 0 in this situation.
|
||||
build_histogram(
|
||||
buckets, ordered_map,
|
||||
max_num_buckets == BUCKET_NUM_INIT_VALUE ? DEFAULT_BUCKET_NUM : max_num_buckets);
|
||||
histogram_to_json(buffer, buckets, data_type);
|
||||
return std::string(buffer.GetString());
|
||||
}
|
||||
@ -162,7 +169,7 @@ struct AggregateFunctionHistogramData {
|
||||
}
|
||||
|
||||
private:
|
||||
size_t max_num_buckets = 128;
|
||||
size_t max_num_buckets = BUCKET_NUM_INIT_VALUE;
|
||||
std::map<T, size_t> ordered_map;
|
||||
};
|
||||
|
||||
@ -186,13 +193,17 @@ public:
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num,
|
||||
Arena* arena) const override {
|
||||
if (columns[0]->is_null_at(row_num)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (has_input_param) {
|
||||
this->data(place).set_parameters(
|
||||
assert_cast<const ColumnInt32*>(columns[1])->get_element(row_num));
|
||||
if constexpr (has_input_param) {
|
||||
Int32 input_max_num_buckets =
|
||||
assert_cast<const ColumnInt32*>(columns[1])->get_element(row_num);
|
||||
if (input_max_num_buckets <= 0) {
|
||||
throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
|
||||
"Invalid max_num_buckets {}, row_num {}",
|
||||
input_max_num_buckets, row_num);
|
||||
}
|
||||
this->data(place).set_parameters(input_max_num_buckets);
|
||||
} else {
|
||||
this->data(place).set_parameters(Data::DEFAULT_BUCKET_NUM);
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, std::string>) {
|
||||
|
||||
@ -118,3 +118,75 @@
|
||||
-- !select_minmax4 --
|
||||
243
|
||||
|
||||
-- !select_histogram_k0 --
|
||||
{"num_buckets":2,"buckets":[{"lower":"0","upper":"0","ndv":1,"count":7,"pre_sum":0},{"lower":"1","upper":"1","ndv":1,"count":8,"pre_sum":7}]}
|
||||
|
||||
-- !select_histogram_k1 --
|
||||
{"num_buckets":1,"buckets":[{"lower":"1","upper":"15","ndv":15,"count":15,"pre_sum":0}]}
|
||||
|
||||
-- !select_histogram_k2 --
|
||||
{"num_buckets":2,"buckets":[{"lower":"-32767","upper":"1985","ndv":9,"count":11,"pre_sum":0},{"lower":"1986","upper":"32767","ndv":5,"count":10,"pre_sum":11}]}
|
||||
|
||||
-- !select_histogram_k3 --
|
||||
{"num_buckets":3,"buckets":[{"lower":"-2147483647","upper":"1001","ndv":3,"count":5,"pre_sum":0},{"lower":"1002","upper":"3021","ndv":2,"count":5,"pre_sum":5},{"lower":"5014","upper":"2147483647","ndv":3,"count":5,"pre_sum":10}]}
|
||||
|
||||
-- !select_histogram_k4 --
|
||||
{"num_buckets":4,"buckets":[{"lower":"-9223372036854775807","upper":"123456","ndv":4,"count":5,"pre_sum":0},{"lower":"7210457","upper":"11011903","ndv":3,"count":5,"pre_sum":5},{"lower":"11011905","upper":"11011920","ndv":2,"count":3,"pre_sum":10},{"lower":"9223372036854775807","upper":"9223372036854775807","ndv":1,"count":2,"pre_sum":13}]}
|
||||
|
||||
-- !select_histogram_k5 --
|
||||
{"num_buckets":5,"buckets":[{"lower":"-654.654","upper":"-0.123","ndv":3,"count":3,"pre_sum":0},{"lower":"0.000","upper":"0.666","ndv":2,"count":3,"pre_sum":3},{"lower":"3.141","upper":"123.123","ndv":3,"count":3,"pre_sum":6},{"lower":"243.325","upper":"1243.500","ndv":2,"count":3,"pre_sum":9},{"lower":"24453.325","upper":"604587.000","ndv":3,"count":3,"pre_sum":12}]}
|
||||
|
||||
-- !select_histogram_k6 --
|
||||
{"num_buckets":2,"buckets":[{"lower":"false","upper":"false","ndv":1,"count":8,"pre_sum":0},{"lower":"true","upper":"true","ndv":1,"count":7,"pre_sum":8}]}
|
||||
|
||||
-- !select_histogram_k7 --
|
||||
{"num_buckets":7,"buckets":[{"lower":"","upper":"du3lnvl","ndv":3,"count":3,"pre_sum":0},{"lower":"jiw3n4","upper":"lifsno","ndv":2,"count":2,"pre_sum":3},{"lower":"wangjuoo4","upper":"wangjuoo5","ndv":2,"count":3,"pre_sum":5},{"lower":"wangynnsf","upper":"wenlsfnl","ndv":2,"count":3,"pre_sum":8},{"lower":"yanavnd","upper":"yanavnd","ndv":1,"count":1,"pre_sum":11},{"lower":"yanvjldjlll","upper":"yanvjldjlll","ndv":1,"count":1,"pre_sum":12},{"lower":"yunlj8@nk","upper":"yunlj8@nk","ndv":1,"count":2,"pre_sum":13}]}
|
||||
|
||||
-- !select_histogram_k10 --
|
||||
{"num_buckets":10,"buckets":[{"lower":"1901-12-31","upper":"1901-12-31","ndv":1,"count":1,"pre_sum":0},{"lower":"1988-03-21","upper":"1988-03-21","ndv":1,"count":1,"pre_sum":1},{"lower":"1989-03-21","upper":"1989-03-21","ndv":1,"count":2,"pre_sum":2},{"lower":"1991-08-11","upper":"1991-08-11","ndv":1,"count":2,"pre_sum":4},{"lower":"2012-03-14","upper":"2012-03-14","ndv":1,"count":1,"pre_sum":6},{"lower":"2014-11-11","upper":"2014-11-11","ndv":1,"count":1,"pre_sum":7},{"lower":"2015-01-01","upper":"2015-01-01","ndv":1,"count":1,"pre_sum":8},{"lower":"2015-04-02","upper":"2015-04-02","ndv":1,"count":4,"pre_sum":9},{"lower":"3124-10-10","upper":"3124-10-10","ndv":1,"count":1,"pre_sum":13},{"lower":"9999-12-12","upper":"9999-12-12","ndv":1,"count":1,"pre_sum":14}]}
|
||||
|
||||
-- !select_histogram_k11 --
|
||||
{"num_buckets":9,"buckets":[{"lower":"1901-01-01 00:00:00","upper":"1901-01-01 00:00:00","ndv":1,"count":1,"pre_sum":0},{"lower":"1989-03-21 13:00:00","upper":"1989-03-21 13:00:00","ndv":1,"count":2,"pre_sum":1},{"lower":"1989-03-21 13:11:00","upper":"1989-03-21 13:11:00","ndv":1,"count":2,"pre_sum":3},{"lower":"2000-01-01 00:00:00","upper":"2000-01-01 00:00:00","ndv":1,"count":1,"pre_sum":5},{"lower":"2013-04-02 15:16:52","upper":"2013-04-02 15:16:52","ndv":1,"count":2,"pre_sum":6},{"lower":"2015-03-13 10:30:00","upper":"2015-03-13 10:30:00","ndv":1,"count":1,"pre_sum":8},{"lower":"2015-03-13 12:36:38","upper":"2015-03-13 12:36:38","ndv":1,"count":2,"pre_sum":9},{"lower":"2015-04-02 00:00:00","upper":"2015-04-02 00:00:00","ndv":1,"count":3,"pre_sum":11},{"lower":"9999-11-11 12:12:00","upper":"9999-11-11 12:12:00","ndv":1,"count":1,"pre_sum":14}]}
|
||||
|
||||
-- !select_histogram_k12 --
|
||||
{"num_buckets":1,"buckets":[{"lower":"string12345","upper":"string12345","ndv":1,"count":15,"pre_sum":0}]}
|
||||
|
||||
-- !select_histogram_k13 --
|
||||
{"num_buckets":13,"buckets":[{"lower":"-170141183460469231731687303715884105727","upper":"-20220101","ndv":2,"count":2,"pre_sum":0},{"lower":"-11011903","upper":"-2022","ndv":2,"count":2,"pre_sum":2},{"lower":"0","upper":"0","ndv":1,"count":1,"pre_sum":4},{"lower":"11011903","upper":"11011903","ndv":1,"count":1,"pre_sum":5},{"lower":"20220101","upper":"20220101","ndv":1,"count":1,"pre_sum":6},{"lower":"20220102","upper":"20220102","ndv":1,"count":1,"pre_sum":7},{"lower":"20220104","upper":"20220104","ndv":1,"count":1,"pre_sum":8},{"lower":"701411834604692317","upper":"701411834604692317","ndv":1,"count":1,"pre_sum":9},{"lower":"701411834604692317316873","upper":"701411834604692317316873","ndv":1,"count":1,"pre_sum":10},{"lower":"1701604692317316873037158","upper":"1701604692317316873037158","ndv":1,"count":1,"pre_sum":11},{"lower":"701411834604692317316873037158","upper":"701411834604692317316873037158","ndv":1,"count":1,"pre_sum":12},{"lower":"1701411834604692317316873037158","upper":"1701411834604692317316873037158","ndv":1,"count":1,"pre_sum":13},{"lower":"170141183460469231731687303715884105727","upper":"170141183460469231731687303715884105727","ndv":1,"count":1,"pre_sum":14}]}
|
||||
|
||||
-- !select_histogram_k0_all_null --
|
||||
{"num_buckets":0,"buckets":[]}
|
||||
|
||||
-- !select_histogram_k1_all_null --
|
||||
{"num_buckets":0,"buckets":[]}
|
||||
|
||||
-- !select_histogram_k2 --
|
||||
{"num_buckets":2,"buckets":[{"lower":"10","upper":"12","ndv":3,"count":3,"pre_sum":0},{"lower":"13","upper":"15","ndv":3,"count":3,"pre_sum":3}]}
|
||||
|
||||
-- !select_histogram_k3_all_null --
|
||||
{"num_buckets":0,"buckets":[]}
|
||||
|
||||
-- !select_histogram_k4_all_null --
|
||||
{"num_buckets":0,"buckets":[]}
|
||||
|
||||
-- !select_histogram_k5_all_null --
|
||||
{"num_buckets":0,"buckets":[]}
|
||||
|
||||
-- !select_histogram_k6_all_null --
|
||||
{"num_buckets":0,"buckets":[]}
|
||||
|
||||
-- !select_histogram_k7_all_null --
|
||||
{"num_buckets":0,"buckets":[]}
|
||||
|
||||
-- !select_histogram_k10_all_null --
|
||||
{"num_buckets":0,"buckets":[]}
|
||||
|
||||
-- !select_histogram_k11_all_null --
|
||||
{"num_buckets":0,"buckets":[]}
|
||||
|
||||
-- !select_histogram_k12_all_null --
|
||||
{"num_buckets":0,"buckets":[]}
|
||||
|
||||
-- !select_histogram_k13_all_null --
|
||||
{"num_buckets":0,"buckets":[]}
|
||||
|
||||
|
||||
@ -113,4 +113,99 @@ suite("test_aggregate_all_functions2") {
|
||||
qt_select_minmax2 """ select max_by(datekey,hour) from metric_table; """
|
||||
qt_select_minmax3 """ select bitmap_to_string(max_by(device_id,hour)) from metric_table; """
|
||||
qt_select_minmax4 """ select bitmap_to_string(min_by(device_id,hour)) from metric_table; """
|
||||
|
||||
sql """
|
||||
INSERT INTO baseall values
|
||||
(NULL, NULL, 10, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO baseall values
|
||||
(NULL, NULL, 11, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO baseall values
|
||||
(NULL, NULL, 12, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO baseall values
|
||||
(NULL, NULL, 13, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO baseall values
|
||||
(NULL, NULL, 14, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO baseall values
|
||||
(NULL, NULL, 15, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
"""
|
||||
|
||||
qt_select_histogram_k0 """SELECT histogram(k0) FROM baseall"""
|
||||
qt_select_histogram_k1 """SELECT histogram(k1, 1) FROM baseall"""
|
||||
qt_select_histogram_k2 """SELECT histogram(k2, 2) FROM baseall"""
|
||||
qt_select_histogram_k3 """SELECT histogram(k3, 3) FROM baseall"""
|
||||
qt_select_histogram_k4 """SELECT histogram(k4, 4) FROM baseall"""
|
||||
qt_select_histogram_k5 """SELECT histogram(k5, 5) FROM baseall"""
|
||||
qt_select_histogram_k6 """SELECT histogram(k6, 6) FROM baseall"""
|
||||
qt_select_histogram_k7 """SELECT histogram(k7, 7) FROM baseall"""
|
||||
// the test case for double and float is removed, because the result is not stable since we have
|
||||
// 0 and -0 in column k8; both of them are valid, but we cannot make both of them appear in the out file.
|
||||
// qt_select_histogram_k8 """SELECT histogram(k8, 8) FROM baseall"""
|
||||
// qt_select_histogram_k9 """SELECT histogram(k9, 9) FROM baseall"""
|
||||
qt_select_histogram_k10 """SELECT histogram(k10, 10) FROM baseall"""
|
||||
qt_select_histogram_k11 """SELECT histogram(k11, 11) FROM baseall"""
|
||||
qt_select_histogram_k12 """SELECT histogram(k12, 12) FROM baseall"""
|
||||
qt_select_histogram_k13 """SELECT histogram(k13, 13) FROM baseall"""
|
||||
|
||||
sql """
|
||||
TRUNCATE TABLE baseall;
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO baseall values
|
||||
(NULL, NULL, 10, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO baseall values
|
||||
(NULL, NULL, 11, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO baseall values
|
||||
(NULL, NULL, 12, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO baseall values
|
||||
(NULL, NULL, 13, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO baseall values
|
||||
(NULL, NULL, 14, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO baseall values
|
||||
(NULL, NULL, 15, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
||||
"""
|
||||
qt_select_histogram_k0_all_null """SELECT histogram(k0) FROM baseall"""
|
||||
qt_select_histogram_k1_all_null """SELECT histogram(k1, 1) FROM baseall"""
|
||||
qt_select_histogram_k2 """SELECT histogram(k2, 2) FROM baseall"""
|
||||
qt_select_histogram_k3_all_null """SELECT histogram(k3, 3) FROM baseall"""
|
||||
qt_select_histogram_k4_all_null """SELECT histogram(k4, 4) FROM baseall"""
|
||||
qt_select_histogram_k5_all_null """SELECT histogram(k5, 5) FROM baseall"""
|
||||
qt_select_histogram_k6_all_null """SELECT histogram(k6, 6) FROM baseall"""
|
||||
qt_select_histogram_k7_all_null """SELECT histogram(k7, 7) FROM baseall"""
|
||||
// qt_select_histogram_k8_all_null """SELECT histogram(k8, 8) FROM baseall"""
|
||||
// qt_select_histogram_k9_all_null """SELECT histogram(k9, 9) FROM baseall"""
|
||||
qt_select_histogram_k10_all_null """SELECT histogram(k10, 10) FROM baseall"""
|
||||
qt_select_histogram_k11_all_null """SELECT histogram(k11, 11) FROM baseall"""
|
||||
qt_select_histogram_k12_all_null """SELECT histogram(k12, 12) FROM baseall"""
|
||||
qt_select_histogram_k13_all_null """SELECT histogram(k13, 13) FROM baseall"""
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user