diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 198ad887d7..f643531d68 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -503,9 +503,12 @@ vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function_union( } vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function(std::string suffix) const { - std::string origin_name = TabletColumn::get_string_by_aggregation_type(_aggregation); auto type = vectorized::DataTypeFactory::instance().create_data_type(*this); + if (type && type->get_type_as_primitive_type() == PrimitiveType::TYPE_AGG_STATE) { + return get_aggregate_function_union(type); + } + std::string origin_name = TabletColumn::get_string_by_aggregation_type(_aggregation); std::string agg_name = origin_name + suffix; std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(), [](unsigned char c) { return std::tolower(c); }); @@ -515,12 +518,9 @@ vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function(std::strin if (function) { return function; } - if (type->get_type_as_primitive_type() != PrimitiveType::TYPE_AGG_STATE) { - LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name - << ", column_type=" << type->get_name(); - return nullptr; - } - return get_aggregate_function_union(type); + LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name + << ", column_type=" << type->get_name(); + return nullptr; } void TabletIndex::init_from_thrift(const TOlapTableIndex& index, diff --git a/be/src/util/quantile_state.cpp b/be/src/util/quantile_state.cpp index 40aba9006f..b0cacf30ea 100644 --- a/be/src/util/quantile_state.cpp +++ b/be/src/util/quantile_state.cpp @@ -29,30 +29,26 @@ namespace doris { -template -QuantileState::QuantileState() : _type(EMPTY), _compression(QUANTILE_STATE_COMPRESSION_MIN) {} +QuantileState::QuantileState() : _type(EMPTY), _compression(QUANTILE_STATE_COMPRESSION_MIN) {} -template -QuantileState::QuantileState(float compression) : _type(EMPTY), _compression(compression) {} +QuantileState::QuantileState(float compression) : _type(EMPTY), _compression(compression) {} -template -QuantileState::QuantileState(const Slice& slice) { +QuantileState::QuantileState(const Slice& slice) { if (!deserialize(slice)) { _type = EMPTY; } } -template -size_t QuantileState::get_serialized_size() { +size_t QuantileState::get_serialized_size() { size_t size = 1 + sizeof(float); // type(QuantileStateType) + compression(float) switch (_type) { case EMPTY: break; case SINGLE: - size += sizeof(T); + size += sizeof(double); break; case EXPLICIT: - size += sizeof(uint16_t) + sizeof(T) * _explicit_data.size(); + size += sizeof(uint16_t) + sizeof(double) * _explicit_data.size(); break; case TDIGEST: size += _tdigest_ptr->serialized_size(); @@ -61,15 +57,13 @@ size_t QuantileState::get_serialized_size() { return size; } -template -void QuantileState::set_compression(float compression) { +void QuantileState::set_compression(float compression) { DCHECK(compression >= QUANTILE_STATE_COMPRESSION_MIN && compression <= QUANTILE_STATE_COMPRESSION_MAX); this->_compression = compression; } -template -bool QuantileState::is_valid(const Slice& slice) { +bool QuantileState::is_valid(const Slice& slice) { if (slice.size < 1) { return false; } @@ -87,10 +81,10 @@ bool QuantileState::is_valid(const Slice& slice) { case EMPTY: break; case SINGLE: { - if ((ptr + sizeof(T)) > end) { + if ((ptr + sizeof(double)) > end) { return false; } - ptr += sizeof(T); + ptr += sizeof(double); break; } case EXPLICIT: { @@ -99,7 +93,7 @@ bool QuantileState::is_valid(const Slice& slice) { } uint16_t num_explicits = decode_fixed16_le(ptr); ptr += sizeof(uint16_t); - ptr += num_explicits * sizeof(T); + ptr += num_explicits * sizeof(double); break; } case TDIGEST: { @@ -116,11 +110,10 @@ bool QuantileState::is_valid(const Slice& slice) { return ptr == end; } -template -T QuantileState::get_explicit_value_by_percentile(float percentile) const { +double QuantileState::get_explicit_value_by_percentile(float percentile) const { DCHECK(_type == EXPLICIT); int n = _explicit_data.size(); - std::vector sorted_data(_explicit_data.begin(), _explicit_data.end()); + std::vector sorted_data(_explicit_data.begin(), _explicit_data.end()); std::sort(sorted_data.begin(), sorted_data.end()); double index = (n - 1) * percentile; @@ -131,8 +124,7 @@ T QuantileState::get_explicit_value_by_percentile(float percentile) const { return sorted_data[intIdx + 1] * (index - intIdx) + sorted_data[intIdx] * (intIdx + 1 - index); } -template -T QuantileState::get_value_by_percentile(float percentile) const { +double QuantileState::get_value_by_percentile(float percentile) const { DCHECK(percentile >= 0 && percentile <= 1); switch (_type) { case EMPTY: { @@ -153,8 +145,7 @@ T QuantileState::get_value_by_percentile(float percentile) const { return NAN; } -template -bool QuantileState::deserialize(const Slice& slice) { +bool QuantileState::deserialize(const Slice& slice) { DCHECK(_type == EMPTY); // in case of insert error data caused be crashed @@ -178,8 +169,8 @@ bool QuantileState::deserialize(const Slice& slice) { break; case SINGLE: { // 2: single_data value - _single_data = *reinterpret_cast(ptr); - ptr += sizeof(T); + _single_data = *reinterpret_cast(ptr); + ptr += sizeof(double); break; } case EXPLICIT: { @@ -189,8 +180,8 @@ bool QuantileState::deserialize(const Slice& slice) { ptr += sizeof(uint16_t); _explicit_data.reserve(std::min(num_explicits * 2, QUANTILE_STATE_EXPLICIT_NUM)); _explicit_data.resize(num_explicits); - memcpy(&_explicit_data[0], ptr, num_explicits * sizeof(T)); - ptr += num_explicits * sizeof(T); + memcpy(&_explicit_data[0], ptr, num_explicits * sizeof(double)); + ptr += num_explicits * sizeof(double); break; } case TDIGEST: { @@ -207,8 +198,7 @@ bool QuantileState::deserialize(const Slice& slice) { return true; } -template -size_t QuantileState::serialize(uint8_t* dst) const { +size_t QuantileState::serialize(uint8_t* dst) const { uint8_t* ptr = dst; *reinterpret_cast(ptr) = _compression; ptr += sizeof(float); @@ -219,8 +209,8 @@ size_t QuantileState::serialize(uint8_t* dst) const { } case SINGLE: { *ptr++ = SINGLE; - *reinterpret_cast(ptr) = _single_data; - ptr += sizeof(T); + *reinterpret_cast(ptr) = _single_data; + ptr += sizeof(double); break; } case EXPLICIT: { @@ -228,8 +218,8 @@ size_t QuantileState::serialize(uint8_t* dst) const { uint16_t size = _explicit_data.size(); *reinterpret_cast(ptr) = size; ptr += sizeof(uint16_t); - memcpy(ptr, &_explicit_data[0], size * sizeof(T)); - ptr += size * sizeof(T); + memcpy(ptr, &_explicit_data[0], size * sizeof(double)); + ptr += size * sizeof(double); break; } case TDIGEST: { @@ -244,8 +234,7 @@ size_t QuantileState::serialize(uint8_t* dst) const { return ptr - dst; } -template -void QuantileState::merge(const QuantileState& other) { +void QuantileState::merge(const QuantileState& other) { switch (other._type) { case EMPTY: break; @@ -293,16 +282,16 @@ void QuantileState::merge(const QuantileState& other) { switch (_type) { case EMPTY: _type = TDIGEST; - _tdigest_ptr = std::move(other._tdigest_ptr); + _tdigest_ptr = other._tdigest_ptr; break; case SINGLE: _type = TDIGEST; - _tdigest_ptr = std::move(other._tdigest_ptr); + _tdigest_ptr = other._tdigest_ptr; _tdigest_ptr->add(_single_data); break; case EXPLICIT: _type = TDIGEST; - _tdigest_ptr = std::move(other._tdigest_ptr); + _tdigest_ptr = other._tdigest_ptr; for (int i = 0; i < _explicit_data.size(); i++) { _tdigest_ptr->add(_explicit_data[i]); } @@ -320,8 +309,7 @@ void QuantileState::merge(const QuantileState& other) { } } -template -void QuantileState::add_value(const T& value) { +void QuantileState::add_value(const double& value) { switch (_type) { case EMPTY: _single_data = value; @@ -352,14 +340,11 @@ void QuantileState::add_value(const T& value) { } } -template -void QuantileState::clear() { +void QuantileState::clear() { _type = EMPTY; _tdigest_ptr.reset(); _explicit_data.clear(); _explicit_data.shrink_to_fit(); } -template class QuantileState; - } // namespace doris diff --git a/be/src/util/quantile_state.h b/be/src/util/quantile_state.h index 6f3a92b2a4..c3b8cf8300 100644 --- a/be/src/util/quantile_state.h +++ b/be/src/util/quantile_state.h @@ -41,7 +41,6 @@ enum QuantileStateType { TDIGEST = 3 // TDIGEST object }; -template class QuantileState { public: QuantileState(); @@ -50,22 +49,21 @@ public: void set_compression(float compression); bool deserialize(const Slice& slice); size_t serialize(uint8_t* dst) const; - void merge(const QuantileState& other); - void add_value(const T& value); + void merge(const QuantileState& other); + void add_value(const double& value); void clear(); bool is_valid(const Slice& slice); size_t get_serialized_size(); - T get_value_by_percentile(float percentile) const; - T get_explicit_value_by_percentile(float percentile) const; + double get_value_by_percentile(float percentile) const; + double get_explicit_value_by_percentile(float percentile) const; ~QuantileState() = default; private: QuantileStateType _type = EMPTY; std::shared_ptr _tdigest_ptr; - T _single_data; - std::vector _explicit_data; + double _single_data; + std::vector _explicit_data; float _compression; }; -using QuantileStateDouble = QuantileState; } // namespace doris diff --git a/be/src/vec/aggregate_functions/aggregate_function_quantile_state.cpp b/be/src/vec/aggregate_functions/aggregate_function_quantile_state.cpp index 314ab5c37b..f50870a277 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_quantile_state.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_quantile_state.cpp @@ -29,11 +29,13 @@ AggregateFunctionPtr create_aggregate_function_quantile_state_union(const std::s const bool result_is_nullable) { const bool arg_is_nullable = argument_types[0]->is_nullable(); if (arg_is_nullable) { - return std::make_shared>(argument_types); + return std::make_shared< + AggregateFunctionQuantileStateOp>( + argument_types); } else { - return std::make_shared>(argument_types); + return std::make_shared< + AggregateFunctionQuantileStateOp>( + argument_types); } } diff --git a/be/src/vec/aggregate_functions/aggregate_function_quantile_state.h b/be/src/vec/aggregate_functions/aggregate_function_quantile_state.h index e368c58238..a7c22cab48 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_quantile_state.h +++ b/be/src/vec/aggregate_functions/aggregate_function_quantile_state.h @@ -46,13 +46,9 @@ namespace doris::vectorized { struct AggregateFunctionQuantileStateUnionOp { static constexpr auto name = "quantile_union"; - template - static void add(QuantileState& res, const T& data, bool& is_first) { - res.add_value(data); - } + static void add(QuantileState& res, const double& data, bool& is_first) { res.add_value(data); } - template - static void add(QuantileState& res, const QuantileState& data, bool& is_first) { + static void add(QuantileState& res, const QuantileState& data, bool& is_first) { if (UNLIKELY(is_first)) { res = data; is_first = false; @@ -61,8 +57,7 @@ struct AggregateFunctionQuantileStateUnionOp { } } - template - static void merge(QuantileState& res, const QuantileState& data, bool& is_first) { + static void merge(QuantileState& res, const QuantileState& data, bool& is_first) { if (UNLIKELY(is_first)) { res = data; is_first = false; @@ -72,9 +67,9 @@ struct AggregateFunctionQuantileStateUnionOp { } }; -template +template struct AggregateFunctionQuantileStateData { - using DataType = QuantileState; + using DataType = QuantileState; DataType value; bool is_first = true; @@ -86,38 +81,35 @@ struct AggregateFunctionQuantileStateData { void merge(const DataType& data) { Op::merge(value, data, is_first); } void write(BufferWritable& buf) const { - DataTypeQuantileState::serialize_as_stream(value, buf); + DataTypeQuantileState::serialize_as_stream(value, buf); } - void read(BufferReadable& buf) { - DataTypeQuantileState::deserialize_as_stream(value, buf); - } + void read(BufferReadable& buf) { DataTypeQuantileState::deserialize_as_stream(value, buf); } void reset() { is_first = true; } DataType& get() { return value; } }; -template +template class AggregateFunctionQuantileStateOp final : public IAggregateFunctionDataHelper< - AggregateFunctionQuantileStateData, - AggregateFunctionQuantileStateOp> { + AggregateFunctionQuantileStateData, + AggregateFunctionQuantileStateOp> { public: - using ResultDataType = QuantileState; - using ColVecType = ColumnQuantileState; - using ColVecResult = ColumnQuantileState; + using ResultDataType = QuantileState; + using ColVecType = ColumnQuantileState; + using ColVecResult = ColumnQuantileState; String get_name() const override { return Op::name; } AggregateFunctionQuantileStateOp(const DataTypes& argument_types_) - : IAggregateFunctionDataHelper< - AggregateFunctionQuantileStateData, - AggregateFunctionQuantileStateOp>( + : IAggregateFunctionDataHelper, + AggregateFunctionQuantileStateOp>( argument_types_) {} DataTypePtr get_return_type() const override { - return std::make_shared>(); + return std::make_shared(); } void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num, @@ -138,8 +130,7 @@ public: void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena*) const override { this->data(place).merge( - const_cast&>(this->data(rhs)) - .get()); + const_cast&>(this->data(rhs)).get()); } void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { @@ -154,8 +145,7 @@ public: void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { auto& column = assert_cast(to); column.get_data().push_back( - const_cast&>(this->data(place)) - .get()); + const_cast&>(this->data(place)).get()); } void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); } diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h index 81cd6a45cb..e63e206ac9 100644 --- a/be/src/vec/columns/column_complex.h +++ b/be/src/vec/columns/column_complex.h @@ -50,7 +50,7 @@ public: bool is_bitmap() const override { return std::is_same_v; } bool is_hll() const override { return std::is_same_v; } - bool is_quantile_state() const override { return std::is_same_v>; } + bool is_quantile_state() const override { return std::is_same_v; } size_t size() const override { return data.size(); } @@ -78,7 +78,7 @@ public: pvalue->deserialize(pos); } else if constexpr (std::is_same_v) { pvalue->deserialize(Slice(pos, length)); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { pvalue->deserialize(Slice(pos, length)); } else { LOG(FATAL) << "Unexpected type in column complex"; @@ -423,13 +423,7 @@ void ColumnComplexType::replicate(const uint32_t* indexs, size_t target_size, using ColumnBitmap = ColumnComplexType; using ColumnHLL = ColumnComplexType; - -template -using ColumnQuantileState = ColumnComplexType>; - -using ColumnQuantileStateDouble = ColumnQuantileState; - -//template class ColumnQuantileState; +using ColumnQuantileState = ColumnComplexType; template struct is_complex : std::false_type {}; @@ -443,8 +437,8 @@ struct is_complex : std::true_type {}; //DataTypeHLL::FieldType = HyperLogLog template <> -struct is_complex> : std::true_type {}; -//DataTypeQuantileState::FieldType = QuantileState +struct is_complex : std::true_type {}; +//DataTypeQuantileState::FieldType = QuantileState template constexpr bool is_complex_v = is_complex::value; diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index 941ca3fe3d..9aadfe2a0a 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -562,7 +562,7 @@ private: std::aligned_union_t, DecimalField, DecimalField, - DecimalField, BitmapValue, HyperLogLog, QuantileState> + DecimalField, BitmapValue, HyperLogLog, QuantileState> storage; Types::Which which; @@ -650,7 +650,7 @@ private: f(field.template get()); return; case Types::QuantileState: - f(field.template get>()); + f(field.template get()); return; default: LOG(FATAL) << "type not supported, type=" << Types::to_string(field.which); @@ -828,7 +828,7 @@ struct Field::TypeToEnum { }; template <> -struct Field::TypeToEnum> { +struct Field::TypeToEnum { static constexpr Types::Which value = Types::QuantileState; }; diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h index 8d25b49e7a..a92c183c89 100644 --- a/be/src/vec/core/types.h +++ b/be/src/vec/core/types.h @@ -33,12 +33,11 @@ namespace doris { class BitmapValue; class HyperLogLog; +class QuantileState; + struct decimal12_t; struct uint24_t; -template -class QuantileState; - namespace vectorized { /// Data types for representing elementary values from a database in RAM. @@ -215,7 +214,7 @@ struct TypeName { }; template <> -struct TypeName> { +struct TypeName { static const char* get() { return "QuantileState"; } }; @@ -703,7 +702,7 @@ inline const char* getTypeName(TypeIndex idx) { case TypeIndex::Struct: return "Struct"; case TypeIndex::QuantileState: - return TypeName>::get(); + return TypeName::get(); case TypeIndex::AggState: return "AggState"; case TypeIndex::Time: diff --git a/be/src/vec/data_types/data_type_decimal.h b/be/src/vec/data_types/data_type_decimal.h index bdd9598836..a9efbeea75 100644 --- a/be/src/vec/data_types/data_type_decimal.h +++ b/be/src/vec/data_types/data_type_decimal.h @@ -155,7 +155,7 @@ public: if constexpr (std::is_same_v, TypeId>) { return TYPE_DECIMAL128I; } - __builtin_unreachable(); + return TYPE_DECIMALV2; } TPrimitiveType::type get_type_as_tprimitive_type() const override { diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 50b52d98fe..971b37eb8f 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -179,7 +179,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo nested = std::make_shared>(27, 9); break; case TYPE_QUANTILE_STATE: - nested = std::make_shared(); + nested = std::make_shared(); break; case TYPE_DECIMAL32: case TYPE_DECIMAL64: @@ -310,7 +310,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool nested = std::make_shared(); break; case TypeIndex::QuantileState: - nested = std::make_shared(); + nested = std::make_shared(); break; case TypeIndex::TimeV2: case TypeIndex::Time: @@ -385,7 +385,7 @@ DataTypePtr DataTypeFactory::_create_primitive_data_type(const FieldType& type, result = std::make_shared>(27, 9); break; case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE: - result = std::make_shared(); + result = std::make_shared(); break; case FieldType::OLAP_FIELD_TYPE_DECIMAL32: case FieldType::OLAP_FIELD_TYPE_DECIMAL64: @@ -511,7 +511,7 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) { break; } case PGenericType::QUANTILE_STATE: { - nested = std::make_shared(); + nested = std::make_shared(); break; } case PGenericType::TIME: diff --git a/be/src/vec/data_types/data_type_quantilestate.cpp b/be/src/vec/data_types/data_type_quantilestate.cpp index a6a37e1f07..66951bdd57 100644 --- a/be/src/vec/data_types/data_type_quantilestate.cpp +++ b/be/src/vec/data_types/data_type_quantilestate.cpp @@ -29,40 +29,38 @@ namespace doris::vectorized { // binary: | // : row num | quantilestate1 size | quantilestate2 size | ... // : quantilestate1 | quantilestate2 | ... -template -int64_t DataTypeQuantileState::get_uncompressed_serialized_bytes(const IColumn& column, - int be_exec_version) const { +int64_t DataTypeQuantileState::get_uncompressed_serialized_bytes(const IColumn& column, + int be_exec_version) const { auto ptr = column.convert_to_full_column_if_const(); - auto& data_column = assert_cast&>(*ptr); + auto& data_column = assert_cast(*ptr); auto allocate_len_size = sizeof(size_t) * (column.size() + 1); auto allocate_content_size = 0; for (size_t i = 0; i < column.size(); ++i) { - auto& quantile_state = const_cast&>(data_column.get_element(i)); + auto& quantile_state = const_cast(data_column.get_element(i)); allocate_content_size += quantile_state.get_serialized_size(); } return allocate_len_size + allocate_content_size; } -template -char* DataTypeQuantileState::serialize(const IColumn& column, char* buf, - int be_exec_version) const { +char* DataTypeQuantileState::serialize(const IColumn& column, char* buf, + int be_exec_version) const { auto ptr = column.convert_to_full_column_if_const(); - auto& data_column = assert_cast&>(*ptr); + auto& data_column = assert_cast(*ptr); // serialize the quantile_state size array, row num saves at index 0 size_t* meta_ptr = (size_t*)buf; meta_ptr[0] = column.size(); for (size_t i = 0; i < meta_ptr[0]; ++i) { - auto& quantile_state = const_cast&>(data_column.get_element(i)); + auto& quantile_state = const_cast(data_column.get_element(i)); meta_ptr[i + 1] = quantile_state.get_serialized_size(); } // serialize each quantile_state char* data_ptr = buf + sizeof(size_t) * (meta_ptr[0] + 1); for (size_t i = 0; i < meta_ptr[0]; ++i) { - auto& quantile_state = const_cast&>(data_column.get_element(i)); + auto& quantile_state = const_cast(data_column.get_element(i)); quantile_state.serialize((uint8_t*)data_ptr); data_ptr += meta_ptr[i + 1]; } @@ -70,10 +68,9 @@ char* DataTypeQuantileState::serialize(const IColumn& column, char* buf, return data_ptr; } -template -const char* DataTypeQuantileState::deserialize(const char* buf, IColumn* column, - int be_exec_version) const { - auto& data_column = assert_cast&>(*column); +const char* DataTypeQuantileState::deserialize(const char* buf, IColumn* column, + int be_exec_version) const { + auto& data_column = assert_cast(*column); auto& data = data_column.get_data(); // deserialize the quantile_state size array @@ -91,15 +88,12 @@ const char* DataTypeQuantileState::deserialize(const char* buf, IColumn* colu return data_ptr; } -template -MutableColumnPtr DataTypeQuantileState::create_column() const { - return ColumnQuantileState::create(); +MutableColumnPtr DataTypeQuantileState::create_column() const { + return ColumnQuantileState::create(); } -template -void DataTypeQuantileState::serialize_as_stream(const QuantileState& cvalue, - BufferWritable& buf) { - auto& value = const_cast&>(cvalue); +void DataTypeQuantileState::serialize_as_stream(const QuantileState& cvalue, BufferWritable& buf) { + auto& value = const_cast(cvalue); std::string memory_buffer; int bytesize = value.get_serialized_size(); memory_buffer.resize(bytesize); @@ -107,24 +101,20 @@ void DataTypeQuantileState::serialize_as_stream(const QuantileState& cvalu write_string_binary(memory_buffer, buf); } -template -void DataTypeQuantileState::deserialize_as_stream(QuantileState& value, BufferReadable& buf) { +void DataTypeQuantileState::deserialize_as_stream(QuantileState& value, BufferReadable& buf) { StringRef ref; read_string_binary(ref, buf); value.deserialize(ref.to_slice()); } -template -void DataTypeQuantileState::to_string(const class doris::vectorized::IColumn& column, - size_t row_num, - doris::vectorized::BufferWritable& ostr) const { - auto& data = const_cast&>( - assert_cast&>(column).get_element(row_num)); +void DataTypeQuantileState::to_string(const class doris::vectorized::IColumn& column, + size_t row_num, + doris::vectorized::BufferWritable& ostr) const { + auto& data = const_cast( + assert_cast(column).get_element(row_num)); std::string result(data.get_serialized_size(), '0'); data.serialize((uint8_t*)result.data()); ostr.write(result.data(), result.size()); } -template class DataTypeQuantileState; - } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/data_types/data_type_quantilestate.h b/be/src/vec/data_types/data_type_quantilestate.h index c3bcf29464..2bfc56b41c 100644 --- a/be/src/vec/data_types/data_type_quantilestate.h +++ b/be/src/vec/data_types/data_type_quantilestate.h @@ -17,10 +17,7 @@ #pragma once -#include #include -#include -#include #include #include @@ -45,13 +42,12 @@ class IColumn; } // namespace doris namespace doris::vectorized { -template class DataTypeQuantileState : public IDataType { public: DataTypeQuantileState() = default; ~DataTypeQuantileState() override = default; - using ColumnType = ColumnQuantileState; - using FieldType = QuantileState; + using ColumnType = ColumnQuantileState; + using FieldType = QuantileState; std::string do_get_name() const override { return get_family_name(); } const char* get_family_name() const override { return "QuantileState"; } @@ -93,19 +89,17 @@ public: } void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; - Field get_default() const override { return QuantileState(); } + Field get_default() const override { return QuantileState(); } [[noreturn]] Field get_field(const TExprNode& node) const override { LOG(FATAL) << "Unimplemented get_field for quantilestate"; } - static void serialize_as_stream(const QuantileState& value, BufferWritable& buf); + static void serialize_as_stream(const QuantileState& value, BufferWritable& buf); - static void deserialize_as_stream(QuantileState& value, BufferReadable& buf); + static void deserialize_as_stream(QuantileState& value, BufferReadable& buf); DataTypeSerDeSPtr get_serde() const override { - return std::make_shared>(); + return std::make_shared(); }; }; -using DataTypeQuantileStateDouble = DataTypeQuantileState; - } // namespace doris::vectorized diff --git a/be/src/vec/data_types/serde/data_type_quantilestate_serde.cpp b/be/src/vec/data_types/serde/data_type_quantilestate_serde.cpp deleted file mode 100644 index 6fee387bd0..0000000000 --- a/be/src/vec/data_types/serde/data_type_quantilestate_serde.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "data_type_quantilestate_serde.h" -namespace doris { - -namespace vectorized {} // namespace vectorized -} // namespace doris \ No newline at end of file diff --git a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h index e9fe7a8a5d..112e132617 100644 --- a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h +++ b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h @@ -37,7 +37,6 @@ namespace doris { namespace vectorized { -template class DataTypeQuantileStateSerDe : public DataTypeSerDe { public: void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, @@ -65,13 +64,41 @@ public: } Status write_column_to_pb(const IColumn& column, PValues& result, int start, - int end) const override; - Status read_column_from_pb(IColumn& column, const PValues& arg) const override; + int end) const override { + result.mutable_bytes_value()->Reserve(end - start); + for (size_t row_num = start; row_num < end; ++row_num) { + StringRef data = column.get_data_at(row_num); + result.add_bytes_value(data.to_string()); + } + return Status::OK(); + } + Status read_column_from_pb(IColumn& column, const PValues& arg) const override { + column.reserve(arg.bytes_value_size()); + for (int i = 0; i < arg.bytes_value_size(); ++i) { + column.insert_data(arg.bytes_value(i).c_str(), arg.bytes_value(i).size()); + } + return Status::OK(); + } void write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool, - int32_t col_id, int row_num) const override; + int32_t col_id, int row_num) const override { + auto& col = reinterpret_cast(column); + auto& val = const_cast(col.get_element(row_num)); + size_t actual_size = val.get_serialized_size(); + auto ptr = mem_pool->alloc(actual_size); + result.writeKey(col_id); + result.writeStartBinary(); + result.writeBinary(reinterpret_cast(ptr), actual_size); + result.writeEndBinary(); + } - void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override; + void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override { + auto& col = reinterpret_cast(column); + auto blob = static_cast(arg); + QuantileState val; + val.deserialize(Slice(blob->getBlob())); + col.insert_value(val); + } void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override { @@ -85,9 +112,13 @@ public: } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, - int row_idx, bool col_const) const override; + int row_idx, bool col_const) const override { + return _write_column_to_mysql(column, row_buffer, row_idx, col_const); + } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, - int row_idx, bool col_const) const override; + int row_idx, bool col_const) const override { + return _write_column_to_mysql(column, row_buffer, row_idx, col_const); + } private: template @@ -95,62 +126,16 @@ private: int row_idx, bool col_const) const; }; -template -Status DataTypeQuantileStateSerDe::write_column_to_pb(const IColumn& column, PValues& result, - int start, int end) const { - result.mutable_bytes_value()->Reserve(end - start); - for (size_t row_num = start; row_num < end; ++row_num) { - StringRef data = column.get_data_at(row_num); - result.add_bytes_value(data.to_string()); - } - return Status::OK(); -} - -template -Status DataTypeQuantileStateSerDe::read_column_from_pb(IColumn& column, - const PValues& arg) const { - column.reserve(arg.bytes_value_size()); - for (int i = 0; i < arg.bytes_value_size(); ++i) { - column.insert_data(arg.bytes_value(i).c_str(), arg.bytes_value(i).size()); - } - return Status::OK(); -} - -template -void DataTypeQuantileStateSerDe::write_one_cell_to_jsonb(const IColumn& column, - JsonbWriter& result, Arena* mem_pool, - int32_t col_id, int row_num) const { - auto& col = reinterpret_cast&>(column); - auto& val = const_cast&>(col.get_element(row_num)); - size_t actual_size = val.get_serialized_size(); - auto ptr = mem_pool->alloc(actual_size); - result.writeKey(col_id); - result.writeStartBinary(); - result.writeBinary(reinterpret_cast(ptr), actual_size); - result.writeEndBinary(); -} - -template -void DataTypeQuantileStateSerDe::read_one_cell_from_jsonb(IColumn& column, - const JsonbValue* arg) const { - auto& col = reinterpret_cast&>(column); - auto blob = static_cast(arg); - QuantileState val; - val.deserialize(Slice(blob->getBlob())); - col.insert_value(val); -} - // QuantileState is binary data which is not shown by mysql -template template -Status DataTypeQuantileStateSerDe::_write_column_to_mysql( - const IColumn& column, MysqlRowBuffer& result, int row_idx, - bool col_const) const { - auto& data_column = reinterpret_cast&>(column); +Status DataTypeQuantileStateSerDe::_write_column_to_mysql(const IColumn& column, + MysqlRowBuffer& result, + int row_idx, bool col_const) const { + auto& data_column = reinterpret_cast(column); if (_return_object_as_string) { const auto col_index = index_check_const(row_idx, col_const); - auto& quantile_value = const_cast&>(data_column.get_element(col_index)); + auto& quantile_value = const_cast(data_column.get_element(col_index)); size_t size = quantile_value.get_serialized_size(); std::unique_ptr buf = std::make_unique(size); quantile_value.serialize((uint8_t*)buf.get()); @@ -165,19 +150,5 @@ Status DataTypeQuantileStateSerDe::_write_column_to_mysql( return Status::OK(); } -template -Status DataTypeQuantileStateSerDe::write_column_to_mysql(const IColumn& column, - MysqlRowBuffer& row_buffer, - int row_idx, bool col_const) const { - return _write_column_to_mysql(column, row_buffer, row_idx, col_const); -} - -template -Status DataTypeQuantileStateSerDe::write_column_to_mysql(const IColumn& column, - MysqlRowBuffer& row_buffer, - int row_idx, bool col_const) const { - return _write_column_to_mysql(column, row_buffer, row_idx, col_const); -} - } // namespace vectorized } // namespace doris diff --git a/be/src/vec/functions/function_quantile_state.cpp b/be/src/vec/functions/function_quantile_state.cpp index 71409f9332..b963a07a7a 100644 --- a/be/src/vec/functions/function_quantile_state.cpp +++ b/be/src/vec/functions/function_quantile_state.cpp @@ -33,7 +33,6 @@ #include "common/compiler_util.h" // IWYU pragma: keep #include "common/status.h" #include "util/quantile_state.h" -#include "util/string_parser.hpp" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column.h" #include "vec/columns/column_complex.h" @@ -63,35 +62,29 @@ class FunctionContext; namespace doris::vectorized { -template struct QuantileStateEmpty { static constexpr auto name = "quantile_state_empty"; - using ReturnColVec = ColumnQuantileState; - static DataTypePtr get_return_type() { - return std::make_shared>(); - } - static auto init_value() { return QuantileState {}; } + using ReturnColVec = ColumnQuantileState; + static DataTypePtr get_return_type() { return std::make_shared(); } + static auto init_value() { return QuantileState {}; } }; -template class FunctionToQuantileState : public IFunction { public: static constexpr auto name = "to_quantile_state"; String get_name() const override { return name; } - static FunctionPtr create() { - return std::make_shared>(); - } + static FunctionPtr create() { return std::make_shared(); } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - return std::make_shared>(); + return std::make_shared(); } size_t get_number_of_arguments() const override { return 2; } bool use_default_implementation_for_nulls() const override { return false; } - template + template Status execute_internal(const ColumnPtr& column, const DataTypePtr& data_type, MutableColumnPtr& column_result) { auto type_error = [&]() { @@ -100,75 +93,41 @@ public: }; const ColumnNullable* col_nullable = nullptr; const ColumnUInt8* col_nullmap = nullptr; - const ColumnType* col = nullptr; + const ColumnFloat64* col = nullptr; const NullMap* nullmap = nullptr; if constexpr (is_nullable) { col_nullable = check_and_get_column(column.get()); col_nullmap = check_and_get_column( col_nullable->get_null_map_column_ptr().get()); - col = check_and_get_column(col_nullable->get_nested_column_ptr().get()); + col = check_and_get_column(col_nullable->get_nested_column_ptr().get()); if (col == nullptr || col_nullmap == nullptr) { return type_error(); } nullmap = &col_nullmap->get_data(); } else { - col = check_and_get_column(column.get()); + col = check_and_get_column(column.get()); } - auto* res_column = - reinterpret_cast*>(column_result.get()); + auto* res_column = reinterpret_cast(column_result.get()); auto& res_data = res_column->get_data(); size_t size = col->size(); for (size_t i = 0; i < size; ++i) { if constexpr (is_nullable) { if ((*nullmap)[i]) { + res_data[i].clear(); continue; } } - - if constexpr (std::is_same_v) { - const ColumnString::Chars& data = col->get_chars(); - const ColumnString::Offsets& offsets = col->get_offsets(); - - const char* raw_str = reinterpret_cast(&data[offsets[i - 1]]); - size_t str_size = offsets[i] - offsets[i - 1]; - StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; - InternalType value = StringParser::string_to_float(raw_str, str_size, - &parse_result); - if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { - res_data[i].add_value(value); - } else { - std::stringstream ss; - ss << "The input column content: " << std::string(raw_str, str_size) - << " is not valid in function: " << get_name(); - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } - } else if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v) { - // InternalType only can be double or float, so we can cast directly - InternalType value = (InternalType)col->get_data()[i]; - res_data[i].set_compression(compression); - res_data[i].add_value(value); - } else { - type_error(); - } + double value = (double)col->get_data()[i]; + res_data[i].set_compression(compression); + res_data[i].add_value(value); } return Status::OK(); } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { - if constexpr (!(std::is_same_v || - std::is_same_v)) { - std::stringstream ss; - ss << "The InternalType of quantile_state must be float or double"; - - return Status::InternalError(ss.str()); - } - const ColumnPtr& column = block.get_by_position(arguments[0]).column; const DataTypePtr& data_type = block.get_by_position(arguments[0]).type; auto compression_arg = check_and_get_column_const( @@ -184,39 +143,14 @@ public: MutableColumnPtr column_result = get_return_type_impl({})->create_column(); column_result->resize(input_rows_count); - auto type_error = [&]() { - return Status::RuntimeError("Illegal column {} of argument of function {}", - block.get_by_position(arguments[0]).column->get_name(), - get_name()); - }; Status status = Status::OK(); if (which.is_nullable()) { const DataTypePtr& nested_data_type = static_cast(data_type.get())->get_nested_type(); WhichDataType nested_which(nested_data_type); - if (nested_which.is_string_or_fixed_string()) { - status = execute_internal(column, data_type, column_result); - } else if (nested_which.is_int64()) { - status = execute_internal(column, data_type, column_result); - } else if (which.is_float32()) { - status = execute_internal(column, data_type, column_result); - } else if (which.is_float64()) { - status = execute_internal(column, data_type, column_result); - } else { - return type_error(); - } + execute_internal(column, data_type, column_result); } else { - if (which.is_string_or_fixed_string()) { - status = execute_internal(column, data_type, column_result); - } else if (which.is_int64()) { - status = execute_internal(column, data_type, column_result); - } else if (which.is_float32()) { - status = execute_internal(column, data_type, column_result); - } else if (which.is_float64()) { - status = execute_internal(column, data_type, column_result); - } else { - return type_error(); - } + execute_internal(column, data_type, column_result); } if (status.ok()) { block.replace_by_position(result, std::move(column_result)); @@ -228,15 +162,12 @@ private: float compression = 2048; }; -template class FunctionQuantileStatePercent : public IFunction { public: static constexpr auto name = "quantile_percent"; String get_name() const override { return name; } - static FunctionPtr create() { - return std::make_shared>(); - } + static FunctionPtr create() { return std::make_shared(); } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return std::make_shared(); @@ -258,7 +189,7 @@ public: VectorizedUtils::update_null_map(null_map, nullable->get_null_map_data()); column = nullable->get_nested_column_ptr(); } - auto str_col = assert_cast*>(column.get()); + auto str_col = assert_cast(column.get()); auto& col_data = str_col->get_data(); auto percent_arg = check_and_get_column_const( block.get_by_position(arguments.back()).column); @@ -290,14 +221,10 @@ public: } }; -using FunctionQuantileStateEmpty = FunctionConst, false>; -using FunctionQuantileStatePercentDouble = FunctionQuantileStatePercent; -using FunctionToQuantileStateDouble = FunctionToQuantileState; - void register_function_quantile_state(SimpleFunctionFactory& factory) { - factory.register_function(); - factory.register_function(); - factory.register_function(); + factory.register_function>(); + factory.register_function(); + factory.register_function(); } } // namespace doris::vectorized diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index e7b59033c4..9f3fe2b7ac 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -355,22 +355,22 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorBitMap::convert_to_olap() Status OlapBlockDataConvertor::OlapColumnDataConvertorQuantileState::convert_to_olap() { assert(_typed_column.column); - const vectorized::ColumnQuantileStateDouble* column_quantile_state = nullptr; + const vectorized::ColumnQuantileState* column_quantile_state = nullptr; if (_nullmap) { auto nullable_column = assert_cast(_typed_column.column.get()); - column_quantile_state = assert_cast( + column_quantile_state = assert_cast( nullable_column->get_nested_column_ptr().get()); } else { - column_quantile_state = assert_cast( - _typed_column.column.get()); + column_quantile_state = + assert_cast(_typed_column.column.get()); } assert(column_quantile_state); - QuantileStateDouble* quantile_state = - const_cast(column_quantile_state->get_data().data() + _row_pos); - QuantileStateDouble* quantile_state_cur = quantile_state; - QuantileStateDouble* quantile_state_end = quantile_state_cur + _num_rows; + QuantileState* quantile_state = + const_cast(column_quantile_state->get_data().data() + _row_pos); + QuantileState* quantile_state_cur = quantile_state; + QuantileState* quantile_state_end = quantile_state_cur + _num_rows; size_t total_size = 0; if (_nullmap) { diff --git a/be/test/util/quantile_state_test.cpp b/be/test/util/quantile_state_test.cpp index afc407fadd..a641731656 100644 --- a/be/test/util/quantile_state_test.cpp +++ b/be/test/util/quantile_state_test.cpp @@ -23,10 +23,9 @@ #include "gtest/gtest_pred_impl.h" namespace doris { -using DoubleQuantileState = QuantileState; TEST(QuantileStateTest, merge) { - DoubleQuantileState empty; + QuantileState empty; EXPECT_EQ(EMPTY, empty._type); empty.add_value(1); EXPECT_EQ(SINGLE, empty._type); @@ -38,7 +37,7 @@ TEST(QuantileStateTest, merge) { EXPECT_EQ(3, empty.get_value_by_percentile(0.5)); EXPECT_EQ(5, empty.get_value_by_percentile(1)); - DoubleQuantileState another; + QuantileState another; another.add_value(6); another.add_value(7); another.add_value(8); diff --git a/be/test/vec/core/column_complex_test.cpp b/be/test/vec/core/column_complex_test.cpp index 2a45eb1b04..dd2109d1f7 100644 --- a/be/test/vec/core/column_complex_test.cpp +++ b/be/test/vec/core/column_complex_test.cpp @@ -97,17 +97,17 @@ public: void check_bitmap_column(const IColumn& l, const IColumn& r) { ASSERT_EQ(l.size(), r.size()); - const auto& l_col = assert_cast(l); - const auto& r_col = assert_cast(r); + const auto& l_col = assert_cast(l); + const auto& r_col = assert_cast(r); for (size_t i = 0; i < l_col.size(); ++i) { - auto& l_value = const_cast(l_col.get_element(i)); - auto& r_value = const_cast(r_col.get_element(i)); + auto& l_value = const_cast(l_col.get_element(i)); + auto& r_value = const_cast(r_col.get_element(i)); ASSERT_EQ(l_value.get_serialized_size(), r_value.get_serialized_size()); } } void check_serialize_and_deserialize(MutableColumnPtr& col) { - auto column = assert_cast(col.get()); + auto column = assert_cast(col.get()); auto size = _quantile_state_type.get_uncompressed_serialized_bytes( *column, BeExecVersionManager::get_newest_version()); std::unique_ptr buf = std::make_unique(size); @@ -122,7 +122,7 @@ public: } private: - DataTypeQuantileStateDouble _quantile_state_type; + DataTypeQuantileState _quantile_state_type; }; TEST_F(ColumnBitmapTest, ColumnBitmapReadWrite) { @@ -162,7 +162,7 @@ TEST_F(ColumnQuantileStateTest, ColumnQuantileStateReadWrite) { // quantile column with lots of rows const size_t row_size = 20000; - auto& data = assert_cast(*column.get()).get_data(); + auto& data = assert_cast(*column.get()).get_data(); data.resize(row_size); // EMPTY type check_serialize_and_deserialize(column); diff --git a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp index 628b977a2b..17ba900b68 100644 --- a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp @@ -146,16 +146,16 @@ inline void serialize_and_deserialize_pb_test() { std::cout << "==== quantilestate === " << std::endl; { vectorized::DataTypePtr quantile_data_type( - std::make_shared()); + std::make_shared()); auto quantile_column = quantile_data_type->create_column(); - std::vector& container = - ((vectorized::ColumnQuantileStateDouble*)quantile_column.get())->get_data(); + std::vector& container = + ((vectorized::ColumnQuantileState*)quantile_column.get())->get_data(); const long max_rand = 1000000L; double lower_bound = 0; double upper_bound = 100; srandom(time(nullptr)); for (int i = 0; i < 1024; ++i) { - QuantileStateDouble q; + QuantileState q; double random_double = lower_bound + (upper_bound - lower_bound) * (random() % max_rand) / max_rand; q.add_value(random_double); diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp b/be/test/vec/data_types/serde/data_type_serde_test.cpp index cdb7455d26..4ad0bf4401 100644 --- a/be/test/vec/data_types/serde/data_type_serde_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp @@ -141,16 +141,16 @@ inline void serialize_and_deserialize_pb_test() { // quantilestate { vectorized::DataTypePtr quantile_data_type( - std::make_shared()); + std::make_shared()); auto quantile_column = quantile_data_type->create_column(); - std::vector& container = - ((vectorized::ColumnQuantileStateDouble*)quantile_column.get())->get_data(); + std::vector& container = + ((vectorized::ColumnQuantileState*)quantile_column.get())->get_data(); const long max_rand = 1000000L; double lower_bound = 0; double upper_bound = 100; srandom(time(nullptr)); for (int i = 0; i < 1024; ++i) { - QuantileStateDouble q; + QuantileState q; double random_double = lower_bound + (upper_bound - lower_bound) * (random() % max_rand) / max_rand; q.add_value(random_double); diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Types/AGG_STATE.md b/docs/en/docs/sql-manual/sql-reference/Data-Types/AGG_STATE.md index 69f6f2bb9c..41535d923b 100644 --- a/docs/en/docs/sql-manual/sql-reference/Data-Types/AGG_STATE.md +++ b/docs/en/docs/sql-manual/sql-reference/Data-Types/AGG_STATE.md @@ -82,6 +82,7 @@ If you need to get the actual result, you need to use the corresponding [merge]( ``` If you want to aggregate only the agg_state without getting the actual result during the process, you can use the [union](../..//sql-functions/combinators/union.md) function. +For more examples, see [datatype_p0/agg_state](https://github.com/apache/doris/tree/master/regression-test/suites/datatype_p0/agg_state) ### keywords AGG_STATE diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Types/QUANTILE_STATE.md b/docs/en/docs/sql-manual/sql-reference/Data-Types/QUANTILE_STATE.md index ad0ab8fffd..8dfe546317 100644 --- a/docs/en/docs/sql-manual/sql-reference/Data-Types/QUANTILE_STATE.md +++ b/docs/en/docs/sql-manual/sql-reference/Data-Types/QUANTILE_STATE.md @@ -29,6 +29,8 @@ under the License. QUANTILE_STATE +**In 2.0, we support the [agg_state](AGG_STATE.md) function, and it is recommended to use agg_state quantile_union(quantile_state not null) instead of this type.** + QUANTILE_STATE cannot be used as a key column, and the aggregation type is QUANTILE_UNION when building the table. The user does not need to specify the length and default value. The length is controlled within the system according to the degree of data aggregation. And the QUANTILE_STATE column can only be queried or used through the supporting QUANTILE_PERCENT, QUANTILE_UNION and TO_QUANTILE_STATE functions. @@ -41,7 +43,7 @@ related functions: This function is an aggregation function, which is used to aggregate the intermediate results of different quantile calculations. The result returned by this function is still QUANTILE_STATE - TO_QUANTILE_STATE(INT/FLOAT/DOUBLE raw_data [,FLOAT compression]): + TO_QUANTILE_STATE(DOUBLE raw_data [,FLOAT compression]): This function converts a numeric type to a QUANTILE_STATE type The compression parameter is optional and can be set in the range [2048, 10000]. diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/AGG_STATE.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/AGG_STATE.md index 291f835411..18dc95c324 100644 --- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/AGG_STATE.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/AGG_STATE.md @@ -85,6 +85,8 @@ under the License. 如果想要在过程中只聚合agg_state而不获取实际结果,可以使用[union](../..//sql-functions/combinators/union.md)函数。 +更多的例子参见[datatype_p0/agg_state](https://github.com/apache/doris/tree/master/regression-test/suites/datatype_p0/agg_state) + ### keywords AGG_STATE diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/QUANTILE_STATE.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/QUANTILE_STATE.md index 7c66681e1a..b249dbe5e4 100644 --- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/QUANTILE_STATE.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/QUANTILE_STATE.md @@ -27,6 +27,9 @@ under the License. ## QUANTILE_STATE ### description QUANTILE_STATE + + **在2.0中我们支持了[agg_state](AGG_STATE.md)功能,推荐使用agg_state quantile_union(quantile_state not null)来代替本类型。** + QUANTILE_STATE不能作为key列使用,建表时配合聚合类型为QUANTILE_UNION。 用户不需要指定长度和默认值。长度根据数据的聚合程度系统内控制。 并且QUANTILE_STATE列只能通过配套的QUANTILE_PERCENT、QUANTILE_UNION、TO_QUANTILE_STATE等函数进行查询或使用。 @@ -39,7 +42,7 @@ under the License. 此函数为聚合函数,用于将不同的分位数计算中间结果进行聚合操作。此函数返回的结果仍是QUANTILE_STATE - TO_QUANTILE_STATE(INT/FLOAT/DOUBLE raw_data [,FLOAT compression]): + TO_QUANTILE_STATE(DOUBLE raw_data [,FLOAT compression]): 此函数将数值类型转化成QUANTILE_STATE类型 compression参数是可选项,可设置范围是[2048, 10000],值越大,后续分位数近似计算的精度越高,内存消耗越大,计算耗时越长。 compression参数未指定或设置的值在[2048, 10000]范围外,以2048的默认值运行 diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToQuantileState.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToQuantileState.java index cdd286372b..40f6787c28 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToQuantileState.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToQuantileState.java @@ -24,9 +24,9 @@ import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.DoubleType; import org.apache.doris.nereids.types.FloatType; import org.apache.doris.nereids.types.QuantileStateType; -import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -40,7 +40,7 @@ public class ToQuantileState extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(QuantileStateType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, FloatType.INSTANCE) + FunctionSignature.ret(QuantileStateType.INSTANCE).args(DoubleType.INSTANCE, FloatType.INSTANCE) ); /** diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 7d16cc9990..0dd79c2df6 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1847,10 +1847,7 @@ visible_functions = { # quantile_function "quantile" : [ - [['to_quantile_state'], 'QUANTILE_STATE', ['VARCHAR', 'FLOAT'], 'ALWAYS_NOT_NULLABLE'], [['to_quantile_state'], 'QUANTILE_STATE', ['DOUBLE', 'FLOAT'], 'ALWAYS_NOT_NULLABLE'], - [['to_quantile_state'], 'QUANTILE_STATE', ['FLOAT', 'FLOAT'], 'ALWAYS_NOT_NULLABLE'], - [['to_quantile_state'], 'QUANTILE_STATE', ['BIGINT', 'FLOAT'], 'ALWAYS_NOT_NULLABLE'], [['quantile_percent'], 'DOUBLE', ['QUANTILE_STATE', 'FLOAT'], 'ALWAYS_NOT_NULLABLE'] ], diff --git a/regression-test/data/datatype_p0/agg_state/quantile_union/test_agg_state_quantile_union.out b/regression-test/data/datatype_p0/agg_state/quantile_union/test_agg_state_quantile_union.out new file mode 100644 index 0000000000..5b464e1e3f --- /dev/null +++ b/regression-test/data/datatype_p0/agg_state/quantile_union/test_agg_state_quantile_union.out @@ -0,0 +1,14 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +0 499.5 +1 1499.5 +2 2499.5 +3 3499.5 +4 4499.5 +5 5499.5 +6 6499.5 +7 7499.5 + +-- !select -- +3999.5 + diff --git a/regression-test/suites/datatype_p0/agg_state/quantile_union/test_agg_state_quantile_union.groovy b/regression-test/suites/datatype_p0/agg_state/quantile_union/test_agg_state_quantile_union.groovy new file mode 100644 index 0000000000..eafccc6491 --- /dev/null +++ b/regression-test/suites/datatype_p0/agg_state/quantile_union/test_agg_state_quantile_union.groovy @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_agg_state_quantile_union") { + sql "set global enable_agg_state=true" + sql """ DROP TABLE IF EXISTS a_table; """ + sql """ + create table a_table( + k1 int not null, + k2 agg_state quantile_union(quantile_state not null) + ) + aggregate key (k1) + distributed BY hash(k1) + properties("replication_num" = "1"); + """ + + sql """insert into a_table + select e1/1000,quantile_union_state(TO_QUANTILE_STATE(e1, 2048)) from + (select 1 k1) as t lateral view explode_numbers(8000) tmp1 as e1;""" + + + sql"set enable_nereids_planner=true;" + qt_select """ select k1,quantile_percent(quantile_union_merge(k2),0.5) from a_table group by k1 order by k1; + """ + qt_select """ select quantile_percent(quantile_union_merge(tmp),0.5) from (select k1,quantile_union_union(k2) tmp from a_table group by k1)t; + """ +}