From b96b8f4370fb150cb5a7d54fbe257291f0facde7 Mon Sep 17 00:00:00 2001 From: Pxl Date: Fri, 25 Aug 2023 14:08:24 +0800 Subject: [PATCH] [Bug](jdbc) support get_default on complex type (#23325) support get_default on complex type --- be/CMakeLists.txt | 17 +- be/src/vec/common/field_visitors.h | 58 --- be/src/vec/core/field.cpp | 12 - be/src/vec/core/field.h | 359 +++++------------- .../vec/data_types/convert_field_to_type.cpp | 3 - be/src/vec/data_types/data_type_bitmap.h | 5 +- be/src/vec/data_types/data_type_hll.h | 6 +- .../vec/data_types/data_type_quantilestate.h | 5 +- 8 files changed, 111 insertions(+), 354 deletions(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index a8cea5c503..df92daeb3d 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -225,15 +225,18 @@ function(TRY_TO_CHANGE_LINKER LINKER_COMMAND LINKER_NAME) endif() endfunction() -# In terms of performance, mold> lld> gold> ld -set(CUSTUM_LINKER_COMMAND "ld") -TRY_TO_CHANGE_LINKER("mold" "mold") -TRY_TO_CHANGE_LINKER("lld" "LLD") -TRY_TO_CHANGE_LINKER("gold" "GNU gold") -if (NOT CUSTUM_LINKER_COMMAND STREQUAL "ld") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${CUSTUM_LINKER_COMMAND}") +if (NOT OS_MACOSX) # MACOSX's lld will core dump + # In terms of performance, mold> lld> gold> ld + set(CUSTUM_LINKER_COMMAND "ld") + TRY_TO_CHANGE_LINKER("mold" "mold") + TRY_TO_CHANGE_LINKER("lld" "LLD") + TRY_TO_CHANGE_LINKER("gold" "GNU gold") + if (NOT CUSTUM_LINKER_COMMAND STREQUAL "ld") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${CUSTUM_LINKER_COMMAND}") + endif() endif() + set(CMAKE_CXX_STANDARD 20) set(CMAKE_C_STANDARD 17) diff --git a/be/src/vec/common/field_visitors.h b/be/src/vec/common/field_visitors.h index d33dbce570..68a85170d4 100644 --- a/be/src/vec/common/field_visitors.h +++ b/be/src/vec/common/field_visitors.h @@ -63,64 +63,6 @@ typename std::decay_t::ResultType apply_visitor(Visitor&& visitor, F&& return visitor(field.template get>()); case Field::Types::Decimal128I: return visitor(field.template get>()); - case Field::Types::AggregateFunctionState: - return visitor(field.template get()); - - default: - LOG(FATAL) << "Bad type of Field"; - return {}; - } -} - -template -typename std::decay_t::ResultType apply_visitor(Visitor&& visitor, F1&& field1, - F2&& field2) { - switch (field1.getType()) { - case Field::Types::Null: - return apply_binary_visitor_impl(std::forward(visitor), - field1.template get(), std::forward(field2)); - case Field::Types::UInt64: - return apply_binary_visitor_impl(std::forward(visitor), - field1.template get(), std::forward(field2)); - case Field::Types::UInt128: - return apply_binary_visitor_impl(std::forward(visitor), - field1.template get(), std::forward(field2)); - case Field::Types::Int64: - return apply_binary_visitor_impl(std::forward(visitor), - field1.template get(), std::forward(field2)); - case Field::Types::Float64: - return apply_binary_visitor_impl(std::forward(visitor), - field1.template get(), std::forward(field2)); - case Field::Types::String: - return apply_binary_visitor_impl(std::forward(visitor), - field1.template get(), std::forward(field2)); - case Field::Types::Array: - return apply_binary_visitor_impl(std::forward(visitor), - field1.template get(), std::forward(field2)); - case Field::Types::Tuple: - return apply_binary_visitor_impl(std::forward(visitor), - field1.template get(), std::forward(field2)); - case Field::Types::Decimal32: - return apply_binary_visitor_impl(std::forward(visitor), - field1.template get>(), - std::forward(field2)); - case Field::Types::Decimal64: - return apply_binary_visitor_impl(std::forward(visitor), - field1.template get>(), - std::forward(field2)); - case Field::Types::Decimal128: - return apply_binary_visitor_impl(std::forward(visitor), - field1.template get>(), - std::forward(field2)); - case Field::Types::Decimal128I: - return apply_binary_visitor_impl(std::forward(visitor), - field1.template get>(), - std::forward(field2)); - case Field::Types::AggregateFunctionState: - return apply_binary_visitor_impl(std::forward(visitor), - field1.template get(), - std::forward(field2)); - default: LOG(FATAL) << "Bad type of Field"; return {}; diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp index aeb55c1d79..9970b284ce 100644 --- a/be/src/vec/core/field.cpp +++ b/be/src/vec/core/field.cpp @@ -83,13 +83,6 @@ void read_binary(Array& x, BufferReadable& buf) { x.push_back(value); break; } - case Field::Types::AggregateFunctionState: { - AggregateFunctionStateData value; - doris::vectorized::read_string_binary(value.name, buf); - doris::vectorized::read_string_binary(value.data, buf); - x.push_back(value); - break; - } } } } @@ -129,11 +122,6 @@ void write_binary(const Array& x, BufferWritable& buf) { doris::vectorized::write_json_binary(get(*it), buf); break; } - case Field::Types::AggregateFunctionState: { - doris::vectorized::write_string_binary(it->get().name, buf); - doris::vectorized::write_string_binary(it->get().data, buf); - break; - } } }; } diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index 89c7d79934..941ca3fe3d 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -39,6 +39,9 @@ // IWYU pragma: no_include #include "common/compiler_util.h" // IWYU pragma: keep +#include "olap/hll.h" +#include "util/bitmap_value.h" +#include "util/quantile_state.h" #include "vec/common/uint128.h" #include "vec/core/types.h" @@ -55,7 +58,9 @@ struct PackedInt128; namespace doris::vectorized { template -struct NearestFieldTypeImpl; +struct NearestFieldTypeImpl { + using Type = T; +}; template using NearestFieldType = typename NearestFieldTypeImpl::Type; @@ -117,37 +122,6 @@ using FieldMap = std::map>; DEFINE_FIELD_MAP(VariantMap); #undef DEFINE_FIELD_MAP -struct AggregateFunctionStateData { - String name; /// Name with arguments. - String data; - - bool operator<(const AggregateFunctionStateData&) const { - LOG(FATAL) << "Operator < is not implemented for AggregateFunctionStateData."; - } - - bool operator<=(const AggregateFunctionStateData&) const { - LOG(FATAL) << "Operator <= is not implemented for AggregateFunctionStateData."; - } - - bool operator>(const AggregateFunctionStateData&) const { - LOG(FATAL) << "Operator <= is not implemented for AggregateFunctionStateData."; - } - - bool operator>=(const AggregateFunctionStateData&) const { - LOG(FATAL) << "Operator >= is not implemented for AggregateFunctionStateData."; - } - - bool operator==(const AggregateFunctionStateData& rhs) const { - if (name != rhs.name) { - LOG(FATAL) << fmt::format( - "Comparing aggregate functions with different types: {} and {}", name, - rhs.name); - } - - return data == rhs.data; - } -}; - class JsonbField { public: JsonbField() = default; @@ -342,6 +316,9 @@ public: Decimal128I = 24, Map = 25, VariantMap = 26, + Bitmap = 27, + HyperLogLog = 28, + QuantileState = 29, }; static const int MIN_NON_POD = 16; @@ -378,16 +355,20 @@ public: return "Decimal128"; case Decimal128I: return "Decimal128I"; - case AggregateFunctionState: - return "AggregateFunctionState"; case FixedLengthObject: return "FixedLengthObject"; case VariantMap: return "VariantMap"; + case Bitmap: + return "Bitmap"; + case HyperLogLog: + return "HyperLogLog"; + case QuantileState: + return "QuantileState"; + default: + LOG(FATAL) << "type not supported, type=" << Types::to_string(which); + break; } - - LOG(FATAL) << "Bad type of Field"; - return nullptr; } }; @@ -523,153 +504,65 @@ public: return get(); } - bool operator<(const Field& rhs) const { - if (which < rhs.which) return true; - if (which > rhs.which) return false; - - switch (which) { - case Types::Null: - return false; - case Types::UInt64: - return get() < rhs.get(); - case Types::UInt128: - return get() < rhs.get(); - case Types::Int64: - return get() < rhs.get(); - case Types::Int128: - return get() < rhs.get(); - case Types::Float64: - return get() < rhs.get(); - case Types::String: - return get() < rhs.get(); - case Types::JSONB: - return get() < rhs.get(); - case Types::Array: - return get() < rhs.get(); - case Types::Tuple: - return get() < rhs.get(); - case Types::Map: - return get() < rhs.get(); - case Types::Decimal32: - return get>() < rhs.get>(); - case Types::Decimal64: - return get>() < rhs.get>(); - case Types::Decimal128: - return get>() < rhs.get>(); - case Types::Decimal128I: - return get>() < rhs.get>(); - case Types::AggregateFunctionState: - return get() < rhs.get(); - case Types::FixedLengthObject: - break; - case Types::VariantMap: - return get() < rhs.get(); - } - - LOG(FATAL) << "Bad type of Field"; - return {}; - } - - bool operator>(const Field& rhs) const { return rhs < *this; } - - bool operator<=(const Field& rhs) const { - if (which < rhs.which) return true; - if (which > rhs.which) return false; - - switch (which) { - case Types::Null: - return true; - case Types::UInt64: - return get() <= rhs.get(); - case Types::UInt128: - return get() <= rhs.get(); - case Types::Int64: - return get() <= rhs.get(); - case Types::Int128: - return get() <= rhs.get(); - case Types::Float64: - return get() <= rhs.get(); - case Types::String: - return get() <= rhs.get(); - case Types::JSONB: - return get() <= rhs.get(); - case Types::Array: - return get() <= rhs.get(); - case Types::Tuple: - return get() <= rhs.get(); - case Types::Map: - return get() < rhs.get(); - case Types::Decimal32: - return get>() <= rhs.get>(); - case Types::Decimal64: - return get>() <= rhs.get>(); - case Types::Decimal128: - return get>() <= rhs.get>(); - case Types::Decimal128I: - return get>() <= rhs.get>(); - case Types::AggregateFunctionState: - return get() <= rhs.get(); - case Types::FixedLengthObject: - break; - case Types::VariantMap: - return get() <= rhs.get(); - } - LOG(FATAL) << "Bad type of Field"; - return {}; - } - - bool operator>=(const Field& rhs) const { return rhs <= *this; } - bool operator==(const Field& rhs) const { - if (which != rhs.which) return false; - - switch (which) { - case Types::Null: - return true; - case Types::UInt64: - case Types::Int64: - case Types::Float64: - return get() == rhs.get(); - case Types::String: - return get() == rhs.get(); - case Types::JSONB: - return get() == rhs.get(); - case Types::Array: - return get() == rhs.get(); - case Types::Tuple: - return get() == rhs.get(); - case Types::Map: - return get() < rhs.get(); - case Types::UInt128: - return get() == rhs.get(); - case Types::Int128: - return get() == rhs.get(); - case Types::Decimal32: - return get>() == rhs.get>(); - case Types::Decimal64: - return get>() == rhs.get>(); - case Types::Decimal128: - return get>() == rhs.get>(); - case Types::Decimal128I: - return get>() == rhs.get>(); - case Types::AggregateFunctionState: - return get() == rhs.get(); - case Types::FixedLengthObject: - break; - case Types::VariantMap: - return get() == rhs.get(); - } - - CHECK(false) << "Bad type of Field"; + return operator<=>(rhs) == std::strong_ordering::equal; } - bool operator!=(const Field& rhs) const { return !(*this == rhs); } + std::strong_ordering operator<=>(const Field& rhs) const { + if (which == Types::Null || rhs == Types::Null) { + return std::strong_ordering::equal; + } + if (which != rhs.which) { + LOG(FATAL) << "lhs type not equal with rhs, lhs=" << Types::to_string(which) + << ", rhs=" << Types::to_string(rhs.which); + } + + switch (which) { + case Types::Bitmap: + case Types::HyperLogLog: + case Types::QuantileState: + case Types::FixedLengthObject: + case Types::JSONB: + case Types::Null: + case Types::Array: + case Types::Tuple: + case Types::Map: + case Types::VariantMap: + return std::strong_ordering::equal; + case Types::UInt64: + return get() <=> rhs.get(); + case Types::UInt128: + return get() <=> rhs.get(); + case Types::Int64: + return get() <=> rhs.get(); + case Types::Int128: + return get() <=> rhs.get(); + case Types::Float64: + return get() < rhs.get() ? std::strong_ordering::less + : get() == rhs.get() ? std::strong_ordering::equal + : std::strong_ordering::greater; + case Types::String: + return get() <=> rhs.get(); + case Types::Decimal32: + return get() <=> rhs.get(); + case Types::Decimal64: + return get() <=> rhs.get(); + case Types::Decimal128: + return get() <=> rhs.get(); + case Types::Decimal128I: + return get() <=> rhs.get(); + default: + LOG(FATAL) << "lhs type not equal with rhs, lhs=" << Types::to_string(which) + << ", rhs=" << Types::to_string(rhs.which); + break; + } + } private: std::aligned_union_t, DecimalField, DecimalField, - DecimalField, AggregateFunctionStateData> + DecimalField, BitmapValue, HyperLogLog, QuantileState> storage; Types::Which which; @@ -747,15 +640,21 @@ private: case Types::Decimal128I: f(field.template get>()); return; - case Types::AggregateFunctionState: - f(field.template get()); - return; - case Types::FixedLengthObject: - LOG(FATAL) << "FixedLengthObject not supported"; - break; case Types::VariantMap: f(field.template get()); return; + case Types::Bitmap: + f(field.template get()); + return; + case Types::HyperLogLog: + f(field.template get()); + return; + case Types::QuantileState: + f(field.template get>()); + return; + default: + LOG(FATAL) << "type not supported, type=" << Types::to_string(field.which); + break; } } @@ -813,9 +712,6 @@ private: case Types::Map: destroy(); break; - case Types::AggregateFunctionState: - destroy(); - break; case Types::VariantMap: destroy(); break; @@ -836,10 +732,6 @@ private: #undef DBMS_MIN_FIELD_SIZE -template <> -struct TypeId { - static constexpr const TypeIndex value = TypeIndex::AggregateFunction; -}; template <> struct TypeId { static constexpr const TypeIndex value = TypeIndex::Tuple; @@ -921,14 +813,25 @@ struct Field::TypeToEnum> { static constexpr Types::Which value = Types::Decimal128I; }; template <> -struct Field::TypeToEnum { - static constexpr Types::Which value = Types::AggregateFunctionState; -}; -template <> struct Field::TypeToEnum { static constexpr Types::Which value = Types::VariantMap; }; +template <> +struct Field::TypeToEnum { + static constexpr Types::Which value = Types::Bitmap; +}; + +template <> +struct Field::TypeToEnum { + static constexpr Types::Which value = Types::HyperLogLog; +}; + +template <> +struct Field::TypeToEnum> { + static constexpr Types::Which value = Types::QuantileState; +}; + template <> struct Field::EnumToType { using Type = Null; @@ -990,10 +893,6 @@ struct Field::EnumToType { using Type = DecimalField; }; template <> -struct Field::EnumToType { - using Type = DecimalField; -}; -template <> struct Field::EnumToType { using Type = VariantMap; }; @@ -1035,10 +934,6 @@ template <> struct TypeName { static std::string get() { return "Map"; } }; -template <> -struct TypeName { - static std::string get() { return "AggregateFunctionState"; } -}; /// char may be signed or unsigned, and behave identically to signed char or unsigned char, /// but they are always three different types. @@ -1065,11 +960,6 @@ struct NearestFieldTypeImpl { using Type = UInt64; }; -template <> -struct NearestFieldTypeImpl { - using Type = UInt128; -}; -//template <> struct NearestFieldTypeImpl { using Type = UInt128; }; template <> struct NearestFieldTypeImpl { using Type = Int64; @@ -1079,34 +969,13 @@ struct NearestFieldTypeImpl { using Type = Int64; }; -template <> -struct NearestFieldTypeImpl { - using Type = VariantMap; -}; - /// long and long long are always different types that may behave identically or not. /// This is different on Linux and Mac. template <> struct NearestFieldTypeImpl { using Type = Int64; }; -template <> -struct NearestFieldTypeImpl { - using Type = Int64; -}; -template <> -struct NearestFieldTypeImpl { - using Type = UInt64; -}; -template <> -struct NearestFieldTypeImpl { - using Type = UInt64; -}; -template <> -struct NearestFieldTypeImpl { - using Type = Int128; -}; template <> struct NearestFieldTypeImpl { using Type = DecimalField; @@ -1144,52 +1013,19 @@ struct NearestFieldTypeImpl { using Type = Float64; }; template <> -struct NearestFieldTypeImpl { - using Type = Float64; -}; -template <> struct NearestFieldTypeImpl { using Type = String; }; template <> -struct NearestFieldTypeImpl { - using Type = String; -}; -template <> -struct NearestFieldTypeImpl { - using Type = JsonbField; -}; -template <> -struct NearestFieldTypeImpl { - using Type = Array; -}; -template <> -struct NearestFieldTypeImpl { - using Type = Tuple; -}; -template <> -struct NearestFieldTypeImpl { - using Type = Map; -}; -template <> struct NearestFieldTypeImpl { using Type = UInt64; }; -template <> -struct NearestFieldTypeImpl { - using Type = Null; -}; template <> struct NearestFieldTypeImpl { using Type = String; }; -template <> -struct NearestFieldTypeImpl { - using Type = AggregateFunctionStateData; -}; - template <> struct Field::TypeToEnum { static const Types::Which value = Types::Int128; @@ -1234,8 +1070,9 @@ Field& Field::operator=(T&& rhs) { if (which != TypeToEnum>::value) { destroy(); create_concrete(std::forward(val)); - } else + } else { assign_concrete(std::forward(val)); + } return *this; } diff --git a/be/src/vec/data_types/convert_field_to_type.cpp b/be/src/vec/data_types/convert_field_to_type.cpp index a2a1377050..ba49257898 100644 --- a/be/src/vec/data_types/convert_field_to_type.cpp +++ b/be/src/vec/data_types/convert_field_to_type.cpp @@ -82,9 +82,6 @@ public: [[noreturn]] String operator()(const DecimalField& x) const { LOG(FATAL) << "not implemeted"; } - [[noreturn]] String operator()(const AggregateFunctionStateData& x) const { - LOG(FATAL) << "not implemeted"; - } }; namespace { diff --git a/be/src/vec/data_types/data_type_bitmap.h b/be/src/vec/data_types/data_type_bitmap.h index 2579a68378..6dabd87f34 100644 --- a/be/src/vec/data_types/data_type_bitmap.h +++ b/be/src/vec/data_types/data_type_bitmap.h @@ -94,10 +94,7 @@ public: } void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; - [[noreturn]] virtual Field get_default() const override { - LOG(FATAL) << "Method get_default() is not implemented for data type " << get_name(); - __builtin_unreachable(); - } + Field get_default() const override { return BitmapValue(); } [[noreturn]] Field get_field(const TExprNode& node) const override { LOG(FATAL) << "Unimplemented get_field for BitMap"; diff --git a/be/src/vec/data_types/data_type_hll.h b/be/src/vec/data_types/data_type_hll.h index 2d397b5832..2b85e27914 100644 --- a/be/src/vec/data_types/data_type_hll.h +++ b/be/src/vec/data_types/data_type_hll.h @@ -91,11 +91,7 @@ public: void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; Status from_string(ReadBuffer& rb, IColumn* column) const override; - Field get_default() const override { - LOG(FATAL) << "Method get_default() is not implemented for data type " << get_name(); - // unreachable - return String(); - } + Field get_default() const override { return HyperLogLog::empty(); } [[noreturn]] Field get_field(const TExprNode& node) const override { LOG(FATAL) << "Unimplemented get_field for HLL"; diff --git a/be/src/vec/data_types/data_type_quantilestate.h b/be/src/vec/data_types/data_type_quantilestate.h index e4afebfd04..c3bcf29464 100644 --- a/be/src/vec/data_types/data_type_quantilestate.h +++ b/be/src/vec/data_types/data_type_quantilestate.h @@ -93,10 +93,7 @@ public: } void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override; - [[noreturn]] virtual Field get_default() const override { - LOG(FATAL) << "Method get_default() is not implemented for data type " << get_name(); - __builtin_unreachable(); - } + Field get_default() const override { return QuantileState(); } [[noreturn]] Field get_field(const TExprNode& node) const override { LOG(FATAL) << "Unimplemented get_field for quantilestate";