diff --git a/be/src/gutil/cpu.cc b/be/src/gutil/cpu.cc index 91cf92ef63..a7d9bc6540 100644 --- a/be/src/gutil/cpu.cc +++ b/be/src/gutil/cpu.cc @@ -70,6 +70,7 @@ CPU::CPU() has_popcnt_(false), has_avx_(false), has_avx2_(false), + has_avx512_(false), has_aesni_(false), has_non_stop_time_stamp_counter_(false), is_running_in_vm_(false), @@ -201,6 +202,10 @@ void CPU::Initialize() { (xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */; has_aesni_ = (cpu_info[2] & 0x02000000) != 0; has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0; + // AVX-512 needs AVX512F/BW/VL (leaf-7 EBX bits 16/30/31) and XCR0 bits 1,2,5,6,7 (0xE6) set by the OS. + has_avx512_ = has_avx2_ && (cpu_info7[1] & 0x00010000) != 0 && + (cpu_info7[1] & 0x40000000) != 0 && (cpu_info7[1] & 0x80000000) != 0 && + (xgetbv(0) & 0xE6) == 0xE6 /* opmask and ZMM state enabled by kernel */; } // Get the brand string of the cpu. __cpuid(cpu_info, 0x80000000); @@ -253,6 +258,7 @@ void CPU::Initialize() { #endif } CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const { + if (has_avx512()) return AVX512; if (has_avx2()) return AVX2; if (has_avx()) return AVX; if (has_sse42()) return SSE42; diff --git a/be/src/gutil/cpu.h b/be/src/gutil/cpu.h index f7a12bbe43..82b87a1fb3 100644 --- a/be/src/gutil/cpu.h +++ b/be/src/gutil/cpu.h @@ -60,6 +60,7 @@ public: SSE42, AVX, AVX2, + AVX512, MAX_INTEL_MICRO_ARCHITECTURE }; // Accessors for CPU information. 
@@ -81,6 +82,7 @@ public: bool has_popcnt() const { return has_popcnt_; } bool has_avx() const { return has_avx_; } bool has_avx2() const { return has_avx2_; } + bool has_avx512() const { return has_avx512_; } bool has_aesni() const { return has_aesni_; } bool has_non_stop_time_stamp_counter() const { return has_non_stop_time_stamp_counter_; } bool is_running_in_vm() const { return is_running_in_vm_; } @@ -107,6 +109,7 @@ private: bool has_popcnt_; bool has_avx_; bool has_avx2_; + bool has_avx512_; bool has_aesni_; bool has_non_stop_time_stamp_counter_; bool is_running_in_vm_; diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp index 7ad20f210c..7e569f92b2 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp +++ b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp @@ -34,6 +34,17 @@ #undef bshuf_compress_lz4 #undef bshuf_decompress_lz4 +// Include the bitshuffle header again, but this time importing the +// AVX512-compiled symbols by defining some macros. +#undef BITSHUFFLE_H +#define bshuf_compress_lz4_bound bshuf_compress_lz4_bound_avx512 +#define bshuf_compress_lz4 bshuf_compress_lz4_avx512 +#define bshuf_decompress_lz4 bshuf_decompress_lz4_avx512 +#include // NOLINT(*) +#undef bshuf_compress_lz4_bound +#undef bshuf_compress_lz4 +#undef bshuf_decompress_lz4 + using base::CPU; namespace doris { @@ -54,7 +65,11 @@ decltype(&bshuf_decompress_lz4) g_bshuf_decompress_lz4; // the cost of a 'std::once' call. 
__attribute__((constructor)) void SelectBitshuffleFunctions() { #if (defined(__i386) || defined(__x86_64__)) - if (CPU().has_avx2()) { + if (CPU().has_avx512()) { + g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound_avx512; + g_bshuf_compress_lz4 = bshuf_compress_lz4_avx512; + g_bshuf_decompress_lz4 = bshuf_decompress_lz4_avx512; + } else if (CPU().has_avx2()) { g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound_avx2; g_bshuf_compress_lz4 = bshuf_compress_lz4_avx2; g_bshuf_decompress_lz4 = bshuf_decompress_lz4_avx2;