From 28fcc093a8958a6870fec9802b23db07a42bbd7b Mon Sep 17 00:00:00 2001 From: Kai Qiang Wu Date: Mon, 30 Jan 2023 10:33:01 +0800 Subject: [PATCH] [improvement](bitshuffle)Enable avx512 support in bitshuffle for performance boost (#15972) As AVX512 is available in most modern processors, it is worth using when it provides a performance boost. The latest bitshuffle has added AVX512 support, so we can integrate it into Doris for the AVX512 case. Tested with the master branch: the SSB queries (q1.1.sql~q4.3.sql, 13 queries in total) are boosted by 1.4%~3.2%. (Ran run-ssb-queries.sh 5 times, each time with 100 iterations.) Signed-off-by: Wu, Kaiqiang Co-authored-by: vesslanjin --- be/src/gutil/cpu.cc | 4 ++++ be/src/gutil/cpu.h | 3 +++ .../rowset/segment_v2/bitshuffle_wrapper.cpp | 17 ++++++++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/be/src/gutil/cpu.cc b/be/src/gutil/cpu.cc index 91cf92ef63..a7d9bc6540 100644 --- a/be/src/gutil/cpu.cc +++ b/be/src/gutil/cpu.cc @@ -70,6 +70,7 @@ CPU::CPU() has_popcnt_(false), has_avx_(false), has_avx2_(false), + has_avx512_(false), has_aesni_(false), has_non_stop_time_stamp_counter_(false), is_running_in_vm_(false), @@ -201,6 +202,8 @@ void CPU::Initialize() { (xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */; has_aesni_ = (cpu_info[2] & 0x02000000) != 0; has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0; + has_avx512_ = has_avx2_ && (cpu_info7[1] & 0x00010000) != 0 && + (cpu_info7[1] & 0x40000000) != 0 && (cpu_info7[1] & 0x80000000) != 0; } // Get the brand string of the cpu. 
__cpuid(cpu_info, 0x80000000); @@ -253,6 +256,7 @@ void CPU::Initialize() { #endif } CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const { + if (has_avx512()) return AVX512; if (has_avx2()) return AVX2; if (has_avx()) return AVX; if (has_sse42()) return SSE42; diff --git a/be/src/gutil/cpu.h b/be/src/gutil/cpu.h index f7a12bbe43..82b87a1fb3 100644 --- a/be/src/gutil/cpu.h +++ b/be/src/gutil/cpu.h @@ -60,6 +60,7 @@ public: SSE42, AVX, AVX2, + AVX512, MAX_INTEL_MICRO_ARCHITECTURE }; // Accessors for CPU information. @@ -81,6 +82,7 @@ public: bool has_popcnt() const { return has_popcnt_; } bool has_avx() const { return has_avx_; } bool has_avx2() const { return has_avx2_; } + bool has_avx512() const { return has_avx512_; } bool has_aesni() const { return has_aesni_; } bool has_non_stop_time_stamp_counter() const { return has_non_stop_time_stamp_counter_; } bool is_running_in_vm() const { return is_running_in_vm_; } @@ -107,6 +109,7 @@ private: bool has_popcnt_; bool has_avx_; bool has_avx2_; + bool has_avx512_; bool has_aesni_; bool has_non_stop_time_stamp_counter_; bool is_running_in_vm_; diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp index 7ad20f210c..7e569f92b2 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp +++ b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp @@ -34,6 +34,17 @@ #undef bshuf_compress_lz4 #undef bshuf_decompress_lz4 +// Include the bitshuffle header again, but this time importing the +// AVX512-compiled symbols by defining some macros. 
+#undef BITSHUFFLE_H +#define bshuf_compress_lz4_bound bshuf_compress_lz4_bound_avx512 +#define bshuf_compress_lz4 bshuf_compress_lz4_avx512 +#define bshuf_decompress_lz4 bshuf_decompress_lz4_avx512 +#include // NOLINT(*) +#undef bshuf_compress_lz4_bound +#undef bshuf_compress_lz4 +#undef bshuf_decompress_lz4 + using base::CPU; namespace doris { @@ -54,7 +65,11 @@ decltype(&bshuf_decompress_lz4) g_bshuf_decompress_lz4; // the cost of a 'std::once' call. __attribute__((constructor)) void SelectBitshuffleFunctions() { #if (defined(__i386) || defined(__x86_64__)) - if (CPU().has_avx2()) { + if (CPU().has_avx512()) { + g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound_avx512; + g_bshuf_compress_lz4 = bshuf_compress_lz4_avx512; + g_bshuf_decompress_lz4 = bshuf_decompress_lz4_avx512; + } else if (CPU().has_avx2()) { g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound_avx2; g_bshuf_compress_lz4 = bshuf_compress_lz4_avx2; g_bshuf_decompress_lz4 = bshuf_decompress_lz4_avx2;