[improvement](bitshuffle)Enable avx512 support in bitshuffle for performance boost (#15972)

As AVX512 is available in most modern processors, it is worth using wherever it provides a performance boost.
AVX512 support has been added in the latest bitshuffle, so we can integrate it into Doris for the AVX512 case.

Tested with the master branch: the SSB queries (q1.1.sql~q4.3.sql, 13 queries in total) are boosted by 1.4%~3.2% (run-ssb-queries.sh was run 5 times, each time with 100 iterations).

Signed-off-by: Wu, Kaiqiang <kaiqiang.wu@intel.com>
Co-authored-by: vesslanjin <jun.i.jin@intel.com>
This commit is contained in:
Kai Qiang Wu
2023-01-30 10:33:01 +08:00
committed by GitHub
parent ec4a56922f
commit 28fcc093a8
3 changed files with 23 additions and 1 deletions

View File

@ -70,6 +70,7 @@ CPU::CPU()
has_popcnt_(false),
has_avx_(false),
has_avx2_(false),
has_avx512_(false),
has_aesni_(false),
has_non_stop_time_stamp_counter_(false),
is_running_in_vm_(false),
@ -201,6 +202,8 @@ void CPU::Initialize() {
(xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */;
has_aesni_ = (cpu_info[2] & 0x02000000) != 0;
has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0;
has_avx512_ = has_avx2_ && (cpu_info7[1] & 0x00010000) != 0 &&
(cpu_info7[1] & 0x40000000) != 0 && (cpu_info7[1] & 0x80000000) != 0;
}
// Get the brand string of the cpu.
__cpuid(cpu_info, 0x80000000);
@ -253,6 +256,7 @@ void CPU::Initialize() {
#endif
}
CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const {
if (has_avx512()) return AVX512;
if (has_avx2()) return AVX2;
if (has_avx()) return AVX;
if (has_sse42()) return SSE42;

View File

@ -60,6 +60,7 @@ public:
SSE42,
AVX,
AVX2,
AVX512,
MAX_INTEL_MICRO_ARCHITECTURE
};
// Accessors for CPU information.
@ -81,6 +82,7 @@ public:
bool has_popcnt() const { return has_popcnt_; }
bool has_avx() const { return has_avx_; }
bool has_avx2() const { return has_avx2_; }
bool has_avx512() const { return has_avx512_; }
bool has_aesni() const { return has_aesni_; }
bool has_non_stop_time_stamp_counter() const { return has_non_stop_time_stamp_counter_; }
bool is_running_in_vm() const { return is_running_in_vm_; }
@ -107,6 +109,7 @@ private:
bool has_popcnt_;
bool has_avx_;
bool has_avx2_;
bool has_avx512_;
bool has_aesni_;
bool has_non_stop_time_stamp_counter_;
bool is_running_in_vm_;

View File

@ -34,6 +34,17 @@
#undef bshuf_compress_lz4
#undef bshuf_decompress_lz4
// Include the bitshuffle header again, but this time importing the
// AVX512-compiled symbols by defining some macros.
#undef BITSHUFFLE_H
#define bshuf_compress_lz4_bound bshuf_compress_lz4_bound_avx512
#define bshuf_compress_lz4 bshuf_compress_lz4_avx512
#define bshuf_decompress_lz4 bshuf_decompress_lz4_avx512
#include <bitshuffle/bitshuffle.h> // NOLINT(*)
#undef bshuf_compress_lz4_bound
#undef bshuf_compress_lz4
#undef bshuf_decompress_lz4
using base::CPU;
namespace doris {
@ -54,7 +65,11 @@ decltype(&bshuf_decompress_lz4) g_bshuf_decompress_lz4;
// the cost of a 'std::once' call.
__attribute__((constructor)) void SelectBitshuffleFunctions() {
#if (defined(__i386) || defined(__x86_64__))
if (CPU().has_avx2()) {
if (CPU().has_avx512()) {
g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound_avx512;
g_bshuf_compress_lz4 = bshuf_compress_lz4_avx512;
g_bshuf_decompress_lz4 = bshuf_decompress_lz4_avx512;
} else if (CPU().has_avx2()) {
g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound_avx2;
g_bshuf_compress_lz4 = bshuf_compress_lz4_avx2;
g_bshuf_decompress_lz4 = bshuf_decompress_lz4_avx2;