From 18ddde9250c499e47605c6105bf2b3deb8b2d0a4 Mon Sep 17 00:00:00 2001 From: saltonz Date: Mon, 11 Mar 2024 02:46:06 +0000 Subject: [PATCH] dispatch SIMD function by cpuinfo for PFoR codec --- deps/oblib/src/lib/codec/ob_simd_fixed_pfor.h | 125 +++++++++--------- 1 file changed, 59 insertions(+), 66 deletions(-) diff --git a/deps/oblib/src/lib/codec/ob_simd_fixed_pfor.h b/deps/oblib/src/lib/codec/ob_simd_fixed_pfor.h index 9325342d6..65a204b73 100644 --- a/deps/oblib/src/lib/codec/ob_simd_fixed_pfor.h +++ b/deps/oblib/src/lib/codec/ob_simd_fixed_pfor.h @@ -16,6 +16,7 @@ #include "ob_codecs.h" #include "ob_bp_util.h" #include "ob_generated_unalign_simd_bp_func.h" +#include "common/ob_target_specific.h" namespace oceanbase { @@ -110,7 +111,7 @@ public: static OB_INLINE void inner_do_encode( const UIntT *__restrict in, uint32_t n, - char *__restrict out, + char *out, const uint32_t b, const uint32_t bx, const uint64_t out_buf_len, @@ -124,20 +125,7 @@ public: char *orig_out = out; if (bx == 0) { // no exception - if (4 == sizeof(UIntT)) { - // uint32_t simd packing - uSIMD_fastpackwithoutmask_128_32((uint32_t *)in, reinterpret_cast<__m128i *>(out), b); - out += BlockSize * b / 8; - } else if (2 == sizeof(UIntT)) { - // uint16_t simd packing - uSIMD_fastpackwithoutmask_128_16((const uint16_t *)in, reinterpret_cast<__m128i *>(out), b); - out += BlockSize * b / 8; - } else { - // uint64_t, uint8_t use scalar packing - uint64_t out_pos = 0; - scalar_bit_packing(in, BlockSize, b, out, out_buf_len, out_pos); - out += out_pos; - } + inner_bit_packing(in, out, b, out_buf_len); } else { uint64_t i = 0; uint64_t xn = 0;// the count of exceptions @@ -174,18 +162,7 @@ public: } // packing data - if (4 == sizeof(UIntT)) { - uSIMD_fastpackwithoutmask_128_32((uint32_t *)_in, reinterpret_cast<__m128i *>(out), b); - out += BlockSize * b / 8; - } else if (2 == sizeof(UIntT)) { - uSIMD_fastpackwithoutmask_128_16((const uint16_t *)_in, reinterpret_cast<__m128i *>(out), b); - out += 
BlockSize * b / 8; - } else { - // uint8 & uint16 does not support simd packing - uint64_t out_pos = 0; - scalar_bit_packing(_in, BlockSize, b, out, out_buf_len, out_pos); - out += out_pos; - } + inner_bit_packing(_in, out, b, out_buf_len); } len = (uint32_t)(out - orig_out); } @@ -307,24 +284,7 @@ public: b = *in++; // bit width if (0 == (b & 0x80)) { // no exception value, direct unpack - if (4 == sizeof(UIntT)) { - uSIMD_fastunpack_128_32(reinterpret_cast(in), reinterpret_cast(out), b); - in += BlockSize * b / 8; // convert to byte; - } else if (2 == sizeof(UIntT)) { - uSIMD_fastunpack_128_16(reinterpret_cast(in), reinterpret_cast(out), b); - in += BlockSize * b / 8; - } else { - // uint8 & uint64 does not support simd packing - uint64_t in_pos = in - _in; - uint64_t in_len = length; - uint64_t tmp_out_pos = 0; - uint64_t tmp_out_buf_len = BlockSize; - UIntT *tmp_out = (UIntT *)out; - scalar_bit_unpacking(_in, in_len, in_pos, BlockSize, b, - tmp_out, tmp_out_buf_len, tmp_out_pos); - in = _in + in_pos; - } - out += BlockSize * sizeof(UIntT); + inner_bit_unpacking(in, _in, length, out, b); } else { b &= (0x80 - 1); // get normal bit width bx = *in++; // get exception bit width @@ -346,27 +306,9 @@ public: in = _in + in_pos; } - if (4 == sizeof(UIntT)) { - // unpacking data - uSIMD_fastunpack_128_32(reinterpret_cast(in), reinterpret_cast(out), b); - in += BlockSize * b / 8; - } else if (2 == sizeof(UIntT)) { - uSIMD_fastunpack_128_16(reinterpret_cast(in), reinterpret_cast(out), b); - in += BlockSize * b / 8; - } else { - // uint8 & uint64 does not support simd packing - uint64_t in_pos = in - _in; - uint64_t in_len = length; - uint64_t tmp_out_pos = 0; - uint64_t tmp_out_buf_len = BlockSize; - UIntT *tmp_out = (UIntT *)out; - scalar_bit_unpacking(_in, in_len, in_pos, BlockSize, b, - tmp_out, tmp_out_buf_len, tmp_out_pos); - in = _in + in_pos; - } - - UIntT *out_arr = reinterpret_cast(out); - out += BlockSize * sizeof(UIntT); + // unpacking data + 
inner_bit_unpacking(in, _in, length, out, b); + UIntT *out_arr = reinterpret_cast(_out + out_pos); // patch exception, TODO, oushen, optimize later int64_t ex_idx = 0; @@ -440,6 +382,57 @@ public: } virtual const char * name() const override { return "ObSIMDFixedPFor128"; } +private: + template + static OB_INLINE void inner_bit_packing( + const UIntT *__restrict in, + char *&out, + const uint32_t bit, + const uint64_t out_buf_len) + { + if (4 == sizeof(UIntT) && common::is_arch_supported(ObTargetArch::AVX2)) { + // uint32_t simd packing + uSIMD_fastpackwithoutmask_128_32((uint32_t *)in, reinterpret_cast<__m128i *>(out), bit); + out += BlockSize * bit / 8; + } else if (2 == sizeof(UIntT) && common::is_arch_supported(ObTargetArch::AVX2)) { + // uint16_t simd packing + uSIMD_fastpackwithoutmask_128_16((const uint16_t *)in, reinterpret_cast<__m128i *>(out), bit); + out += BlockSize * bit / 8; + } else { + // uint64_t, uint8_t use scalar packing + uint64_t out_pos = 0; + scalar_bit_packing(in, BlockSize, bit, out, out_buf_len, out_pos); + out += out_pos; + } + } + + template + static OB_INLINE void inner_bit_unpacking( + const char *&in, + const char *_in, + const uint64_t length, + char *&out, + const uint32_t bit) + { + if (4 == sizeof(UIntT) && common::is_arch_supported(ObTargetArch::AVX2)) { + uSIMD_fastunpack_128_32(reinterpret_cast(in), reinterpret_cast(out), bit); + in += BlockSize * bit / 8; // convert to byte; + } else if (2 == sizeof(UIntT) && common::is_arch_supported(ObTargetArch::AVX2)) { + uSIMD_fastunpack_128_16(reinterpret_cast(in), reinterpret_cast(out), bit); + in += BlockSize * bit / 8; + } else { + // uint8 & uint64 does not support simd packing + uint64_t in_pos = in - _in; + uint64_t in_len = length; + uint64_t tmp_out_pos = 0; + uint64_t tmp_out_buf_len = BlockSize; + UIntT *tmp_out = (UIntT *)out; + scalar_bit_unpacking(_in, in_len, in_pos, BlockSize, bit, + tmp_out, tmp_out_buf_len, tmp_out_pos); + in = _in + in_pos; + } + out += BlockSize * 
sizeof(UIntT); + } }; } // namespace common