From d383f1f3d7734ea260470fb288db18e43d57f78e Mon Sep 17 00:00:00 2001 From: zclllyybb Date: Sat, 29 Apr 2023 14:50:39 +0800 Subject: [PATCH] [optimization](simd) optimize count_zero_num for ColumnNullable #19124 --- be/src/util/simd/bits.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/be/src/util/simd/bits.h b/be/src/util/simd/bits.h index a38363d7ff..45f82b23ac 100644 --- a/be/src/util/simd/bits.h +++ b/be/src/util/simd/bits.h @@ -87,6 +87,36 @@ inline size_t count_zero_num(const int8_t* __restrict data, const uint8_t* __res size_t size) { size_t num = 0; const int8_t* end = data + size; +#if defined(__SSE2__) && defined(__POPCNT__) + const __m128i zero16 = _mm_setzero_si128(); + const int8_t* end64 = data + (size / 64 * 64); + + for (; data < end64; data += 64) { + num += __builtin_popcountll( + static_cast<uint64_t>(_mm_movemask_epi8(_mm_or_si128( + _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(data)), + zero16), + _mm_loadu_si128(reinterpret_cast<const __m128i*>(null_map))))) | + (static_cast<uint64_t>(_mm_movemask_epi8(_mm_or_si128( + _mm_cmpeq_epi8( + _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 16)), + zero16), + _mm_loadu_si128(reinterpret_cast<const __m128i*>(null_map + 16))))) + << 16u) | + (static_cast<uint64_t>(_mm_movemask_epi8(_mm_or_si128( + _mm_cmpeq_epi8( + _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 32)), + zero16), + _mm_loadu_si128(reinterpret_cast<const __m128i*>(null_map + 32))))) + << 32u) | + (static_cast<uint64_t>(_mm_movemask_epi8(_mm_or_si128( + _mm_cmpeq_epi8( + _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 48)), + zero16), + _mm_loadu_si128(reinterpret_cast<const __m128i*>(null_map + 48))))) + << 48u)); + } +#endif for (; data < end; ++data, ++null_map) { num += ((*data == 0) | *null_map); }