From 58bbd46c65805c2de394b8da331033520fc1bd35 Mon Sep 17 00:00:00 2001 From: ZhangYu0123 <67053339+ZhangYu0123@users.noreply.github.com> Date: Sat, 8 Apr 2023 16:04:10 +0800 Subject: [PATCH] [Optimization](string) optimize constant empty string compare ( column='', column!='') (#18321) Optimize constant empty string compare: (1) When the constant empty string '' (size is 0), we can compare offsets in SIMD directly. q10: SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; q11: SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; q12: SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; q13: SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; q14: SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; Issue Number: close #xxx --- be/src/vec/functions/functions_comparison.h | 26 ++++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/be/src/vec/functions/functions_comparison.h b/be/src/vec/functions/functions_comparison.h index 714467c576..1d72a5d5b8 100644 --- a/be/src/vec/functions/functions_comparison.h +++ b/be/src/vec/functions/functions_comparison.h @@ -211,15 +211,23 @@ struct StringEqualsImpl { ColumnString::Offset b_size, PaddedPODArray& c) { size_t size = a_offsets.size(); - ColumnString::Offset prev_a_offset = 0; - - for (size_t i = 0; i < size; ++i) { - auto a_size = a_offsets[i] - prev_a_offset; - - c[i] = positive == memequal_small_allow_overflow15(a_data.data() + prev_a_offset, - a_size, b_data.data(), b_size); - - prev_a_offset = a_offsets[i]; + if (b_size == 0) { + auto* __restrict 
data = c.data(); + auto* __restrict offsets = a_offsets.data(); + for (size_t i = 0; i < size; ++i) { + data[i] = + positive ? (offsets[i] == offsets[i - 1]) : (offsets[i] != offsets[i - 1]); + } + } else { + ColumnString::Offset prev_a_offset = 0; + const auto* a_pos = a_data.data(); + const auto* b_pos = b_data.data(); + for (size_t i = 0; i < size; ++i) { + auto a_size = a_offsets[i] - prev_a_offset; + c[i] = positive == memequal_small_allow_overflow15(a_pos + prev_a_offset, a_size, + b_pos, b_size); + prev_a_offset = a_offsets[i]; + } } }