[Optimization](string) optimize constant empty string compare (column='', column!='') (#18321)

Optimize constant empty string compare:
(1) When the constant is the empty string '' (size is 0), a row matches exactly when its offset equals the previous row's offset, so we can compare the offsets directly with SIMD.

q10: SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
q11: SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
q12: SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
q13: SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
q14: SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
Issue Number: close #xxx
This commit is contained in:
ZhangYu0123
2023-04-08 16:04:10 +08:00
committed by GitHub
parent 0517616242
commit 58bbd46c65

View File

@ -211,15 +211,23 @@ struct StringEqualsImpl {
ColumnString::Offset b_size,
PaddedPODArray<UInt8>& c) {
size_t size = a_offsets.size();
ColumnString::Offset prev_a_offset = 0;
for (size_t i = 0; i < size; ++i) {
auto a_size = a_offsets[i] - prev_a_offset;
c[i] = positive == memequal_small_allow_overflow15(a_data.data() + prev_a_offset,
a_size, b_data.data(), b_size);
prev_a_offset = a_offsets[i];
if (b_size == 0) {
auto* __restrict data = c.data();
auto* __restrict offsets = a_offsets.data();
for (size_t i = 0; i < size; ++i) {
data[i] =
positive ? (offsets[i] == offsets[i - 1]) : (offsets[i] != offsets[i - 1]);
}
} else {
ColumnString::Offset prev_a_offset = 0;
const auto* a_pos = a_data.data();
const auto* b_pos = b_data.data();
for (size_t i = 0; i < size; ++i) {
auto a_size = a_offsets[i] - prev_a_offset;
c[i] = positive == memequal_small_allow_overflow15(a_pos + prev_a_offset, a_size,
b_pos, b_size);
prev_a_offset = a_offsets[i];
}
}
}