From fb50626075227e4b8b8a6da373aa7c9b796bd618 Mon Sep 17 00:00:00 2001 From: ZhangYu0123 <67053339+ZhangYu0123@users.noreply.github.com> Date: Sat, 8 Apr 2023 17:05:34 +0800 Subject: [PATCH] [optimize](string) optimize concat function by SIMD memcpy (#18458) Optimize concat function 29% up by memcpy_small_allow_read_write_overflow15. Optimize string functions list: concat, convert_to, mask, initcap, lower, upper. concat function has 29% up: --- be/src/vec/functions/function_string.cpp | 10 ++++++---- be/src/vec/functions/function_string.h | 21 +++++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 8952ff73da..34c65219f0 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -255,8 +255,9 @@ struct TransferImpl { } res_offsets.resize(offset_size); - memcpy(res_offsets.data(), offsets.data(), - offset_size * sizeof(ColumnString::Offsets::value_type)); + memcpy_small_allow_read_write_overflow15( + res_offsets.data(), offsets.data(), + offset_size * sizeof(ColumnString::Offsets::value_type)); size_t data_length = data.size(); res_data.resize(data_length); @@ -279,8 +280,9 @@ struct InitcapImpl { ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { size_t offset_size = offsets.size(); res_offsets.resize(offsets.size()); - memcpy(res_offsets.data(), offsets.data(), - offset_size * sizeof(ColumnString::Offsets::value_type)); + memcpy_small_allow_read_write_overflow15( + res_offsets.data(), offsets.data(), + offset_size * sizeof(ColumnString::Offsets::value_type)); size_t data_length = data.size(); res_data.resize(data_length); diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index f257ad3537..83b6782192 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -368,8 +368,9 @@ private: const char lower, 
const char number) { result.get_chars().resize(source.get_chars().size()); result.get_offsets().resize(source.get_offsets().size()); - memcpy(result.get_offsets().data(), source.get_offsets().data(), - source.get_offsets().size() * sizeof(ColumnString::Offset)); + memcpy_small_allow_read_write_overflow15( + result.get_offsets().data(), source.get_offsets().data(), + source.get_offsets().size() * sizeof(ColumnString::Offset)); const unsigned char* src = source.get_chars().data(); const size_t size = source.get_chars().size(); @@ -452,8 +453,9 @@ private: auto* offsets = src.get_offsets().data(); result.get_chars().resize(src.get_chars().size()); result.get_offsets().resize(src.get_offsets().size()); - memcpy(result.get_offsets().data(), src.get_offsets().data(), - src.get_offsets().size() * sizeof(ColumnString::Offset)); + memcpy_small_allow_read_write_overflow15( + result.get_offsets().data(), src.get_offsets().data(), + src.get_offsets().size() * sizeof(ColumnString::Offset)); auto* res = result.get_chars().data(); for (ssize_t i = 0; i != num_rows; ++i) { @@ -709,9 +711,12 @@ public: auto& current_chars = *chars_list[j]; int size = current_offsets[i] - current_offsets[i - 1]; - memcpy(&res_data[res_offset[i - 1]] + current_length, - &current_chars[current_offsets[i - 1]], size); - current_length += size; + if (size > 0) { + memcpy_small_allow_read_write_overflow15( + &res_data[res_offset[i - 1]] + current_length, + &current_chars[current_offsets[i - 1]], size); + current_length += size; + } } res_offset[i] = res_offset[i - 1] + current_length; } @@ -2530,7 +2535,7 @@ public: void _utf8_to_pinyin(const char* in, size_t in_len, char* out, size_t* out_len) { auto do_memcpy = [](char*& dest, const char*& from, size_t size) { - memcpy(dest, from, size); + memcpy_small_allow_read_write_overflow15(dest, from, size); dest += size; from += size; };