From ee934483eb03a1fde6eee1ef258dd05574f97898 Mon Sep 17 00:00:00 2001 From: luozenglin <37725793+luozenglin@users.noreply.github.com> Date: Thu, 3 Nov 2022 15:12:25 +0800 Subject: [PATCH] [Enhancement](function) optimize the `upper` and `lower` functions using the simd instruction. (#13326) optimize the `upper` and `lower` functions using the simd instruction. --- be/src/vec/functions/function_string.cpp | 25 ++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 1265a0e0b4..7e762dfe8c 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -245,21 +245,25 @@ struct NameToUpper { static constexpr auto name = "upper"; }; -using char_transter_op = int (*)(int); -template +template struct TransferImpl { static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { size_t offset_size = offsets.size(); - res_offsets.resize(offsets.size()); - for (size_t i = 0; i < offset_size; ++i) { - res_offsets[i] = offsets[i]; + if (UNLIKELY(!offset_size)) { + return Status::OK(); } + res_offsets.resize(offset_size); + memcpy(res_offsets.data(), offsets.data(), + offset_size * sizeof(ColumnString::Offsets::value_type)); + size_t data_length = data.size(); res_data.resize(data_length); - for (size_t i = 0; i < data_length; ++i) { - res_data[i] = op(data[i]); + if constexpr (std::is_same_v) { + simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data()); + } else if constexpr (std::is_same_v) { + simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data()); } return Status::OK(); } @@ -275,7 +279,8 @@ struct InitcapImpl { ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { size_t offset_size = offsets.size(); res_offsets.resize(offsets.size()); - memcpy(res_offsets.data(), offsets.data(), offset_size * sizeof(offsets.data())); + memcpy(res_offsets.data(), offsets.data(), + offset_size * sizeof(ColumnString::Offsets::value_type)); size_t data_length = data.size(); res_data.resize(data_length); @@ -620,9 +625,9 @@ using FunctionStringFindInSet = using FunctionUnHex = FunctionStringOperateToNullType; -using FunctionToLower = FunctionStringToString, NameToLower>; +using FunctionToLower = FunctionStringToString, NameToLower>; -using FunctionToUpper = FunctionStringToString, NameToUpper>; +using FunctionToUpper = FunctionStringToString, NameToUpper>; using FunctionToInitcap = FunctionStringToString;