// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "vec/functions/function_string.h" #include #include #include #include #include "common/status.h" #include "runtime/string_search.hpp" #include "util/url_coding.h" #include "vec/columns/column_string.h" #include "vec/common/pod_array_fwd.h" #include "vec/common/string_ref.h" #include "vec/functions/function_reverse.h" #include "vec/functions/function_string_to_string.h" #include "vec/functions/function_totype.h" #include "vec/functions/simple_function_factory.h" namespace doris::vectorized { /* Implementations of string scalar functions over ColumnString data, followed by register_function_string, which registers them with a SimpleFunctionFactory. */ struct NameStringASCII { static constexpr auto name = "ascii"; }; /* ascii: per row, writes 0 for an empty string, otherwise the first byte of the string. NOTE(review): every loop in this file reads offsets[i - 1] at i == 0 - presumably the offsets container permits index -1 and yields 0 there; confirm. */ struct StringASCII { using ReturnType = DataTypeInt32; static constexpr auto TYPE_INDEX = TypeIndex::String; using Type = String; using ReturnColumnType = ColumnVector; static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, PaddedPODArray& res) { auto size = offsets.size(); res.resize(size); for (int i = 0; i < size; ++i) { const char* raw_str = reinterpret_cast(&data[offsets[i - 1]]); res[i] = (offsets[i] == offsets[i - 1]) ? 
0 : static_cast(raw_str[0]); } return Status::OK(); } }; /* NOTE(review): "Lenght" is a misspelling of "Length"; renaming it requires updating every reference. */ struct NameStringLenght { static constexpr auto name = "length"; }; /* length: per row, the byte length offsets[i] - offsets[i - 1]. */ struct StringLengthImpl { using ReturnType = DataTypeInt32; static constexpr auto TYPE_INDEX = TypeIndex::String; using Type = String; using ReturnColumnType = ColumnVector; static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, PaddedPODArray& res) { auto size = offsets.size(); res.resize(size); for (int i = 0; i < size; ++i) { int str_size = offsets[i] - offsets[i - 1]; res[i] = str_size; } return Status::OK(); } }; struct NameStringUtf8Length { static constexpr auto name = "char_length"; }; /* char_length: per row, the value returned by simd::VStringFunctions::get_char_len for the row's bytes (presumably the UTF-8 character count - confirm). */ struct StringUtf8LengthImpl { using ReturnType = DataTypeInt32; static constexpr auto TYPE_INDEX = TypeIndex::String; using Type = String; using ReturnColumnType = ColumnVector; static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, PaddedPODArray& res) { auto size = offsets.size(); res.resize(size); for (int i = 0; i < size; ++i) { const char* raw_str = reinterpret_cast(&data[offsets[i - 1]]); int str_size = offsets[i] - offsets[i - 1]; res[i] = simd::VStringFunctions::get_char_len(raw_str, str_size); } return Status::OK(); } }; struct NameStartsWith { static constexpr auto name = "starts_with"; }; /* starts_with: res is set to whether strl begins with strr. */ struct StartsWithOp { using ResultDataType = DataTypeUInt8; using ResultPaddedPODArray = PaddedPODArray; static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) { re2::StringPiece str_sp(const_cast(strl.data()), strl.length()); re2::StringPiece prefix_sp(const_cast(strr.data()), strr.length()); res = str_sp.starts_with(prefix_sp); } }; struct NameEndsWith { static constexpr auto name = "ends_with"; }; /* ends_with: res is set to whether strl ends with strr. */ struct EndsWithOp { using ResultDataType = DataTypeUInt8; using ResultPaddedPODArray = PaddedPODArray; static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) { re2::StringPiece str_sp(const_cast(strl.data()), 
strl.length()); re2::StringPiece prefix_sp(const_cast(strr.data()), strr.length()); res = str_sp.ends_with(prefix_sp); } }; struct NameFindInSet { static constexpr auto name = "find_in_set"; }; /* find_in_set: res receives the 1-based index of the first comma-separated token of strr that equals strl; res is 0 when strl itself contains a comma or when no token matches. */ struct FindInSetOp { using ResultDataType = DataTypeInt32; using ResultPaddedPODArray = PaddedPODArray; static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) { for (int i = 0; i < strl.length(); ++i) { if (strl[i] == ',') { res = 0; return; } } int32_t token_index = 1; int32_t start = 0; int32_t end; do { end = start; // Position end. while (end < strr.length() && strr[end] != ',') { ++end; } if (strl == std::string_view {strr.data() + start, (size_t)end - start}) { res = token_index; return; } // Re-position start and end past ',' start = end + 1; ++token_index; } while (start < strr.length()); res = 0; } }; struct NameInstr { static constexpr auto name = "instr"; }; // LeftDataType and RightDataType are DataTypeString /* instr: position of the right-hand string inside the left-hand string, computed as the value from execute() below. */ template struct StringInStrImpl { using ResultDataType = DataTypeInt32; using ResultPaddedPODArray = PaddedPODArray; static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata, const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { StringRef lstr_ref(ldata.data, ldata.size); auto size = roffsets.size(); res.resize(size); for (int i = 0; i < size; ++i) { const char* r_raw_str = reinterpret_cast(&rdata[roffsets[i - 1]]); int r_str_size = roffsets[i] - roffsets[i - 1]; StringRef rstr_ref(r_raw_str, r_str_size); res[i] = execute(lstr_ref, rstr_ref); } return Status::OK(); } /* Column-vs-constant case: an empty rdata gives 1 for every row; otherwise res is zero-filled and the constant is searched for across the whole character buffer in one pass, so rows without a match keep 0. */ static Status vector_scalar(const ColumnString::Chars& ldata, const ColumnString::Offsets& loffsets, const StringRef& rdata, ResultPaddedPODArray& res) { auto size = loffsets.size(); res.resize(size); if (rdata.size == 0) { std::fill(res.begin(), res.end(), 1); return Status::OK(); } const UInt8* begin = ldata.data(); const UInt8* end = begin + ldata.size(); const UInt8* pos = begin; /// Current index in the array 
of strings. size_t i = 0; std::fill(res.begin(), res.end(), 0); StringRef rstr_ref(rdata.data, rdata.size); StringSearch search(&rstr_ref); while (pos < end) { // search return matched substring start offset pos = (UInt8*)search.search((char*)pos, end - pos); if (pos >= end) { break; } /// Determine which index it refers to. /// begin + value_offsets[i] is the start offset of string at i+1 while (begin + loffsets[i] < pos) { ++i; } /// We check that the entry does not pass through the boundaries of strings. if (pos + rdata.size <= begin + loffsets[i]) { int loc = pos - begin - loffsets[i - 1]; int l_str_size = loffsets[i] - loffsets[i - 1]; size_t len = std::min(l_str_size, loc); loc = simd::VStringFunctions::get_char_len((char*)(begin + loffsets[i - 1]), len); res[i] = loc + 1; } // move to next string offset pos = begin + loffsets[i]; ++i; } return Status::OK(); } static Status vector_vector(const ColumnString::Chars& ldata, const ColumnString::Offsets& loffsets, const ColumnString::Chars& rdata, const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { DCHECK_EQ(loffsets.size(), roffsets.size()); auto size = loffsets.size(); res.resize(size); for (int i = 0; i < size; ++i) { const char* l_raw_str = reinterpret_cast(&ldata[loffsets[i - 1]]); int l_str_size = loffsets[i] - loffsets[i - 1]; StringRef lstr_ref(l_raw_str, l_str_size); const char* r_raw_str = reinterpret_cast(&rdata[roffsets[i - 1]]); int r_str_size = roffsets[i] - roffsets[i - 1]; StringRef rstr_ref(r_raw_str, r_str_size); res[i] = execute(lstr_ref, rstr_ref); } return Status::OK(); } /* Single-pair search: an empty strr returns 1; a positive byte offset from search is converted with get_char_len before 1 is added. NOTE(review): a result of 0 for "not found" presumably depends on search returning -1 - confirm against StringSearch. */ static int execute(const StringRef& strl, const StringRef& strr) { if (strr.size == 0) { return 1; } StringSearch search(&strr); // Hive returns positions starting from 1. 
int loc = search.search(&strl); if (loc > 0) { size_t len = std::min((size_t)loc, strl.size); loc = simd::VStringFunctions::get_char_len(strl.data, len); } return loc + 1; } }; // the same impl as instr struct NameLocate { static constexpr auto name = "locate"; }; // LeftDataType and RightDataType are DataTypeString /* locate: forwards every layout to StringInStrImpl with the left and right arguments swapped. */ template struct StringLocateImpl { using ResultDataType = DataTypeInt32; using ResultPaddedPODArray = PaddedPODArray; static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata, const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { return StringInStrImpl::vector_scalar(rdata, roffsets, ldata, res); } static Status vector_scalar(const ColumnString::Chars& ldata, const ColumnString::Offsets& loffsets, const StringRef& rdata, ResultPaddedPODArray& res) { return StringInStrImpl::scalar_vector(rdata, ldata, loffsets, res); } static Status vector_vector(const ColumnString::Chars& ldata, const ColumnString::Offsets& loffsets, const ColumnString::Chars& rdata, const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { return StringInStrImpl::vector_vector(rdata, roffsets, ldata, loffsets, res); } }; // LeftDataType and RightDataType are DataTypeString /* Adapter that applies OP::execute(lview, rview, res[i]) row by row for the vector/vector, vector/scalar and scalar/vector argument layouts. */ template struct StringFunctionImpl { using ResultDataType = typename OP::ResultDataType; using ResultPaddedPODArray = typename OP::ResultPaddedPODArray; static void vector_vector(const ColumnString::Chars& ldata, const ColumnString::Offsets& loffsets, const ColumnString::Chars& rdata, const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { DCHECK_EQ(loffsets.size(), roffsets.size()); auto size = loffsets.size(); res.resize(size); for (int i = 0; i < size; ++i) { const char* l_raw_str = reinterpret_cast(&ldata[loffsets[i - 1]]); int l_str_size = loffsets[i] - loffsets[i - 1]; const char* r_raw_str = reinterpret_cast(&rdata[roffsets[i - 1]]); int r_str_size = roffsets[i] - roffsets[i - 1]; std::string_view lview(l_raw_str, l_str_size); std::string_view 
rview(r_raw_str, r_str_size); OP::execute(lview, rview, res[i]); } } static void vector_scalar(const ColumnString::Chars& ldata, const ColumnString::Offsets& loffsets, const StringRef& rdata, ResultPaddedPODArray& res) { auto size = loffsets.size(); res.resize(size); std::string_view rview(rdata.data, rdata.size); for (int i = 0; i < size; ++i) { const char* l_raw_str = reinterpret_cast(&ldata[loffsets[i - 1]]); int l_str_size = loffsets[i] - loffsets[i - 1]; std::string_view lview(l_raw_str, l_str_size); OP::execute(lview, rview, res[i]); } } static void scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata, const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { auto size = roffsets.size(); res.resize(size); std::string_view lview(ldata.data, ldata.size); for (int i = 0; i < size; ++i) { const char* r_raw_str = reinterpret_cast(&rdata[roffsets[i - 1]]); int r_str_size = roffsets[i] - roffsets[i - 1]; std::string_view rview(r_raw_str, r_str_size); OP::execute(lview, rview, res[i]); } } }; struct NameToLower { static constexpr auto name = "lower"; }; struct NameToUpper { static constexpr auto name = "upper"; }; /* lower / upper: copies the offsets unchanged and writes the case-converted bytes of the whole character buffer into res_data; the conversion is chosen at compile time by the std::is_same_v checks. Returns early without touching the outputs when there are no rows. */ template struct TransferImpl { static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { size_t offset_size = offsets.size(); if (UNLIKELY(!offset_size)) { return Status::OK(); } res_offsets.resize(offset_size); memcpy_small_allow_read_write_overflow15( res_offsets.data(), offsets.data(), offset_size * sizeof(ColumnString::Offsets::value_type)); size_t data_length = data.size(); res_data.resize(data_length); if constexpr (std::is_same_v) { simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data()); } else if constexpr (std::is_same_v) { simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data()); } return Status::OK(); } }; // Capitalize first letter struct NameToInitcap { static constexpr auto 
name = "initcap"; }; /* initcap: lower-cases the whole buffer, then upper-cases each alphanumeric byte that is the first alphanumeric byte of its row or follows a non-alphanumeric byte. */ struct InitcapImpl { static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { size_t offset_size = offsets.size(); res_offsets.resize(offsets.size()); memcpy_small_allow_read_write_overflow15( res_offsets.data(), offsets.data(), offset_size * sizeof(ColumnString::Offsets::value_type)); size_t data_length = data.size(); res_data.resize(data_length); simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data()); bool need_capitalize = true; for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) { auto end_index = res_offsets[offset_index]; need_capitalize = true; for (size_t i = start_index; i < end_index; ++i) { if (!::isalnum(res_data[i])) { need_capitalize = true; } else if (need_capitalize) { res_data[i] = ::toupper(res_data[i]); need_capitalize = false; } } start_index = end_index; } return Status::OK(); } }; struct NameTrim { static constexpr auto name = "trim"; }; struct NameLTrim { static constexpr auto name = "ltrim"; }; struct NameRTrim { static constexpr auto name = "rtrim"; }; /* Shared trim loop: for each row applies ltrim and/or rtrim with rhs, as selected by is_ltrim / is_rtrim, and appends the trimmed view to the result column. */ template struct TrimUtil { static Status vector(const ColumnString::Chars& str_data, const ColumnString::Offsets& str_offsets, const StringRef& rhs, ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { size_t offset_size = str_offsets.size(); res_offsets.resize(str_offsets.size()); for (size_t i = 0; i < offset_size; ++i) { const char* raw_str = reinterpret_cast(&str_data[str_offsets[i - 1]]); ColumnString::Offset size = str_offsets[i] - str_offsets[i - 1]; StringRef str(raw_str, size); if constexpr (is_ltrim) { str = simd::VStringFunctions::ltrim(str, rhs); } if constexpr (is_rtrim) { str = simd::VStringFunctions::rtrim(str, rhs); } StringOP::push_value_string(std::string_view((char*)str.data, str.size), i, res_data, res_offsets); } return Status::OK(); } }; // This is an implementation of a 
parameter for the Trim function. /* One-argument form: trims with a single space as rhs. NOTE(review): the else branch runs only if assert_cast yields null - confirm whether assert_cast can return null; if not, that branch is unreachable. */ template struct Trim1Impl { static constexpr auto name = Name::name; static DataTypes get_variadic_argument_types() { return {std::make_shared()}; } static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { const ColumnPtr column = block.get_by_position(arguments[0]).column; if (const auto* col = assert_cast(column.get())) { auto col_res = ColumnString::create(); char blank[] = " "; StringRef rhs(blank, 1); RETURN_IF_ERROR((TrimUtil::vector( col->get_chars(), col->get_offsets(), rhs, col_res->get_chars(), col_res->get_offsets()))); block.replace_by_position(result, std::move(col_res)); } else { return Status::RuntimeError("Illegal column {} of argument of function {}", block.get_by_position(arguments[0]).column->get_name(), name); } return Status::OK(); } }; // This is an implementation of two parameters for the Trim function. /* Two-argument form: rhs is row 0 of the data column underlying the second argument (start of its chars buffer, length get_offsets()[0]). NOTE(review): both else branches depend on assert_cast being able to yield null - confirm. */ template struct Trim2Impl { static constexpr auto name = Name::name; static DataTypes get_variadic_argument_types() { return {std::make_shared(), std::make_shared()}; } static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { const ColumnPtr column = block.get_by_position(arguments[0]).column; const auto& rcol = assert_cast(block.get_by_position(arguments[1]).column.get()) ->get_data_column_ptr(); if (auto col = assert_cast(column.get())) { if (auto col_right = assert_cast(rcol.get())) { auto col_res = ColumnString::create(); const char* raw_rhs = reinterpret_cast(&(col_right->get_chars()[0])); ColumnString::Offset rhs_size = col_right->get_offsets()[0]; StringRef rhs(raw_rhs, rhs_size); RETURN_IF_ERROR((TrimUtil::vector( col->get_chars(), col->get_offsets(), rhs, col_res->get_chars(), col_res->get_offsets()))); block.replace_by_position(result, std::move(col_res)); } else { return Status::RuntimeError("Illegal column {} of argument of function {}", 
block.get_by_position(arguments[1]).column->get_name(), name); } } else { return Status::RuntimeError("Illegal column {} of argument of function {}", block.get_by_position(arguments[0]).column->get_name(), name); } return Status::OK(); } }; /* IFunction wrapper for the trim family: name, variadic argument types and execution are all delegated to impl; the return type is the type of the first argument. */ template class FunctionTrim : public IFunction { public: static constexpr auto name = impl::name; static FunctionPtr create() { return std::make_shared>(); } String get_name() const override { return impl::name; } size_t get_number_of_arguments() const override { return get_variadic_argument_types_impl().size(); } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { if (!is_string_or_fixed_string(arguments[0])) { LOG(FATAL) << fmt::format("Illegal type {} of argument of function {}", arguments[0]->get_name(), get_name()); } return arguments[0]; } // The second parameter of "trim" is a constant. ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } DataTypes get_variadic_argument_types_impl() const override { return impl::get_variadic_argument_types(); } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { return impl::execute(context, block, arguments, result, input_rows_count); } }; /* Size in bytes of the on-stack scratch buffers used by unhex, to_base64 and from_base64; a larger estimated output switches to a heap buffer. */ static constexpr int MAX_STACK_CIPHER_LEN = 1024 * 64; struct UnHexImpl { static constexpr auto name = "unhex"; using ReturnType = DataTypeString; using ColumnType = ColumnString; /* Adds the value of hex digit src_c to c, multiplied by 16 when flag is true; returns false when src_c is not a hex digit. */ static bool check_and_decode_one(char& c, const char src_c, bool flag) { int k = flag ? 
16 : 1; int value = src_c - '0'; // 9 = ('9'-'0') if (value >= 0 && value <= 9) { c += value * k; return true; } value = src_c - 'A'; // 5 = ('F'-'A') if (value >= 0 && value <= 5) { c += (value + 10) * k; return true; } value = src_c - 'a'; // 5 = ('f'-'a') if (value >= 0 && value <= 5) { c += (value + 10) * k; return true; } // not in ( ['0','9'], ['a','f'], ['A','F'] ) return false; } /* Decodes src_len hex characters into dst_str and returns the number of bytes written. NOTE(review): returns 0 both for an odd or zero length and for any invalid character; it never returns a negative value. */ static int hex_decode(const char* src_str, size_t src_len, char* dst_str) { // if str length is odd or 0, return empty string like mysql does. if ((src_len & 1) != 0 or src_len == 0) { return 0; } //check and decode one character at the same time // character in ( ['0','9'], ['a','f'], ['A','F'] ), return 'NULL' like mysql does. for (auto i = 0, dst_index = 0; i < src_len; i += 2, dst_index++) { char c = 0; // combine two character into dst_str one character bool left_4bits_flag = check_and_decode_one(c, *(src_str + i), true); bool right_4bits_flag = check_and_decode_one(c, *(src_str + i + 1), false); if (!left_4bits_flag || !right_4bits_flag) { return 0; } *(dst_str + dst_index) = c; } return src_len / 2; } /* Per row: NULL input stays NULL, empty input gives an empty string, otherwise the decoded bytes are appended. NOTE(review): the outlen < 0 branch cannot be taken given hex_decode above, so invalid input produces an empty string rather than NULL - confirm which is intended. */ static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets, NullMap& null_map) { auto rows_count = offsets.size(); dst_offsets.resize(rows_count); for (int i = 0; i < rows_count; ++i) { if (null_map[i]) { StringOP::push_null_string(i, dst_data, dst_offsets, null_map); continue; } auto source = reinterpret_cast(&data[offsets[i - 1]]); size_t srclen = offsets[i] - offsets[i - 1]; if (srclen == 0) { StringOP::push_empty_string(i, dst_data, dst_offsets); continue; } char dst_array[MAX_STACK_CIPHER_LEN]; char* dst = dst_array; int cipher_len = srclen / 2; std::unique_ptr dst_uptr; if (cipher_len > MAX_STACK_CIPHER_LEN) { dst_uptr.reset(new char[cipher_len]); dst = dst_uptr.get(); } int outlen = hex_decode(source, srclen, dst); if (outlen < 0) { StringOP::push_null_string(i, dst_data, 
dst_offsets, null_map); } else { StringOP::push_value_string(std::string_view(dst, outlen), i, dst_data, dst_offsets); } } return Status::OK(); } }; struct NameStringSpace { static constexpr auto name = "space"; }; /* space: per row, a string of data[i] space characters; a non-positive count gives an empty string. */ struct StringSpace { using ReturnType = DataTypeString; static constexpr auto TYPE_INDEX = TypeIndex::Int32; using Type = Int32; using ReturnColumnType = ColumnString; static Status vector(const ColumnInt32::Container& data, ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { res_offsets.resize(data.size()); size_t input_size = res_offsets.size(); fmt::memory_buffer buffer; for (size_t i = 0; i < input_size; ++i) { buffer.clear(); if (data[i] > 0) { for (size_t j = 0; j < data[i]; ++j) { buffer.push_back(' '); } StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, res_offsets); } else { StringOP::push_empty_string(i, res_data, res_offsets); } } return Status::OK(); } }; /* to_base64: NULL and empty input rows produce NULL; otherwise the output of base64_encode is appended, or NULL when it reports a negative length. */ struct ToBase64Impl { static constexpr auto name = "to_base64"; using ReturnType = DataTypeString; using ColumnType = ColumnString; static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets, NullMap& null_map) { auto rows_count = offsets.size(); dst_offsets.resize(rows_count); for (int i = 0; i < rows_count; ++i) { if (null_map[i]) { StringOP::push_null_string(i, dst_data, dst_offsets, null_map); continue; } auto source = reinterpret_cast(&data[offsets[i - 1]]); size_t srclen = offsets[i] - offsets[i - 1]; if (srclen == 0) { StringOP::push_null_string(i, dst_data, dst_offsets, null_map); continue; } char dst_array[MAX_STACK_CIPHER_LEN]; char* dst = dst_array; int cipher_len = (int)(4.0 * ceil((double)srclen / 3.0)); std::unique_ptr dst_uptr; if (cipher_len > MAX_STACK_CIPHER_LEN) { dst_uptr.reset(new char[cipher_len]); dst = dst_uptr.get(); } int outlen = base64_encode((const unsigned char*)source, srclen, (unsigned char*)dst); if (outlen 
< 0) { StringOP::push_null_string(i, dst_data, dst_offsets, null_map); } else { StringOP::push_value_string(std::string_view(dst, outlen), i, dst_data, dst_offsets); } } return Status::OK(); } }; /* from_base64: NULL and empty input rows produce NULL; the scratch buffer is sized to the input length, and a negative length from base64_decode also produces NULL. */ struct FromBase64Impl { static constexpr auto name = "from_base64"; using ReturnType = DataTypeString; using ColumnType = ColumnString; static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets, NullMap& null_map) { auto rows_count = offsets.size(); dst_offsets.resize(rows_count); for (int i = 0; i < rows_count; ++i) { if (null_map[i]) { StringOP::push_null_string(i, dst_data, dst_offsets, null_map); continue; } auto source = reinterpret_cast(&data[offsets[i - 1]]); size_t srclen = offsets[i] - offsets[i - 1]; if (srclen == 0) { StringOP::push_null_string(i, dst_data, dst_offsets, null_map); continue; } char dst_array[MAX_STACK_CIPHER_LEN]; char* dst = dst_array; int cipher_len = srclen; std::unique_ptr dst_uptr; if (cipher_len > MAX_STACK_CIPHER_LEN) { dst_uptr.reset(new char[cipher_len]); dst = dst_uptr.get(); } int outlen = base64_decode(source, srclen, dst); if (outlen < 0) { StringOP::push_null_string(i, dst_data, dst_offsets, null_map); } else { StringOP::push_value_string(std::string_view(dst, outlen), i, dst_data, dst_offsets); } } return Status::OK(); } }; /* append_trailing_char_if_absent: a right argument whose length is not 1 gives NULL; otherwise the left string is returned unchanged if it already ends with that character, else with the character appended. */ struct StringAppendTrailingCharIfAbsent { static constexpr auto name = "append_trailing_char_if_absent"; using Chars = ColumnString::Chars; using Offsets = ColumnString::Offsets; using ReturnType = DataTypeString; using ColumnType = ColumnString; /* NOTE(review): when l_size == 0 the check l_raw[l_size - 1] reads the byte before the row; scalar_vector guards the empty case with ldata.size == 0 but this function and vector_scalar do not - confirm and align. */ static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets, const Chars& rdata, const Offsets& roffsets, Chars& res_data, Offsets& res_offsets, NullMap& null_map_data) { DCHECK_EQ(loffsets.size(), roffsets.size()); size_t input_rows_count = loffsets.size(); res_offsets.resize(input_rows_count); fmt::memory_buffer buffer; for (size_t i 
= 0; i < input_rows_count; ++i) { buffer.clear(); int l_size = loffsets[i] - loffsets[i - 1]; const auto l_raw = reinterpret_cast(&ldata[loffsets[i - 1]]); int r_size = roffsets[i] - roffsets[i - 1]; const auto r_raw = reinterpret_cast(&rdata[roffsets[i - 1]]); if (r_size != 1) { StringOP::push_null_string(i, res_data, res_offsets, null_map_data); continue; } if (l_raw[l_size - 1] == r_raw[0]) { StringOP::push_value_string(std::string_view(l_raw, l_size), i, res_data, res_offsets); continue; } buffer.append(l_raw, l_raw + l_size); buffer.append(r_raw, r_raw + 1); StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, res_offsets); } } /* Constant right argument: if its length is not 1 every row becomes NULL. NOTE(review): l_raw[l_size - 1] is read without an l_size == 0 guard here as well. */ static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets, const StringRef& rdata, Chars& res_data, Offsets& res_offsets, NullMap& null_map_data) { size_t input_rows_count = loffsets.size(); res_offsets.resize(input_rows_count); fmt::memory_buffer buffer; if (rdata.size != 1) { for (size_t i = 0; i < input_rows_count; ++i) { StringOP::push_null_string(i, res_data, res_offsets, null_map_data); } return; } for (size_t i = 0; i < input_rows_count; ++i) { buffer.clear(); int l_size = loffsets[i] - loffsets[i - 1]; const auto l_raw = reinterpret_cast(&ldata[loffsets[i - 1]]); if (l_raw[l_size - 1] == rdata.data[0]) { StringOP::push_value_string(std::string_view(l_raw, l_size), i, res_data, res_offsets); continue; } buffer.append(l_raw, l_raw + l_size); buffer.append(rdata.begin(), rdata.end()); StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, res_offsets); } } /* Constant left argument: an empty ldata is returned unchanged, without appending the character. */ static void scalar_vector(FunctionContext* context, const StringRef& ldata, const Chars& rdata, const Offsets& roffsets, Chars& res_data, Offsets& res_offsets, NullMap& null_map_data) { size_t input_rows_count = roffsets.size(); res_offsets.resize(input_rows_count); fmt::memory_buffer buffer; for (size_t i = 0; i < input_rows_count; ++i) { buffer.clear(); int r_size = 
roffsets[i] - roffsets[i - 1]; const auto r_raw = reinterpret_cast(&rdata[roffsets[i - 1]]); if (r_size != 1) { StringOP::push_null_string(i, res_data, res_offsets, null_map_data); continue; } if (ldata.size == 0 || ldata.back() == r_raw[0]) { StringOP::push_value_string(ldata.to_string_view(), i, res_data, res_offsets); continue; } buffer.append(ldata.begin(), ldata.end()); buffer.append(r_raw, r_raw + 1); StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, res_offsets); } } }; struct StringLPad { static constexpr auto name = "lpad"; static constexpr auto is_lpad = true; }; struct StringRPad { static constexpr auto name = "rpad"; static constexpr auto is_lpad = false; }; template using StringStartsWithImpl = StringFunctionImpl; template using StringEndsWithImpl = StringFunctionImpl; template using StringFindInSetImpl = StringFunctionImpl; // ready for function registration using FunctionStringASCII = FunctionUnaryToType; using FunctionStringLength = FunctionUnaryToType; using FunctionStringUTF8Length = FunctionUnaryToType; using FunctionStringSpace = FunctionUnaryToType; using FunctionStringStartsWith = FunctionBinaryToType; using FunctionStringEndsWith = FunctionBinaryToType; using FunctionStringInstr = FunctionBinaryToType; using FunctionStringLocate = FunctionBinaryToType; using FunctionStringFindInSet = FunctionBinaryToType; using FunctionToLower = FunctionStringToString, NameToLower>; using FunctionToUpper = FunctionStringToString, NameToUpper>; using FunctionToInitcap = FunctionStringToString; using FunctionUnHex = FunctionStringOperateToNullType; using FunctionToBase64 = FunctionStringOperateToNullType; using FunctionFromBase64 = FunctionStringOperateToNullType; using FunctionStringAppendTrailingCharIfAbsent = FunctionBinaryStringOperateToNullType; using FunctionStringLPad = FunctionStringPad; using FunctionStringRPad = FunctionStringPad; /* Registers the string functions with the factory, then the alternative functions marked for be_exec_version=3, then the aliases (strleft, strright, substr, lcase, ucase, md5, character_length, sm3, sha). */ void register_function_string(SimpleFunctionFactory& factory) { 
factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function>>(); factory.register_function>>(); factory.register_function>>(); factory.register_function>>(); factory.register_function>>(); factory.register_function>>(); factory.register_function(); factory.register_function>(); factory.register_function>(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function>(); factory.register_function>(); factory.register_function>(); factory.register_function>(); factory.register_function>(); factory.register_function>(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function>(); factory.register_function>(); factory.register_function>(); factory.register_function>(); /// @TEMPORARY: for be_exec_version=3 factory.register_alternative_function>(); factory.register_alternative_function>(); factory.register_alternative_function(); factory.register_alternative_function(); factory.register_alternative_function(); factory.register_alias(FunctionLeft::name, "strleft"); 
factory.register_alias(FunctionRight::name, "strright"); factory.register_alias(SubstringUtil::name, "substr"); factory.register_alias(FunctionToLower::name, "lcase"); factory.register_alias(FunctionToUpper::name, "ucase"); factory.register_alias(FunctionStringDigestOneArg::name, "md5"); factory.register_alias(FunctionStringUTF8Length::name, "character_length"); factory.register_alias(FunctionStringDigestOneArg::name, "sm3"); factory.register_alias(FunctionStringDigestSHA1::name, "sha"); } } // namespace doris::vectorized