From 5efafefedaf261d3ef87731bff28f5c7df7faa99 Mon Sep 17 00:00:00 2001 From: ZhangYu0123 <67053339+ZhangYu0123@users.noreply.github.com> Date: Mon, 10 Apr 2023 10:56:07 +0800 Subject: [PATCH] [refactor](string) remove volnitsky search algorithm (#18474) --- be/src/runtime/string_search.hpp | 8 +- be/src/vec/common/string_searcher.h | 109 +++- be/src/vec/common/volnitsky.h | 478 ------------------ .../functions_multi_string_position.cpp | 4 +- 4 files changed, 105 insertions(+), 494 deletions(-) delete mode 100644 be/src/vec/common/volnitsky.h diff --git a/be/src/runtime/string_search.hpp b/be/src/runtime/string_search.hpp index a7d00673aa..63ddaf265c 100644 --- a/be/src/runtime/string_search.hpp +++ b/be/src/runtime/string_search.hpp @@ -23,7 +23,7 @@ #include "common/logging.h" #include "vec/common/string_ref.h" -#include "vec/common/volnitsky.h" +#include "vec/common/string_searcher.h" namespace doris { @@ -36,7 +36,7 @@ public: void set_pattern(const StringRef* pattern) { _pattern = pattern; - _vol_searcher.reset(new Volnitsky(pattern->data, pattern->size)); + _str_searcher.reset(new ASCIICaseSensitiveStringSearcher(pattern->data, pattern->size)); } // search for this pattern in str. @@ -68,14 +68,14 @@ public: return str + len; } - return _vol_searcher->search(str, len); + return _str_searcher->search(str, len); } inline size_t get_pattern_length() { return _pattern ? _pattern->size : 0; } private: const StringRef* _pattern; - std::unique_ptr _vol_searcher; + std::unique_ptr _str_searcher; }; } // namespace doris diff --git a/be/src/vec/common/string_searcher.h b/be/src/vec/common/string_searcher.h index 19fd3d2c86..a7a0ede731 100644 --- a/be/src/vec/common/string_searcher.h +++ b/be/src/vec/common/string_searcher.h @@ -27,6 +27,7 @@ #include #include +#include "vec/common/string_ref.h" #include "vec/common/string_utils/string_utils.h" #ifdef __SSE2__ @@ -45,7 +46,6 @@ namespace doris { // } /** Variants for searching a substring in a string. 
- * In most cases, performance is less than Volnitsky (see Volnitsky.h). */ class StringSearcherBase { @@ -119,8 +119,36 @@ public: template // requires (sizeof(CharT) == 1) - ALWAYS_INLINE bool compare(const CharT* /*haystack*/, const CharT* /*haystack_end*/, - const CharT* pos) const { + const CharT* search(const CharT* haystack, size_t haystack_size) const { + // cast to unsigned int8 to be consistent with needle type + // ensure unsigned type compare + return reinterpret_cast( + _search(reinterpret_cast(haystack), haystack_size)); + } + + template + // requires (sizeof(CharT) == 1) + const CharT* search(const CharT* haystack, const CharT* haystack_end) const { + // cast to unsigned int8 to be consistent with needle type + // ensure unsigned type compare + return reinterpret_cast( + _search(reinterpret_cast(haystack), + reinterpret_cast(haystack_end))); + } + + template + // requires (sizeof(CharT) == 1) + ALWAYS_INLINE bool compare(const CharT* haystack, const CharT* haystack_end, CharT* pos) const { + // cast to unsigned int8 to be consistent with needle type + // ensure unsigned type compare + return _compare(reinterpret_cast(haystack), + reinterpret_cast(haystack_end), + reinterpret_cast(pos)); + } + +private: + ALWAYS_INLINE bool _compare(uint8_t* /*haystack*/, uint8_t* /*haystack_end*/, + uint8_t* pos) const { #ifdef __SSE4_1__ if (needle_end - needle > n && page_safe(pos)) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); @@ -155,9 +183,7 @@ public: return false; } - template - // requires (sizeof(CharT) == 1) - const CharT* search(const CharT* haystack, const CharT* const haystack_end) const { + const uint8_t* _search(const uint8_t* haystack, const uint8_t* haystack_end) const { if (needle == needle_end) return haystack; const auto needle_size = needle_end - needle; @@ -265,10 +291,8 @@ public: return haystack_end; } - template - // requires (sizeof(CharT) == 1) - const CharT* search(const CharT* haystack, const size_t haystack_size) const { 
- return search(haystack, haystack + haystack_size); + const uint8_t* _search(const uint8_t* haystack, const size_t haystack_size) const { + return _search(haystack, haystack + haystack_size); } }; @@ -400,4 +424,69 @@ struct LibCASCIICaseInsensitiveStringSearcher : public StringSearcherBase { } }; +template +class MultiStringSearcherBase { +private: + /// needles + const std::vector& needles; + /// searchers + std::vector searchers; + /// last index of needles that was not processed + size_t last; + +public: + explicit MultiStringSearcherBase(const std::vector& needles_) + : needles {needles_}, last {0} { + searchers.reserve(needles.size()); + + size_t size = needles.size(); + for (size_t i = 0; i < size; ++i) { + const char* cur_needle_data = needles[i].data; + const size_t cur_needle_size = needles[i].size; + + searchers.emplace_back(cur_needle_data, cur_needle_size); + } + } + + /** + * while (hasMoreToSearch()) + * { + * search inside the haystack with the known needles + * } + */ + bool hasMoreToSearch() { + if (last >= needles.size()) { + return false; + } + + return true; + } + /// Tests needle `last` against the haystack, then advances `last`; true if it was found. + bool searchOne(const uint8_t* haystack, const uint8_t* haystack_end) { + const size_t size = needles.size(); + if (last >= size) { + return false; + } + // Use searchers[last] before advancing: keeps needle 0 and stays in bounds. + if (searchers[last++].search(haystack, haystack_end) != haystack_end) { + return true; + } + return false; + } + + template + void searchOneAll(const uint8_t* haystack, const uint8_t* haystack_end, AnsType* answer, + const CountCharsCallback& count_chars) { + const size_t size = needles.size(); + for (; last < size; ++last) { + const uint8_t* ptr = searchers[last].search(haystack, haystack_end); + if (ptr != haystack_end) { + answer[last] = count_chars(haystack, ptr); + } + } + } +}; + +using MultiStringSearcher = MultiStringSearcherBase; + } // namespace doris diff --git a/be/src/vec/common/volnitsky.h b/be/src/vec/common/volnitsky.h deleted file mode 100644 index c3d762e277..0000000000 --- a/be/src/vec/common/volnitsky.h +++ 
/dev/null @@ -1,478 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Volnitsky.h -// and modified by Doris - -#pragma once - -#include -#include - -#include -#include -#include - -#include "vec/common/string_searcher.h" -#include "vec/common/unaligned.h" - -/** Search for a substring in a string by Volnitsky's algorithm - * http://volnitsky.com/project/str_search/ - * - * `haystack` and `needle` can contain zero bytes. - * - * Algorithm: - * - if the `needle` is too small or too large, or too small `haystack`, use std::search or memchr; - * - when initializing, fill in an open-addressing linear probing hash table of the form - * hash from the bigram of needle -> the position of this bigram in needle + 1. 
- * (one is added only to distinguish zero offset from an empty cell) - * - the keys are not stored in the hash table, only the values are stored; - * - bigrams can be inserted several times if they occur in the needle several times; - * - when searching, take from haystack bigram, which should correspond to the last bigram of needle (comparing from the end); - * - look for it in the hash table, if found - get the offset from the hash table and compare the string bytewise; - * - if it did not match, we check the next cell of the hash table from the collision resolution chain; - * - if not found, skip to haystack almost the size of the needle bytes; - * - * MultiVolnitsky - search for multiple substrings in a string: - * - Add bigrams to hash table with string index. Then the usual Volnitsky search is used. - * - We are adding while searching, limiting the number of fallback searchers and the total number of added bigrams - */ - -namespace doris { -using UInt8 = uint8_t; -using UInt16 = uint16_t; -using UInt64 = uint64_t; - -namespace VolnitskyTraits { -using Offset = - UInt8; /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255. -using Id = - UInt8; /// Index of the string (within the array of multiple needles), must not be greater than 255. -using Ngram = UInt16; /// n-gram (2 bytes). - -/** Fits into the L2 cache (of common Intel CPUs). 
- * This number is extremely good for compilers as it is numeric_limits::max() and there are optimizations with movzwl and other instructions with 2 bytes - */ -static constexpr size_t hash_size = 64 * 1024; - -/// min haystack size to use main algorithm instead of fallback -static constexpr size_t min_haystack_size_for_algorithm = 20000; - -static inline bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0) { - return needle_size < 2 * sizeof(Ngram) || needle_size >= std::numeric_limits::max() || - (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm); -} - -static inline Ngram toNGram(const UInt8* const pos) { - return unaligned_load(pos); -} - -template -bool putNGramASCIICaseInsensitive(const UInt8* pos, int offset, Callback&& putNGramBase) { - struct Chars { - UInt8 c0; - UInt8 c1; - }; - - union { - Ngram n; - Chars chars; - }; - - n = toNGram(pos); - - const auto c0_al = isAlphaASCII(chars.c0); - const auto c1_al = isAlphaASCII(chars.c1); - - if (c0_al && c1_al) { - /// 4 combinations: AB, aB, Ab, ab - putNGramBase(n, offset); - chars.c0 = alternateCaseIfAlphaASCII(chars.c0); - putNGramBase(n, offset); - chars.c1 = alternateCaseIfAlphaASCII(chars.c1); - putNGramBase(n, offset); - chars.c0 = alternateCaseIfAlphaASCII(chars.c0); - putNGramBase(n, offset); - } else if (c0_al) { - /// 2 combinations: A1, a1 - putNGramBase(n, offset); - chars.c0 = alternateCaseIfAlphaASCII(chars.c0); - putNGramBase(n, offset); - } else if (c1_al) { - /// 2 combinations: 0B, 0b - putNGramBase(n, offset); - chars.c1 = alternateCaseIfAlphaASCII(chars.c1); - putNGramBase(n, offset); - } else - /// 1 combination: 01 - putNGramBase(n, offset); - - return true; -} - -template -bool putNGram(const UInt8* pos, int offset, [[maybe_unused]] const UInt8* begin, size_t size, - Callback&& putNGramBase) { - if constexpr (CaseSensitive) { - putNGramBase(toNGram(pos), offset); - return true; - } else if constexpr (ASCII) { - return 
putNGramASCIICaseInsensitive(pos, offset, std::forward(putNGramBase)); - } else { - // return putNGramUTF8CaseInsensitive(pos, offset, begin, size, std::forward(putNGramBase)); - return false; - } -} -} // namespace VolnitskyTraits - -/// @todo store lowercase needle to speed up in case there are numerous occurrences of bigrams from needle in haystack -template -class VolnitskyBase { -protected: - const UInt8* needle; - size_t needle_size; - const UInt8* needle_end = needle + needle_size; - /// For how long we move, if the n-gram from haystack is not found in the hash table. - size_t step = needle_size - sizeof(VolnitskyTraits::Ngram) + 1; - - /** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1) - * storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */ - std::unique_ptr hash; /// Hash table. - - bool fallback; /// Do we need to use the fallback algorithm. - - FallbackSearcher fallback_searcher; - -public: - using Searcher = FallbackSearcher; - - /** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified). - * If you specify it small enough, the fallback algorithm will be used, - * since it is considered that it's useless to waste time initializing the hash table. 
- */ - VolnitskyBase(const char* const needle_, const size_t needle_size_, - size_t haystack_size_hint = 0) - : needle {reinterpret_cast(needle_)}, - needle_size {needle_size_}, - fallback {VolnitskyTraits::isFallbackNeedle(needle_size, haystack_size_hint)}, - fallback_searcher {needle_, needle_size} { - if (fallback || fallback_searcher.force_fallback) return; - - hash = std::unique_ptr( - new VolnitskyTraits::Offset[VolnitskyTraits::hash_size] {}); - - auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset) { - return this->putNGramBase(ngram, offset); - }; - /// ssize_t is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0 - /// And also adding from the end guarantees that we will find first occurrence because we will lookup bigger offsets first. - for (auto i = static_cast(needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; - --i) { - bool ok = VolnitskyTraits::putNGram(needle + i, i + 1, needle, - needle_size, callback); - - /** `putNGramUTF8CaseInsensitive` does not work if characters with lower and upper cases - * are represented by different number of bytes or code points. - * So, use fallback if error occurred. - */ - if (!ok) { - fallback_searcher.force_fallback = true; - hash = nullptr; - return; - } - } - } - - /// If not found, the end of the haystack is returned. - const UInt8* search(const UInt8* const haystack, const size_t haystack_size) const { - if (needle_size == 0) return haystack; - - const auto* haystack_end = haystack + haystack_size; - -#ifdef __SSE4_1__ - return fallback_searcher.search(haystack, haystack_end); -#endif - - if (fallback || haystack_size <= needle_size || fallback_searcher.force_fallback) - return fallback_searcher.search(haystack, haystack_end); - - /// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle. 
- const auto* pos = haystack + needle_size - sizeof(VolnitskyTraits::Ngram); - for (; pos <= haystack_end - needle_size; pos += step) { - /// We look at all the cells of the hash table that can correspond to the n-gram from haystack. - for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; - hash[cell_num]; cell_num = (cell_num + 1) % VolnitskyTraits::hash_size) { - /// When found - compare bytewise, using the offset from the hash table. - const auto* res = pos - (hash[cell_num] - 1); - - /// pointer in the code is always padded array so we can use pagesafe semantics - if (fallback_searcher.compare(haystack, haystack_end, res)) return res; - } - } - - return fallback_searcher.search(pos - step + 1, haystack_end); - } - - const char* search(const char* haystack, size_t haystack_size) const { - return reinterpret_cast( - search(reinterpret_cast(haystack), haystack_size)); - } - -protected: - void putNGramBase(const VolnitskyTraits::Ngram ngram, const int offset) { - /// Put the offset for the n-gram in the corresponding cell or the nearest free cell. - size_t cell_num = ngram % VolnitskyTraits::hash_size; - - while (hash[cell_num]) - cell_num = - (cell_num + 1) % VolnitskyTraits::hash_size; /// Search for the next free cell. - - hash[cell_num] = offset; - } -}; - -template -class MultiVolnitskyBase { -private: - /// needles and their offsets - const std::vector& needles; - - /// fallback searchers - std::vector fallback_needles; - std::vector fallback_searchers; - - /// because std::pair<> is not POD - struct OffsetId { - VolnitskyTraits::Id id; - VolnitskyTraits::Offset off; - }; - - std::unique_ptr hash; - - /// step for each bunch of strings - size_t step; - - /// last index of offsets that was not processed - size_t last; - - /// limit for adding to hashtable. 
In worst case with case insentive search, the table will be filled at most as half - static constexpr size_t small_limit = VolnitskyTraits::hash_size / 8; - -public: - explicit MultiVolnitskyBase(const std::vector& needles_) - : needles {needles_}, step {0}, last {0} { - fallback_searchers.reserve(needles.size()); - hash = std::unique_ptr( - new OffsetId[VolnitskyTraits:: - hash_size]); /// No zero initialization, it will be done later. - } - - /** - * This function is needed to initialize hash table - * Returns `true` if there is nothing to initialize - * and `false` if we have something to initialize and initializes it. - * This function is a kind of fallback if there are many needles. - * We actually destroy the hash table and initialize it with uninitialized needles - * and search through the haystack again. - * The actual usage of this function is like this: - * while (hasMoreToSearch()) - * { - * search inside the haystack with the known needles - * } - */ - bool hasMoreToSearch() { - if (last == needles.size()) return false; - - memset(hash.get(), 0, VolnitskyTraits::hash_size * sizeof(OffsetId)); - fallback_needles.clear(); - step = std::numeric_limits::max(); - - size_t buf = 0; - size_t size = needles.size(); - - for (; last < size; ++last) { - const char* cur_needle_data = needles[last].data; - const size_t cur_needle_size = needles[last].size; - - /// save the indices of fallback searchers - if (VolnitskyTraits::isFallbackNeedle(cur_needle_size)) { - fallback_needles.push_back(last); - } else { - /// put all bigrams - auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset) { - return this->putNGramBase(ngram, offset, this->last); - }; - - buf += cur_needle_size - sizeof(VolnitskyTraits::Ngram) + 1; - - /// this is the condition when we actually need to stop and start searching with known needles - if (buf > small_limit) break; - - step = std::min(step, cur_needle_size - sizeof(VolnitskyTraits::Ngram) + 1); - for (auto i = 
static_cast(cur_needle_size - sizeof(VolnitskyTraits::Ngram)); - i >= 0; --i) { - VolnitskyTraits::putNGram( - reinterpret_cast(cur_needle_data) + i, i + 1, - reinterpret_cast(cur_needle_data), cur_needle_size, - callback); - } - } - fallback_searchers.emplace_back(cur_needle_data, cur_needle_size); - } - return true; - } - - inline bool searchOne(const UInt8* haystack, const UInt8* haystack_end) const { - const size_t fallback_size = fallback_needles.size(); - for (size_t i = 0; i < fallback_size; ++i) - if (fallback_searchers[fallback_needles[i]].search(haystack, haystack_end) != - haystack_end) - return true; - - /// check if we have one non empty volnitsky searcher - if (step != std::numeric_limits::max()) { - const auto* pos = haystack + step - sizeof(VolnitskyTraits::Ngram); - for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step) { - for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; - hash[cell_num].off; cell_num = (cell_num + 1) % VolnitskyTraits::hash_size) { - if (pos >= haystack + hash[cell_num].off - 1) { - const auto res = pos - (hash[cell_num].off - 1); - const size_t ind = hash[cell_num].id; - if (res + needles[ind].size <= haystack_end && - fallback_searchers[ind].compare(haystack, haystack_end, res)) - return true; - } - } - } - } - return false; - } - - inline size_t searchOneFirstIndex(const UInt8* haystack, const UInt8* haystack_end) const { - const size_t fallback_size = fallback_needles.size(); - - size_t answer = std::numeric_limits::max(); - - for (size_t i = 0; i < fallback_size; ++i) - if (fallback_searchers[fallback_needles[i]].search(haystack, haystack_end) != - haystack_end) - answer = std::min(answer, fallback_needles[i]); - - /// check if we have one non empty volnitsky searcher - if (step != std::numeric_limits::max()) { - const auto* pos = haystack + step - sizeof(VolnitskyTraits::Ngram); - for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step) { - for (size_t 
cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; - hash[cell_num].off; cell_num = (cell_num + 1) % VolnitskyTraits::hash_size) { - if (pos >= haystack + hash[cell_num].off - 1) { - const auto res = pos - (hash[cell_num].off - 1); - const size_t ind = hash[cell_num].id; - if (res + needles[ind].size <= haystack_end && - fallback_searchers[ind].compare(haystack, haystack_end, res)) - answer = std::min(answer, ind); - } - } - } - } - - /* - * if nothing was found, answer + 1 will be equal to zero and we can - * assign it into the result because we need to return the position starting with one - */ - return answer + 1; - } - - template - inline UInt64 searchOneFirstPosition(const UInt8* haystack, const UInt8* haystack_end, - const CountCharsCallback& count_chars) const { - const size_t fallback_size = fallback_needles.size(); - - UInt64 answer = std::numeric_limits::max(); - - for (size_t i = 0; i < fallback_size; ++i) - if (auto pos = fallback_searchers[fallback_needles[i]].search(haystack, haystack_end); - pos != haystack_end) - answer = std::min(answer, pos - haystack); - - /// check if we have one non empty volnitsky searcher - if (step != std::numeric_limits::max()) { - const auto* pos = haystack + step - sizeof(VolnitskyTraits::Ngram); - for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step) { - for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; - hash[cell_num].off; cell_num = (cell_num + 1) % VolnitskyTraits::hash_size) { - if (pos >= haystack + hash[cell_num].off - 1) { - const auto res = pos - (hash[cell_num].off - 1); - const size_t ind = hash[cell_num].id; - if (res + needles[ind].size <= haystack_end && - fallback_searchers[ind].compare(haystack, haystack_end, res)) - answer = std::min(answer, res - haystack); - } - } - } - } - if (answer == std::numeric_limits::max()) return 0; - return count_chars(haystack, haystack + answer); - } - - template - inline void searchOneAll(const UInt8* 
haystack, const UInt8* haystack_end, AnsType* answer, - const CountCharsCallback& count_chars) const { - const size_t fallback_size = fallback_needles.size(); - for (size_t i = 0; i < fallback_size; ++i) { - const UInt8* ptr = - fallback_searchers[fallback_needles[i]].search(haystack, haystack_end); - if (ptr != haystack_end) answer[fallback_needles[i]] = count_chars(haystack, ptr); - } - - /// check if we have one non empty volnitsky searcher - if (step != std::numeric_limits::max()) { - const auto* pos = haystack + step - sizeof(VolnitskyTraits::Ngram); - for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step) { - for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; - hash[cell_num].off; cell_num = (cell_num + 1) % VolnitskyTraits::hash_size) { - if (pos >= haystack + hash[cell_num].off - 1) { - const auto* res = pos - (hash[cell_num].off - 1); - const size_t ind = hash[cell_num].id; - if (answer[ind] == 0 && res + needles[ind].size <= haystack_end && - fallback_searchers[ind].compare(haystack, haystack_end, res)) - answer[ind] = count_chars(haystack, res); - } - } - } - } - } - - void putNGramBase(const VolnitskyTraits::Ngram ngram, const int offset, const size_t num) { - size_t cell_num = ngram % VolnitskyTraits::hash_size; - - while (hash[cell_num].off) cell_num = (cell_num + 1) % VolnitskyTraits::hash_size; - - hash[cell_num] = {static_cast(num), - static_cast(offset)}; - } -}; - -using Volnitsky = VolnitskyBase; -using VolnitskyUTF8 = - VolnitskyBase; /// exactly same as Volnitsky - -using VolnitskyCaseSensitiveToken = VolnitskyBase; - -using MultiVolnitsky = MultiVolnitskyBase; -using MultiVolnitskyUTF8 = MultiVolnitskyBase; - -} // namespace doris diff --git a/be/src/vec/functions/functions_multi_string_position.cpp b/be/src/vec/functions/functions_multi_string_position.cpp index 756d561ee6..a3c3420acb 100644 --- a/be/src/vec/functions/functions_multi_string_position.cpp +++ 
b/be/src/vec/functions/functions_multi_string_position.cpp @@ -25,7 +25,7 @@ #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" #include "vec/common/pod_array.h" -#include "vec/common/volnitsky.h" +#include "vec/common/string_searcher.h" #include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" @@ -221,7 +221,7 @@ struct FunctionMultiSearchAllPositionsImpl { }; struct MultiSearcherImpl { - using MultiSearcher = MultiVolnitsky; + using MultiSearcher = MultiStringSearcher; static MultiSearcher create_multi_searcher(const std::vector& needles) { return MultiSearcher(needles);