remove duplicate type definition in function context remove unused method in function context not need stale state in vexpr context because vexpr is stateless and function context saves state and they are cloned. remove useless slot_size in all tuple or slot descriptor. remove doris_udf namespace, it is useless. remove some unused macro definitions. init v_conjuncts in vscanner, not need write the same code in every scanner. using unique ptr to manage function context since it could only belong to a single expr context. Issue Number: close #xxx --------- Co-authored-by: yiguolei <yiguolei@gmail.com>
451 lines
15 KiB
C++
451 lines
15 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
// This file is copied from
|
|
// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/StringRef.h
|
|
// and modified by Doris
|
|
|
|
#pragma once
|
|
|
|
#include <climits>
|
|
#include <functional>
|
|
#include <ostream>
|
|
#include <string>
|
|
#include <string_view>
|
|
#include <vector>
|
|
|
|
#include "gutil/hash/city.h"
|
|
#include "gutil/hash/hash128to64.h"
|
|
#include "udf/udf.h"
|
|
#include "util/cpu_info.h"
|
|
#include "util/hash_util.hpp"
|
|
#include "util/slice.h"
|
|
#include "vec/common/string_ref.h"
|
|
#include "vec/common/unaligned.h"
|
|
#include "vec/core/types.h"
|
|
|
|
#if defined(__SSE2__)
|
|
#include <emmintrin.h>
|
|
#endif
|
|
|
|
#if defined(__SSE4_2__)
|
|
#include <nmmintrin.h>
|
|
#include <smmintrin.h>
|
|
|
|
#include "util/sse_util.hpp"
|
|
#endif
|
|
|
|
#if defined(__aarch64__)
|
|
#include <sse2neon.h>
|
|
#endif
|
|
|
|
namespace doris {
|
|
|
|
/// unnamed namespace packaging simd-style equality compare functions.
|
|
namespace {
|
|
|
|
#if defined(__SSE2__) || defined(__aarch64__)
|
|
|
|
/** Compare strings for equality.
|
|
* The approach is controversial and does not win in all cases.
|
|
* For more information, see hash_map_string_2.cpp
|
|
*/
|
|
|
|
inline bool compareSSE2(const char* p1, const char* p2) {
|
|
return 0xFFFF ==
|
|
_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(p1)),
|
|
_mm_loadu_si128(reinterpret_cast<const __m128i*>(p2))));
|
|
}
|
|
|
|
inline bool compareSSE2x4(const char* p1, const char* p2) {
|
|
return 0xFFFF ==
|
|
_mm_movemask_epi8(_mm_and_si128(
|
|
_mm_and_si128(
|
|
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(p1)),
|
|
_mm_loadu_si128(reinterpret_cast<const __m128i*>(p2))),
|
|
_mm_cmpeq_epi8(
|
|
_mm_loadu_si128(reinterpret_cast<const __m128i*>(p1) + 1),
|
|
_mm_loadu_si128(reinterpret_cast<const __m128i*>(p2) + 1))),
|
|
_mm_and_si128(
|
|
_mm_cmpeq_epi8(
|
|
_mm_loadu_si128(reinterpret_cast<const __m128i*>(p1) + 2),
|
|
_mm_loadu_si128(reinterpret_cast<const __m128i*>(p2) + 2)),
|
|
_mm_cmpeq_epi8(
|
|
_mm_loadu_si128(reinterpret_cast<const __m128i*>(p1) + 3),
|
|
_mm_loadu_si128(reinterpret_cast<const __m128i*>(p2) + 3)))));
|
|
}
|
|
|
|
inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) {
|
|
/** The order of branches and the trick with overlapping comparisons
|
|
* are the same as in memcpy implementation.
|
|
* See the comments in
|
|
* https://github.com/ClickHouse/ClickHouse/blob/master/base/glibc-compatibility/memcpy/memcpy.h
|
|
*/
|
|
|
|
if (size <= 16) {
|
|
if (size >= 8) {
|
|
/// Chunks of [8,16] bytes.
|
|
return unaligned_load<uint64_t>(p1) == unaligned_load<uint64_t>(p2) &&
|
|
unaligned_load<uint64_t>(p1 + size - 8) ==
|
|
unaligned_load<uint64_t>(p2 + size - 8);
|
|
} else if (size >= 4) {
|
|
/// Chunks of [4,7] bytes.
|
|
return unaligned_load<uint32_t>(p1) == unaligned_load<uint32_t>(p2) &&
|
|
unaligned_load<uint32_t>(p1 + size - 4) ==
|
|
unaligned_load<uint32_t>(p2 + size - 4);
|
|
} else if (size >= 2) {
|
|
/// Chunks of [2,3] bytes.
|
|
return unaligned_load<uint16_t>(p1) == unaligned_load<uint16_t>(p2) &&
|
|
unaligned_load<uint16_t>(p1 + size - 2) ==
|
|
unaligned_load<uint16_t>(p2 + size - 2);
|
|
} else if (size >= 1) {
|
|
/// A single byte.
|
|
return *p1 == *p2;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
while (size >= 64) {
|
|
if (compareSSE2x4(p1, p2)) {
|
|
p1 += 64;
|
|
p2 += 64;
|
|
size -= 64;
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
switch (size / 16) {
|
|
case 3:
|
|
if (!compareSSE2(p1 + 32, p2 + 32)) {
|
|
return false;
|
|
}
|
|
[[fallthrough]];
|
|
case 2:
|
|
if (!compareSSE2(p1 + 16, p2 + 16)) {
|
|
return false;
|
|
}
|
|
[[fallthrough]];
|
|
case 1:
|
|
if (!compareSSE2(p1, p2)) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return compareSSE2(p1 + size - 16, p2 + size - 16);
|
|
}
|
|
|
|
#endif
|
|
|
|
// Compare two strings using sse4.2 intrinsics if they are available. This code assumes
|
|
// that the trivial cases are already handled (i.e. one string is empty).
|
|
// Returns:
|
|
// < 0 if s1 < s2
|
|
// 0 if s1 == s2
|
|
// > 0 if s1 > s2
|
|
// The SSE code path is just under 2x faster than the non-sse code path.
|
|
// - s1/n1: ptr/len for the first string
|
|
// - s2/n2: ptr/len for the second string
|
|
// - len: min(n1, n2) - this can be more cheaply passed in by the caller
|
|
inline int string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2, int64_t len) {
|
|
DCHECK_EQ(len, std::min(n1, n2));
|
|
#ifdef __SSE4_2__
|
|
while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) {
|
|
__m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
|
|
__m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
|
|
int chars_match = _mm_cmpestri(xmm0, sse_util::CHARS_PER_128_BIT_REGISTER, xmm1,
|
|
sse_util::CHARS_PER_128_BIT_REGISTER, sse_util::STRCMP_MODE);
|
|
if (chars_match != sse_util::CHARS_PER_128_BIT_REGISTER) {
|
|
return (unsigned char)s1[chars_match] - (unsigned char)s2[chars_match];
|
|
}
|
|
len -= sse_util::CHARS_PER_128_BIT_REGISTER;
|
|
s1 += sse_util::CHARS_PER_128_BIT_REGISTER;
|
|
s2 += sse_util::CHARS_PER_128_BIT_REGISTER;
|
|
}
|
|
#endif
|
|
unsigned char u1, u2;
|
|
while (len-- > 0) {
|
|
u1 = (unsigned char)*s1++;
|
|
u2 = (unsigned char)*s2++;
|
|
if (u1 != u2) {
|
|
return u1 - u2;
|
|
}
|
|
if (u1 == '\0') {
|
|
return n1 - n2;
|
|
}
|
|
}
|
|
|
|
return n1 - n2;
|
|
}
|
|
|
|
} // unnamed namespace
|
|
|
|
/// The thing to avoid creating strings to find substrings in the hash table.
|
|
/// User should make sure data source is const.
|
|
/// maybe considering rewrite it with std::span / std::basic_string_view is meaningful.
|
|
struct StringRef {
|
|
// FIXME: opening member accessing really damages.
|
|
const char* data = nullptr;
|
|
size_t size = 0;
|
|
|
|
StringRef() = default;
|
|
StringRef(const char* data_, size_t size_) : data(data_), size(size_) {}
|
|
StringRef(const unsigned char* data_, size_t size_)
|
|
: StringRef(reinterpret_cast<const char*>(data_), size_) {}
|
|
|
|
StringRef(const std::string& s) : data(s.data()), size(s.size()) {}
|
|
StringRef(const StringVal& src) : StringRef(src.ptr, src.len) {}
|
|
explicit StringRef(const char* str) : data(str), size(strlen(str)) {}
|
|
|
|
static StringRef from_string_val(const StringVal& src) { return StringRef(src); }
|
|
|
|
std::string to_string() const { return std::string(data, size); }
|
|
std::string debug_string() const { return to_string(); }
|
|
std::string_view to_string_view() const { return std::string_view(data, size); }
|
|
Slice to_slice() const { return doris::Slice(data, size); }
|
|
|
|
// this is just for show, e.g. print data to error log, to avoid print large string.
|
|
std::string to_prefix(size_t length) const { return std::string(data, std::min(length, size)); }
|
|
|
|
// TODO: this function is dangerous!
|
|
StringVal to_string_val() const {
|
|
return StringVal(reinterpret_cast<uint8_t*>(const_cast<char*>(data)), size);
|
|
}
|
|
|
|
void to_string_val(StringVal* sv) const {
|
|
*sv = StringVal(reinterpret_cast<uint8_t*>(const_cast<char*>(data)), size);
|
|
}
|
|
|
|
explicit operator std::string() const { return to_string(); }
|
|
operator std::string_view() const { return std::string_view {data, size}; }
|
|
|
|
StringRef substring(int start_pos, int new_len) const {
|
|
return StringRef(data + start_pos, (new_len < 0) ? (size - start_pos) : new_len);
|
|
}
|
|
|
|
StringRef substring(int start_pos) const { return substring(start_pos, size - start_pos); }
|
|
|
|
// Trims leading and trailing spaces.
|
|
StringRef trim() const;
|
|
|
|
bool empty() const { return size == 0; }
|
|
|
|
// support for type_limit
|
|
static constexpr char MIN_CHAR = 0;
|
|
static constexpr char MAX_CHAR = char(
|
|
UCHAR_MAX); // We will convert char to uchar and compare, so we define max_char to unsigned char max.
|
|
static StringRef min_string_val();
|
|
static StringRef max_string_val();
|
|
|
|
bool start_with(const StringRef& search_string) const;
|
|
bool end_with(const StringRef& search_string) const;
|
|
|
|
// Byte-by-byte comparison. Returns:
|
|
// this < other: -1
|
|
// this == other: 0
|
|
// this > other: 1
|
|
int compare(const StringRef& other) const {
|
|
int l = std::min(size, other.size);
|
|
|
|
if (l == 0) {
|
|
if (size == other.size) {
|
|
return 0;
|
|
} else if (size == 0) {
|
|
return -1;
|
|
} else {
|
|
DCHECK_EQ(other.size, 0);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return string_compare(this->data, this->size, other.data, other.size, l);
|
|
}
|
|
|
|
void replace(const char* ptr, int len) {
|
|
this->data = ptr;
|
|
this->size = len;
|
|
}
|
|
|
|
// Find the first position char of appear, return -1 if not found
|
|
size_t find_first_of(char c) const;
|
|
|
|
// ==
|
|
bool eq(const StringRef& other) const {
|
|
if (this->size != other.size) {
|
|
return false;
|
|
}
|
|
#if defined(__SSE2__)
|
|
return memequalSSE2Wide(this->data, other.data, this->size);
|
|
#endif
|
|
return string_compare(this->data, this->size, other.data, other.size, this->size) == 0;
|
|
}
|
|
|
|
bool operator==(const StringRef& other) const { return eq(other); }
|
|
// !=
|
|
bool ne(const StringRef& other) const { return !eq(other); }
|
|
// <=
|
|
bool le(const StringRef& other) const { return compare(other) <= 0; }
|
|
// >=
|
|
bool ge(const StringRef& other) const { return compare(other) >= 0; }
|
|
// <
|
|
bool lt(const StringRef& other) const { return compare(other) < 0; }
|
|
// >
|
|
bool gt(const StringRef& other) const { return compare(other) > 0; }
|
|
|
|
bool operator!=(const StringRef& other) const { return ne(other); }
|
|
|
|
bool operator<=(const StringRef& other) const { return le(other); }
|
|
|
|
bool operator>=(const StringRef& other) const { return ge(other); }
|
|
|
|
bool operator<(const StringRef& other) const { return lt(other); }
|
|
|
|
bool operator>(const StringRef& other) const { return gt(other); }
|
|
|
|
struct Comparator {
|
|
bool operator()(const StringRef& a, const StringRef& b) const { return a.compare(b) < 0; }
|
|
};
|
|
}; // class StringRef
|
|
|
|
// This function must be called 'hash_value' to be picked up by boost.
|
|
inline std::size_t hash_value(const StringRef& v) {
|
|
return HashUtil::hash(v.data, v.size, 0);
|
|
}
|
|
|
|
using StringRefs = std::vector<StringRef>;
|
|
|
|
/** Hash functions.
|
|
* You can use either CityHash64,
|
|
* or a function based on the crc32 statement,
|
|
* which is obviously less qualitative, but on real data sets,
|
|
* when used in a hash table, works much faster.
|
|
* For more information, see hash_map_string_3.cpp
|
|
*/
|
|
|
|
struct StringRefHash64 {
|
|
size_t operator()(StringRef x) const { return util_hash::CityHash64(x.data, x.size); }
|
|
};
|
|
|
|
#if defined(__SSE4_2__) || defined(__aarch64__)
|
|
|
|
/// Parts are taken from CityHash.
|
|
|
|
inline doris::vectorized::UInt64 hash_len16(doris::vectorized::UInt64 u,
|
|
doris::vectorized::UInt64 v) {
|
|
return Hash128to64(uint128(u, v));
|
|
}
|
|
|
|
inline doris::vectorized::UInt64 shift_mix(doris::vectorized::UInt64 val) {
|
|
return val ^ (val >> 47);
|
|
}
|
|
|
|
inline doris::vectorized::UInt64 rotate_by_at_least1(doris::vectorized::UInt64 val, int shift) {
|
|
return (val >> shift) | (val << (64 - shift));
|
|
}
|
|
|
|
inline size_t hash_less_than8(const char* data, size_t size) {
|
|
static constexpr doris::vectorized::UInt64 k2 = 0x9ae16a3b2f90404fULL;
|
|
static constexpr doris::vectorized::UInt64 k3 = 0xc949d7c7509e6557ULL;
|
|
|
|
if (size >= 4) {
|
|
doris::vectorized::UInt64 a = unaligned_load<uint32_t>(data);
|
|
return hash_len16(size + (a << 3), unaligned_load<uint32_t>(data + size - 4));
|
|
}
|
|
|
|
if (size > 0) {
|
|
uint8_t a = data[0];
|
|
uint8_t b = data[size >> 1];
|
|
uint8_t c = data[size - 1];
|
|
uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8);
|
|
uint32_t z = size + (static_cast<uint32_t>(c) << 2);
|
|
return shift_mix(y * k2 ^ z * k3) * k2;
|
|
}
|
|
|
|
return k2;
|
|
}
|
|
|
|
inline size_t hash_less_than16(const char* data, size_t size) {
|
|
if (size > 8) {
|
|
doris::vectorized::UInt64 a = unaligned_load<doris::vectorized::UInt64>(data);
|
|
doris::vectorized::UInt64 b = unaligned_load<doris::vectorized::UInt64>(data + size - 8);
|
|
return hash_len16(a, rotate_by_at_least1(b + size, size)) ^ b;
|
|
}
|
|
|
|
return hash_less_than8(data, size);
|
|
}
|
|
|
|
struct CRC32Hash {
|
|
size_t operator()(const StringRef& x) const {
|
|
const char* pos = x.data;
|
|
size_t size = x.size;
|
|
|
|
if (size == 0) {
|
|
return 0;
|
|
}
|
|
|
|
if (size < 8) {
|
|
return hash_less_than8(x.data, x.size);
|
|
}
|
|
|
|
const char* end = pos + size;
|
|
size_t res = -1ULL;
|
|
|
|
do {
|
|
doris::vectorized::UInt64 word = unaligned_load<doris::vectorized::UInt64>(pos);
|
|
res = _mm_crc32_u64(res, word);
|
|
|
|
pos += 8;
|
|
} while (pos + 8 < end);
|
|
|
|
doris::vectorized::UInt64 word = unaligned_load<doris::vectorized::UInt64>(
|
|
end - 8); /// I'm not sure if this is normal.
|
|
res = _mm_crc32_u64(res, word);
|
|
|
|
return res;
|
|
}
|
|
};
|
|
|
|
struct StringRefHash : CRC32Hash {};
|
|
|
|
#else
|
|
|
|
struct CRC32Hash {
|
|
size_t operator()(StringRef /* x */) const {
|
|
throw std::logic_error {"Not implemented CRC32Hash without SSE"};
|
|
}
|
|
};
|
|
|
|
struct StringRefHash : StringRefHash64 {};
|
|
|
|
#endif // end of hash functions
|
|
|
|
inline std::ostream& operator<<(std::ostream& os, const StringRef& str) {
|
|
return os << str.to_string();
|
|
}
|
|
} // namespace doris
|
|
|
|
namespace ZeroTraits {
|
|
inline bool check(const doris::StringRef& x) {
|
|
return 0 == x.size;
|
|
}
|
|
inline void set(doris::StringRef& x) {
|
|
x.size = 0;
|
|
}
|
|
} // namespace ZeroTraits
|
|
|
|
template <>
|
|
struct std::hash<doris::StringRef> : public doris::StringRefHash {};
|