[Performance](exec) replace SipHash in function by XXHash (#32919)

This commit is contained in:
HappenLee
2024-03-28 11:00:52 +08:00
committed by yiguolei
parent 28e2d89ce3
commit e3bd2311b1
2 changed files with 11 additions and 17 deletions

View File

@ -36,7 +36,6 @@
#include "vec/common/assert_cast.h"
#include "vec/common/hash_table/hash.h"
#include "vec/common/hash_table/phmap_fwd_decl.h"
#include "vec/common/sip_hash.h"
#include "vec/common/string_ref.h"
#include "vec/common/uint128.h"
#include "vec/core/types.h"
@ -64,17 +63,14 @@ template <typename T>
struct AggregateFunctionUniqExactData {
static constexpr bool is_string_key = std::is_same_v<T, String>;
using Key = std::conditional_t<is_string_key, UInt128, T>;
using Hash = std::conditional_t<is_string_key, UInt128TrivialHash, HashCRC32<Key>>;
using Hash = HashCRC32<Key>;
using Set = flat_hash_set<Key, Hash>;
/// Derives the 128-bit set key for a string value by hashing its bytes with
/// XXH128 (seed 0). Used when T is String, where Key is UInt128.
static UInt128 ALWAYS_INLINE get_key(const StringRef& value) {
    const auto hash_value = XXH_INLINE_XXH128(value.data, value.size, 0);
    // NOTE(review): high64 is passed first and low64 second; confirm this matches
    // the intended UInt128 field order. Either order yields a valid distinct key.
    return UInt128 {hash_value.high64, hash_value.low64};
}
Set set;

View File

@ -22,10 +22,7 @@
#include "common/status.h"
#include "runtime/large_int_value.h"
#include "vec/columns/column_vector.h"
#include "vec/columns/columns_number.h"
#include "vec/common/hash_table/hash.h"
#include "vec/common/sip_hash.h"
#include "vec/common/uint128.h"
#include "vec/core/block.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_number.h"
@ -100,7 +97,6 @@ public:
}
// TODO(zhiqiang): Maybe override the open function?
Status execute_impl(FunctionContext* /*context*/, Block& block,
const ColumnNumbers& /*arguments*/, size_t result,
size_t input_rows_count) const override {
@ -140,14 +136,16 @@ private:
/// Produces a seed by chaining xxHash64 over the monotonic clock reading
/// (nanoseconds, then seconds) and the calling thread's id.
/// Not cryptographically secure: the time and thread id can be predictable.
UInt64 randomSeed() const {
    struct timespec times {};
    clock_gettime(CLOCK_MONOTONIC, &times);

    // Keep the thread id in a local so that we hash the bytes of the id itself.
    // Casting the id value to a pointer would hash whatever memory it happens
    // to point at (or fault if it is not a valid address).
    const pthread_t thread_id = pthread_self();

    auto ret = HashUtil::xxHash64WithSeed(reinterpret_cast<const char*>(&times.tv_nsec),
                                          sizeof(times.tv_nsec), 0);
    ret = HashUtil::xxHash64WithSeed(reinterpret_cast<const char*>(&times.tv_sec),
                                     sizeof(times.tv_sec), ret);
    ret = HashUtil::xxHash64WithSeed(reinterpret_cast<const char*>(&thread_id),
                                     sizeof(thread_id), ret);
    return ret;
}
};