[function](hash) add support of murmur_hash3_64 (#12923)
This commit is contained in:
@ -154,9 +154,22 @@ struct MurmurHash2Impl64 {
|
||||
};
|
||||
using FunctionMurmurHash2_64 = FunctionVariadicArgumentsBase<DataTypeUInt64, MurmurHash2Impl64>;
|
||||
|
||||
struct MurmurHash3Impl32 {
|
||||
template <typename ReturnType>
|
||||
struct MurmurHash3ImplName {};
|
||||
|
||||
template <>
|
||||
struct MurmurHash3ImplName<Int32> {
|
||||
static constexpr auto name = "murmur_hash3_32";
|
||||
using ReturnType = Int32;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct MurmurHash3ImplName<Int64> {
|
||||
static constexpr auto name = "murmur_hash3_64";
|
||||
};
|
||||
|
||||
template <typename ReturnType>
|
||||
struct MurmurHash3Impl {
|
||||
static constexpr auto name = MurmurHash3ImplName<ReturnType>::name;
|
||||
|
||||
static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
|
||||
ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
|
||||
@ -177,6 +190,7 @@ struct MurmurHash3Impl32 {
|
||||
template <bool first>
|
||||
static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
|
||||
IColumn& col_to) {
|
||||
auto* col_to_data = assert_cast<ColumnVector<ReturnType>&>(col_to).get_data().data();
|
||||
if (const ColumnString* col_from = check_and_get_column<ColumnString>(column)) {
|
||||
const typename ColumnString::Chars& data = col_from->get_chars();
|
||||
const typename ColumnString::Offsets& offsets = col_from->get_offsets();
|
||||
@ -185,15 +199,29 @@ struct MurmurHash3Impl32 {
|
||||
ColumnString::Offset current_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
if (first) {
|
||||
UInt32 val = HashUtil::murmur_hash3_32(
|
||||
reinterpret_cast<const char*>(&data[current_offset]),
|
||||
offsets[i] - current_offset, HashUtil::MURMUR3_32_SEED);
|
||||
col_to.insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
|
||||
if constexpr (std::is_same_v<ReturnType, Int32>) {
|
||||
UInt32 val = HashUtil::murmur_hash3_32(
|
||||
reinterpret_cast<const char*>(&data[current_offset]),
|
||||
offsets[i] - current_offset, HashUtil::MURMUR3_32_SEED);
|
||||
col_to.insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)),
|
||||
0);
|
||||
} else {
|
||||
UInt64 val = 0;
|
||||
murmur_hash3_x64_64(reinterpret_cast<const char*>(&data[current_offset]),
|
||||
offsets[i] - current_offset, 0, &val);
|
||||
col_to.insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)),
|
||||
0);
|
||||
}
|
||||
} else {
|
||||
assert_cast<ColumnVector<ReturnType>&>(col_to).get_data()[i] =
|
||||
HashUtil::murmur_hash3_32(
|
||||
reinterpret_cast<const char*>(&data[current_offset]),
|
||||
offsets[i] - current_offset, ext::bit_cast<UInt32>(col_to[i]));
|
||||
if constexpr (std::is_same_v<ReturnType, Int32>) {
|
||||
col_to_data[i] = HashUtil::murmur_hash3_32(
|
||||
reinterpret_cast<const char*>(&data[current_offset]),
|
||||
offsets[i] - current_offset, ext::bit_cast<UInt32>(col_to[i]));
|
||||
} else {
|
||||
murmur_hash3_x64_64(reinterpret_cast<const char*>(&data[current_offset]),
|
||||
offsets[i] - current_offset,
|
||||
ext::bit_cast<UInt64>(col_to[i]), col_to_data + i);
|
||||
}
|
||||
}
|
||||
current_offset = offsets[i];
|
||||
}
|
||||
@ -202,13 +230,25 @@ struct MurmurHash3Impl32 {
|
||||
String value = col_from_const->get_value<String>().data();
|
||||
for (size_t i = 0; i < input_rows_count; ++i) {
|
||||
if (first) {
|
||||
UInt32 val = HashUtil::murmur_hash3_32(value.data(), value.size(),
|
||||
HashUtil::MURMUR3_32_SEED);
|
||||
col_to.insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
|
||||
if constexpr (std::is_same_v<ReturnType, Int32>) {
|
||||
UInt32 val = HashUtil::murmur_hash3_32(value.data(), value.size(),
|
||||
HashUtil::MURMUR3_32_SEED);
|
||||
col_to.insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)),
|
||||
0);
|
||||
} else {
|
||||
UInt64 val = 0;
|
||||
murmur_hash3_x64_64(value.data(), value.size(), 0, &val);
|
||||
col_to.insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)),
|
||||
0);
|
||||
}
|
||||
} else {
|
||||
assert_cast<ColumnVector<ReturnType>&>(col_to).get_data()[i] =
|
||||
HashUtil::murmur_hash3_32(value.data(), value.size(),
|
||||
ext::bit_cast<UInt32>(col_to[i]));
|
||||
if constexpr (std::is_same_v<ReturnType, Int32>) {
|
||||
col_to_data[i] = HashUtil::murmur_hash3_32(
|
||||
value.data(), value.size(), ext::bit_cast<UInt32>(col_to[i]));
|
||||
} else {
|
||||
murmur_hash3_x64_64(value.data(), value.size(),
|
||||
ext::bit_cast<UInt64>(col_to[i]), col_to_data + i);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -219,10 +259,12 @@ struct MurmurHash3Impl32 {
|
||||
return Status::OK();
|
||||
}
|
||||
};
|
||||
using FunctionMurmurHash3_32 = FunctionVariadicArgumentsBase<DataTypeInt32, MurmurHash3Impl32>;
|
||||
using FunctionMurmurHash3_32 = FunctionVariadicArgumentsBase<DataTypeInt32, MurmurHash3Impl<Int32>>;
|
||||
using FunctionMurmurHash3_64 = FunctionVariadicArgumentsBase<DataTypeInt64, MurmurHash3Impl<Int64>>;
|
||||
|
||||
void register_function_hash(SimpleFunctionFactory& factory) {
|
||||
factory.register_function<FunctionMurmurHash2_64>();
|
||||
factory.register_function<FunctionMurmurHash3_32>();
|
||||
factory.register_function<FunctionMurmurHash3_64>();
|
||||
}
|
||||
} // namespace doris::vectorized
|
||||
Reference in New Issue
Block a user