diff --git a/be/src/vec/functions/function_hash.cpp b/be/src/vec/functions/function_hash.cpp index 873cf4eb2b..cb8dfc0943 100644 --- a/be/src/vec/functions/function_hash.cpp +++ b/be/src/vec/functions/function_hash.cpp @@ -40,135 +40,6 @@ namespace doris::vectorized { constexpr uint64_t emtpy_value = 0xe28dbde7fe22e41c; -struct MurmurHash2Impl64 { - static constexpr auto name = "murmurHash2_64"; - using ReturnType = UInt64; - - static Status empty_apply(IColumn& icolumn, size_t input_rows_count) { - auto& vec_to = assert_cast&>(icolumn); - vec_to.get_data().assign(input_rows_count, static_cast(emtpy_value)); - return Status::OK(); - } - - static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count, - IColumn& icolumn) { - static_cast(execute_any(type, column, icolumn, input_rows_count)); - return Status::OK(); - } - - static Status combine_apply(const IDataType* type, const IColumn* column, - size_t input_rows_count, IColumn& icolumn) { - static_cast(execute_any(type, column, icolumn, input_rows_count)); - return Status::OK(); - } - - template - static Status execute_int_type(const IColumn* column, IColumn& col_to, - size_t input_rows_count) { - if (const auto* col_from = check_and_get_column>(column)) { - const typename ColumnVector::Container& vec_from = col_from->get_data(); - size_t size = vec_from.size(); - for (size_t i = 0; i < size; ++i) { - ReturnType val = HashUtil::murmur_hash2_64( - reinterpret_cast(reinterpret_cast(&vec_from[i])), - sizeof(vec_from[i]), 0); - if (first) { - col_to.insert_data(const_cast(reinterpret_cast(&val)), 0); - } else { - assert_cast&>(col_to).get_data()[i] = - IntHash64Impl::apply( - assert_cast&>(col_to).get_data()[i]) ^ - val; - } - } - } else if (auto col_from_const = - check_and_get_column_const>(column)) { - ReturnType value = col_from_const->template get_value(); - ReturnType val; - val = IntHash64Impl::apply(value); - for (size_t i = 0; i < input_rows_count; ++i) { - if (first) { - col_to.insert_data(const_cast(reinterpret_cast(&val)), 0); - } else { - assert_cast&>(col_to).get_data()[i] = - IntHash64Impl::apply( - assert_cast&>(col_to).get_data()[i]) ^ - val; - } - } - } else { - DCHECK(false); - return Status::NotSupported("Illegal column {} of argument of function {}", - column->get_name(), name); - } - return Status::OK(); - } - - template - static Status execute_string(const IColumn* column, IColumn& col_to, size_t input_rows_count) { - if (const auto* col_from = check_and_get_column(column)) { - const typename ColumnString::Chars& data = col_from->get_chars(); - const typename ColumnString::Offsets& offsets = col_from->get_offsets(); - size_t size = offsets.size(); - - ColumnString::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) { - const ReturnType val = HashUtil::murmur_hash2_64( - reinterpret_cast(&data[current_offset]), - offsets[i] - current_offset, 0); - - if (first) { - col_to.insert_data(reinterpret_cast(&val), 0); - } else { - assert_cast&>(col_to).get_data()[i] = - IntHash64Impl::apply( - assert_cast&>(col_to).get_data()[i]) ^ - val; - } - - current_offset = offsets[i]; - } - } else if (const ColumnConst* col_from_const = - check_and_get_column_const_string_or_fixedstring(column)) { - auto value = col_from_const->get_value(); - const ReturnType val = HashUtil::murmur_hash2_64(value.data(), value.size(), 0); - - for (size_t i = 0; i < input_rows_count; ++i) { - if (first) { - col_to.insert_data(reinterpret_cast(&val), 0); - } else { - assert_cast&>(col_to).get_data()[i] = - IntHash64Impl::apply( - assert_cast&>(col_to).get_data()[i]) ^ - val; - } - } - } else { - DCHECK(false); - return Status::NotSupported("Illegal column {} of argument of function {}", - column->get_name(), name); - } - return Status::OK(); - } - - template - static Status execute_any(const IDataType* from_type, const IColumn* icolumn, IColumn& col_to, - size_t input_rows_count) { - WhichDataType which(from_type); - if (which.is_string()) { - return execute_string(icolumn, col_to, input_rows_count); - } - -#define DISPATCH(TYPE, COLUMN_TYPE) \ - if (which.idx == TypeIndex::TYPE) \ - return execute_int_type(icolumn, col_to, input_rows_count); - NUMERIC_TYPE_TO_COLUMN_TYPE(DISPATCH) -#undef DISPATCH - return Status::NotSupported("argument_type {} not supported", from_type->get_name()); - } -}; -using FunctionMurmurHash2_64 = FunctionVariadicArgumentsBase; - template struct MurmurHash3ImplName {}; @@ -282,7 +153,6 @@ using FunctionMurmurHash3_32 = FunctionVariadicArgumentsBase>; void register_function_hash(SimpleFunctionFactory& factory) { - factory.register_function(); factory.register_function(); factory.register_function(); } diff --git a/be/src/vec/functions/function_hash.h b/be/src/vec/functions/function_hash.h index 84dda9bd1a..0516dd85c4 100644 --- a/be/src/vec/functions/function_hash.h +++ b/be/src/vec/functions/function_hash.h @@ -20,14 +20,6 @@ #pragma once -#include "vec/common/hash_table/hash.h" #include "vec/core/types.h" -namespace doris::vectorized { - -struct IntHash64Impl { - using ReturnType = UInt64; - - static UInt64 apply(UInt64 x) { return int_hash64(x ^ 0x4CF2D2BAAE6DA887ULL); } -}; -} // namespace doris::vectorized +namespace doris::vectorized {} // namespace doris::vectorized diff --git a/be/test/vec/function/function_hash_test.cpp b/be/test/vec/function/function_hash_test.cpp index 2e12cd64f3..10c57d1c31 100644 --- a/be/test/vec/function/function_hash_test.cpp +++ b/be/test/vec/function/function_hash_test.cpp @@ -94,37 +94,4 @@ TEST(HashFunctionTest, murmur_hash_3_64_test) { }; } -TEST(HashFunctionTest, murmur_hash_2_test) { - std::string func_name = "murmurHash2_64"; - - { - InputTypeSet input_types = {TypeIndex::String}; - - DataSet data_set = {{{Null()}, Null()}, - {{std::string("hello")}, (uint64_t)2191231550387646743ull}}; - - static_cast(check_function(func_name, input_types, data_set)); - }; - - { - InputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; - - DataSet data_set = { - {{std::string("hello"), std::string("world")}, (uint64_t)11978658642541747642ull}, - {{std::string("hello"), Null()}, Null()}}; - - static_cast(check_function(func_name, input_types, data_set)); - }; - - { - InputTypeSet input_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String}; - - DataSet data_set = {{{std::string("hello"), std::string("world"), std::string("!")}, - (uint64_t)1367324781703025231ull}, - {{std::string("hello"), std::string("world"), Null()}, Null()}}; - - static_cast(check_function(func_name, input_types, data_set)); - }; -} - } // namespace doris::vectorized