diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp index afa8a14538..402797a8e3 100644 --- a/be/src/util/hash_util.hpp +++ b/be/src/util/hash_util.hpp @@ -335,6 +335,16 @@ public: #endif // xxHash function for a byte array. For convenience, a 64-bit seed is also // hashed into the result. The mapping may change from time to time. + static xxh_u32 xxHash32WithSeed(const char* s, size_t len, xxh_u32 seed) { + return XXH32(s, len, seed); + } + + // same to the up function, just for null value + static xxh_u32 xxHash32NullWithSeed(xxh_u32 seed) { + static const int INT_VALUE = 0; + return XXH32(reinterpret_cast(&INT_VALUE), sizeof(int), seed); + } + static xxh_u64 xxHash64WithSeed(const char* s, size_t len, xxh_u64 seed) { return XXH3_64bits_withSeed(s, len, seed); } @@ -344,6 +354,7 @@ public: static const int INT_VALUE = 0; return XXH3_64bits_withSeed(reinterpret_cast(&INT_VALUE), sizeof(int), seed); } + #if defined(__clang__) #pragma clang diagnostic pop #endif diff --git a/be/src/vec/functions/function_hash.cpp b/be/src/vec/functions/function_hash.cpp index cb8dfc0943..195dff9483 100644 --- a/be/src/vec/functions/function_hash.cpp +++ b/be/src/vec/functions/function_hash.cpp @@ -40,22 +40,10 @@ namespace doris::vectorized { constexpr uint64_t emtpy_value = 0xe28dbde7fe22e41c; -template -struct MurmurHash3ImplName {}; - -template <> -struct MurmurHash3ImplName { - static constexpr auto name = "murmur_hash3_32"; -}; - -template <> -struct MurmurHash3ImplName { - static constexpr auto name = "murmur_hash3_64"; -}; - template struct MurmurHash3Impl { - static constexpr auto name = MurmurHash3ImplName::name; + static constexpr auto name = + std::is_same_v ? 
"murmur_hash3_32" : "murmur_hash3_64"; static Status empty_apply(IColumn& icolumn, size_t input_rows_count) { ColumnVector& vec_to = assert_cast&>(icolumn); @@ -76,40 +64,29 @@ struct MurmurHash3Impl { template static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count, IColumn& col_to) { - auto* col_to_data = assert_cast&>(col_to).get_data().data(); + auto& to_column = assert_cast&>(col_to); + if constexpr (first) { + if constexpr (std::is_same_v) { + to_column.fill(static_cast(HashUtil::MURMUR3_32_SEED), input_rows_count); + } else { + to_column.insert_many_defaults(input_rows_count); + } + } + auto& col_to_data = to_column.get_data(); if (const auto* col_from = check_and_get_column(column)) { const typename ColumnString::Chars& data = col_from->get_chars(); const typename ColumnString::Offsets& offsets = col_from->get_offsets(); size_t size = offsets.size(); - ColumnString::Offset current_offset = 0; for (size_t i = 0; i < size; ++i) { - if (first) { - if constexpr (std::is_same_v) { - UInt32 val = HashUtil::murmur_hash3_32( - reinterpret_cast(&data[current_offset]), - offsets[i] - current_offset, HashUtil::MURMUR3_32_SEED); - col_to.insert_data(const_cast(reinterpret_cast(&val)), - 0); - } else { - UInt64 val = 0; - murmur_hash3_x64_64(reinterpret_cast(&data[current_offset]), - offsets[i] - current_offset, 0, &val); - col_to.insert_data(const_cast(reinterpret_cast(&val)), - 0); - } + if constexpr (std::is_same_v) { + col_to_data[i] = HashUtil::murmur_hash3_32( + reinterpret_cast(&data[current_offset]), + offsets[i] - current_offset, col_to_data[i]); } else { - if constexpr (std::is_same_v) { - col_to_data[i] = HashUtil::murmur_hash3_32( - reinterpret_cast(&data[current_offset]), - offsets[i] - current_offset, - assert_cast(col_to).get_data()[i]); - } else { - murmur_hash3_x64_64(reinterpret_cast(&data[current_offset]), - offsets[i] - current_offset, - assert_cast(col_to).get_data()[i], - col_to_data + i); - } + 
murmur_hash3_x64_64(reinterpret_cast(&data[current_offset]), + offsets[i] - current_offset, col_to_data[i], + col_to_data.data() + i); } current_offset = offsets[i]; } @@ -117,28 +94,12 @@ struct MurmurHash3Impl { check_and_get_column_const_string_or_fixedstring(column)) { auto value = col_from_const->get_value(); for (size_t i = 0; i < input_rows_count; ++i) { - if (first) { - if constexpr (std::is_same_v) { - UInt32 val = HashUtil::murmur_hash3_32(value.data(), value.size(), - HashUtil::MURMUR3_32_SEED); - col_to.insert_data(const_cast(reinterpret_cast(&val)), - 0); - } else { - UInt64 val = 0; - murmur_hash3_x64_64(value.data(), value.size(), 0, &val); - col_to.insert_data(const_cast(reinterpret_cast(&val)), - 0); - } + if constexpr (std::is_same_v) { + col_to_data[i] = + HashUtil::murmur_hash3_32(value.data(), value.size(), col_to_data[i]); } else { - if constexpr (std::is_same_v) { - col_to_data[i] = HashUtil::murmur_hash3_32( - value.data(), value.size(), - assert_cast(col_to).get_data()[i]); - } else { - murmur_hash3_x64_64(value.data(), value.size(), - assert_cast(col_to).get_data()[i], - col_to_data + i); - } + murmur_hash3_x64_64(value.data(), value.size(), col_to_data[i], + col_to_data.data() + i); } } } else { @@ -149,11 +110,83 @@ struct MurmurHash3Impl { return Status::OK(); } }; + using FunctionMurmurHash3_32 = FunctionVariadicArgumentsBase>; using FunctionMurmurHash3_64 = FunctionVariadicArgumentsBase>; +template +struct XxHashImpl { + static constexpr auto name = std::is_same_v ? 
"xxhash_32" : "xxhash_64"; + + static Status empty_apply(IColumn& icolumn, size_t input_rows_count) { + ColumnVector& vec_to = assert_cast&>(icolumn); + vec_to.get_data().assign(input_rows_count, static_cast(emtpy_value)); + return Status::OK(); + } + + static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count, + IColumn& icolumn) { + return execute(type, column, input_rows_count, icolumn); + } + + static Status combine_apply(const IDataType* type, const IColumn* column, + size_t input_rows_count, IColumn& icolumn) { + return execute(type, column, input_rows_count, icolumn); + } + + template + static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count, + IColumn& col_to) { + auto& to_column = assert_cast&>(col_to); + if constexpr (first) { + to_column.insert_many_defaults(input_rows_count); + } + auto& col_to_data = to_column.get_data(); + if (const auto* col_from = check_and_get_column(column)) { + const typename ColumnString::Chars& data = col_from->get_chars(); + const typename ColumnString::Offsets& offsets = col_from->get_offsets(); + size_t size = offsets.size(); + ColumnString::Offset current_offset = 0; + for (size_t i = 0; i < size; ++i) { + if constexpr (std::is_same_v) { + col_to_data[i] = HashUtil::xxHash32WithSeed( + reinterpret_cast(&data[current_offset]), + offsets[i] - current_offset, col_to_data[i]); + } else { + col_to_data[i] = HashUtil::xxHash64WithSeed( + reinterpret_cast(&data[current_offset]), + offsets[i] - current_offset, col_to_data[i]); + } + current_offset = offsets[i]; + } + } else if (const ColumnConst* col_from_const = + check_and_get_column_const_string_or_fixedstring(column)) { + auto value = col_from_const->get_value(); + for (size_t i = 0; i < input_rows_count; ++i) { + if constexpr (std::is_same_v) { + col_to_data[i] = + HashUtil::xxHash32WithSeed(value.data(), value.size(), col_to_data[i]); + } else { + col_to_data[i] = + 
HashUtil::xxHash64WithSeed(value.data(), value.size(), col_to_data[i]); + } + } + } else { + DCHECK(false); + return Status::NotSupported("Illegal column {} of argument of function {}", + column->get_name(), name); + } + return Status::OK(); + } +}; + +using FunctionXxHash_32 = FunctionVariadicArgumentsBase>; +using FunctionXxHash_64 = FunctionVariadicArgumentsBase>; + void register_function_hash(SimpleFunctionFactory& factory) { factory.register_function(); factory.register_function(); + factory.register_function(); + factory.register_function(); } } // namespace doris::vectorized \ No newline at end of file diff --git a/be/test/vec/function/function_hash_test.cpp b/be/test/vec/function/function_hash_test.cpp index 10c57d1c31..4d2cf6be4b 100644 --- a/be/test/vec/function/function_hash_test.cpp +++ b/be/test/vec/function/function_hash_test.cpp @@ -94,4 +94,68 @@ TEST(HashFunctionTest, murmur_hash_3_64_test) { }; } +TEST(HashFunctionTest, xxhash_32_test) { + std::string func_name = "xxhash_32"; + + { + InputTypeSet input_types = {TypeIndex::String}; + + DataSet data_set = {{{Null()}, Null()}, {{std::string("hello")}, (int32_t)-83855367}}; + + static_cast(check_function(func_name, input_types, data_set)); + }; + + { + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; + + DataSet data_set = {{{std::string("hello"), std::string("world")}, (int32_t)-920844969}, + {{std::string("hello"), Null()}, Null()}}; + + static_cast(check_function(func_name, input_types, data_set)); + }; + + { + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String}; + + DataSet data_set = {{{std::string("hello"), std::string("world"), std::string("!")}, + (int32_t)352087701}, + {{std::string("hello"), std::string("world"), Null()}, Null()}}; + + static_cast(check_function(func_name, input_types, data_set)); + }; +} + +TEST(HashFunctionTest, xxhash_64_test) { + std::string func_name = "xxhash_64"; + + { + InputTypeSet input_types = {TypeIndex::String}; 
+ + DataSet data_set = {{{Null()}, Null()}, + {{std::string("hello")}, (int64_t)-7685981735718036227}}; + + static_cast(check_function(func_name, input_types, data_set)); + }; + + { + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; + + DataSet data_set = { + {{std::string("hello"), std::string("world")}, (int64_t)7001965798170371843}, + {{std::string("hello"), Null()}, Null()}}; + + static_cast(check_function(func_name, input_types, data_set)); + }; + + { + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String}; + + DataSet data_set = {{{std::string("hello"), std::string("world"), std::string("!")}, + (int64_t)6796829678999971400}, + {{std::string("hello"), std::string("world"), Null()}, Null()}}; + + static_cast(check_function(func_name, input_types, data_set)); + }; +} + } // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md b/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md index 7610d4ea27..051a5c262f 100644 --- a/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md +++ b/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md @@ -31,6 +31,8 @@ under the License. Return the 32 bits murmur3 hash of input string. +Note: When calculating hash values, it is more recommended to use `xxhash_32` instead of `murmur_hash3_32`. + ### example ``` diff --git a/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md b/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md index d1965f3ed0..fb9d1dd621 100644 --- a/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md +++ b/docs/en/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md @@ -31,6 +31,8 @@ under the License. Return the 64 bits murmur3 hash of input string. +Note: When calculating hash values, it is more recommended to use `xxhash_64` instead of `murmur_hash3_64`. 
+ ### example ``` diff --git a/docs/en/docs/sql-manual/sql-functions/hash-functions/xxhash-32.md b/docs/en/docs/sql-manual/sql-functions/hash-functions/xxhash-32.md new file mode 100644 index 0000000000..3707d7a70c --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/hash-functions/xxhash-32.md @@ -0,0 +1,63 @@ +--- +{ + "title": "XXHASH_32", + "language": "en" +} +--- + + + +## xxhash_32 + +### description +#### Syntax + +`INT XXHASH_32(VARCHAR input, ...)` + +Returns the 32-bit xxhash value of the input string. + +Note: When calculating hash values, prefer `xxhash_32` over `murmur_hash3_32`. + +### example + +``` +mysql> select xxhash_32(NULL); ++-----------------+ +| xxhash_32(NULL) | ++-----------------+ +| NULL | ++-----------------+ + +mysql> select xxhash_32("hello"); ++--------------------+ +| xxhash_32('hello') | ++--------------------+ +| -83855367 | ++--------------------+ + +mysql> select xxhash_32("hello", "world"); ++-----------------------------+ +| xxhash_32('hello', 'world') | ++-----------------------------+ +| -920844969 | ++-----------------------------+ +``` + +### keywords + +XXHASH_32,HASH diff --git a/docs/en/docs/sql-manual/sql-functions/hash-functions/xxhash-64.md b/docs/en/docs/sql-manual/sql-functions/hash-functions/xxhash-64.md new file mode 100644 index 0000000000..506613177e --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/hash-functions/xxhash-64.md @@ -0,0 +1,85 @@ +--- +{ + "title": "XXHASH_64", + "language": "en" +} +--- + + + +## xxhash_64 + +### description +#### Syntax + +`BIGINT XXHASH_64(VARCHAR input, ...)` + +Returns the 64-bit xxhash value of the input string. + +Note: When calculating hash values, prefer `xxhash_64` over `murmur_hash3_64`. 
+ +### example + +``` +mysql> select xxhash_64(NULL); ++-----------------+ +| xxhash_64(NULL) | ++-----------------+ +| NULL | ++-----------------+ + +mysql> select xxhash_64("hello"); ++----------------------+ +| xxhash_64('hello') | ++----------------------+ +| -7685981735718036227 | ++----------------------+ + +mysql> select xxhash_64("hello", "world"); ++-----------------------------+ +| xxhash_64('hello', 'world') | ++-----------------------------+ +| 7001965798170371843 | ++-----------------------------+ +``` + +### benchmark + +In TPC-H benchmark testing, `xxhash_64` was significantly faster than `murmur_hash3_64`. Therefore, when hash values need to be calculated, prefer `xxhash_64`. + +``` +mysql> select count(murmur_hash3_64(l_comment)) from lineitem; ++-----------------------------------+ +| count(murmur_hash3_64(l_comment)) | ++-----------------------------------+ +| 600037902 | ++-----------------------------------+ +1 row in set (17.18 sec) + +mysql> select count(xxhash_64(l_comment)) from lineitem; ++-----------------------------+ +| count(xxhash_64(l_comment)) | ++-----------------------------+ +| 600037902 | ++-----------------------------+ +1 row in set (8.41 sec) +``` + +### keywords + +XXHASH_64,HASH diff --git a/docs/sidebars.json b/docs/sidebars.json index 48b3c3eea5..0d07a890ea 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -708,7 +708,9 @@ "label": "Hash Functions", "items": [ "sql-manual/sql-functions/hash-functions/murmur-hash3-32", - "sql-manual/sql-functions/hash-functions/murmur-hash3-64" + "sql-manual/sql-functions/hash-functions/murmur-hash3-64", + "sql-manual/sql-functions/hash-functions/xxhash-32", + "sql-manual/sql-functions/hash-functions/xxhash-64" ] }, { diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md index 
93100700c7..57c840293d 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-32.md @@ -29,7 +29,9 @@ under the License. `INT MURMUR_HASH3_32(VARCHAR input, ...)` -返回输入字符串的32位murmur3 hash值 +返回输入字符串的32位murmur3 hash值。 + +注:在计算hash值时,更推荐使用`xxhash_32`,而不是`murmur_hash3_32`。 ### example diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md index 2a7f04d8f6..e113d67589 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/murmur-hash3-64.md @@ -29,7 +29,9 @@ under the License. `BIGINT MURMUR_HASH3_64(VARCHAR input, ...)` -返回输入字符串的64位murmur3 hash值 +返回输入字符串的64位murmur3 hash值。 + +注:在计算hash值时,更推荐使用`xxhash_64`,而不是`murmur_hash3_64`。 ### example diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/xxhash-32.md b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/xxhash-32.md new file mode 100644 index 0000000000..9c839f90d8 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/xxhash-32.md @@ -0,0 +1,62 @@ +--- +{ + "title": "XXHASH_32", + "language": "zh-CN" +} +--- + + + +## xxhash_32 + +### description +#### Syntax + +`INT XXHASH_32(VARCHAR input, ...)` + +返回输入字符串的32位xxhash值。 + +注:在计算hash值时,更推荐使用`xxhash_32`,而不是`murmur_hash3_32`。 + +### example + +``` +mysql> select xxhash_32(NULL); ++-----------------+ +| xxhash_32(NULL) | ++-----------------+ +| NULL | ++-----------------+ + +mysql> select xxhash_32("hello"); ++--------------------+ +| xxhash_32('hello') | ++--------------------+ +| -83855367 | ++--------------------+ + +mysql> select xxhash_32("hello", "world"); ++-----------------------------+ +| xxhash_32('hello', 'world') | ++-----------------------------+ +| -920844969 | ++-----------------------------+ +``` + +### 
keywords +XXHASH_32,HASH diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/xxhash-64.md b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/xxhash-64.md new file mode 100644 index 0000000000..065e924233 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/hash-functions/xxhash-64.md @@ -0,0 +1,84 @@ +--- +{ + "title": "XXHASH_64", + "language": "zh-CN" +} +--- + + + +## xxhash_64 + +### description +#### Syntax + +`BIGINT XXHASH_64(VARCHAR input, ...)` + +返回输入字符串的64位xxhash值。 + +注:在计算hash值时,更推荐使用`xxhash_64`,而不是`murmur_hash3_64`。 + +### example + +``` +mysql> select xxhash_64(NULL); ++-----------------+ +| xxhash_64(NULL) | ++-----------------+ +| NULL | ++-----------------+ + +mysql> select xxhash_64("hello"); ++----------------------+ +| xxhash_64('hello') | ++----------------------+ +| -7685981735718036227 | ++----------------------+ + +mysql> select xxhash_64("hello", "world"); ++-----------------------------+ +| xxhash_64('hello', 'world') | ++-----------------------------+ +| 7001965798170371843 | ++-----------------------------+ +``` +### benchmark + +通过TPCH Benchmark测试发现,`xxhash_64`相比`murmur_hash3_64`来说性能大幅提升,因此在需要计算hash值的场景下,更推荐使用`xxhash_64`。 + +``` +mysql> select count(murmur_hash3_64(l_comment)) from lineitem; ++-----------------------------------+ +| count(murmur_hash3_64(l_comment)) | ++-----------------------------------+ +| 600037902 | ++-----------------------------------+ +1 row in set (17.18 sec) + +mysql> select count(xxhash_64(l_comment)) from lineitem; ++-----------------------------+ +| count(xxhash_64(l_comment)) | ++-----------------------------+ +| 600037902 | ++-----------------------------+ +1 row in set (8.41 sec) +``` + +### keywords + +XXHASH_64,HASH diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java index 1ace763675..f5928650ef 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java @@ -426,6 +426,8 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksAdd; import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksDiff; import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksSub; import org.apache.doris.nereids.trees.expressions.functions.scalar.WidthBucket; +import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash32; +import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash64; import org.apache.doris.nereids.trees.expressions.functions.scalar.Year; import org.apache.doris.nereids.trees.expressions.functions.scalar.YearCeil; import org.apache.doris.nereids.trees.expressions.functions.scalar.YearFloor; @@ -876,6 +878,8 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(WeeksDiff.class, "weeks_diff"), scalar(WeeksSub.class, "weeks_sub"), scalar(WidthBucket.class, "width_bucket"), + scalar(XxHash32.class, "xxhash_32"), + scalar(XxHash64.class, "xxhash_64"), scalar(Year.class, "year"), scalar(YearCeil.class, "year_ceil"), scalar(YearFloor.class, "year_floor"), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XxHash32.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XxHash32.java new file mode 100644 index 0000000000..149c2cbc76 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XxHash32.java @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.IntegerType; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.VarcharType; +import org.apache.doris.nereids.util.ExpressionUtils; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * ScalarFunction 'xxhash_32'. + */ +public class XxHash32 extends ScalarFunction + implements ExplicitlyCastableSignature, PropagateNullable { + + public static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(IntegerType.INSTANCE).varArgs(VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(IntegerType.INSTANCE).varArgs(StringType.INSTANCE) + ); + + /** + * constructor with 1 or more arguments. + */ + public XxHash32(Expression arg, Expression... varArgs) { + super("xxhash_32", ExpressionUtils.mergeArguments(arg, varArgs)); + } + + /** + * withChildren. 
+ */ + @Override + public XxHash32 withChildren(List children) { + Preconditions.checkArgument(children.size() >= 1); + return new XxHash32(children.get(0), + children.subList(1, children.size()).toArray(new Expression[0])); + } + + @Override + public List getSignatures() { + return SIGNATURES; + } + + @Override + public R accept(ExpressionVisitor visitor, C context) { + return visitor.visitXxHash32(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XxHash64.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XxHash64.java new file mode 100644 index 0000000000..bc23d8c2a5 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XxHash64.java @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.BigIntType; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.VarcharType; +import org.apache.doris.nereids.util.ExpressionUtils; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * ScalarFunction 'xxhash_64'. + */ +public class XxHash64 extends ScalarFunction + implements ExplicitlyCastableSignature, PropagateNullable { + + public static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(BigIntType.INSTANCE).varArgs(VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(BigIntType.INSTANCE).varArgs(StringType.INSTANCE) + ); + + /** + * constructor with 1 or more arguments. + */ + public XxHash64(Expression arg, Expression... varArgs) { + super("xxhash_64", ExpressionUtils.mergeArguments(arg, varArgs)); + } + + /** + * withChildren. 
+ */ + @Override + public XxHash64 withChildren(List children) { + Preconditions.checkArgument(children.size() >= 1); + return new XxHash64(children.get(0), + children.subList(1, children.size()).toArray(new Expression[0])); + } + + @Override + public List getSignatures() { + return SIGNATURES; + } + + @Override + public R accept(ExpressionVisitor visitor, C context) { + return visitor.visitXxHash64(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java index 9a1ed84048..183b4a73da 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java @@ -416,6 +416,8 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksAdd; import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksDiff; import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksSub; import org.apache.doris.nereids.trees.expressions.functions.scalar.WidthBucket; +import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash32; +import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash64; import org.apache.doris.nereids.trees.expressions.functions.scalar.Year; import org.apache.doris.nereids.trees.expressions.functions.scalar.YearCeil; import org.apache.doris.nereids.trees.expressions.functions.scalar.YearFloor; @@ -1515,6 +1517,14 @@ public interface ScalarFunctionVisitor { return visitScalarFunction(murmurHash364, context); } + default R visitXxHash32(XxHash32 xxHash32, C context) { + return visitScalarFunction(xxHash32, context); + } + + default R visitXxHash64(XxHash64 xxHash64, C context) { + return visitScalarFunction(xxHash64, context); + } + default R visitNegative(Negative 
negative, C context) { return visitScalarFunction(negative, context); } diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 1277f72db6..0ecb05612a 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1885,7 +1885,11 @@ visible_functions = { [['murmur_hash3_32'], 'INT', ['VARCHAR', '...'], ''], [['murmur_hash3_32'], 'INT', ['STRING', '...'], ''], [['murmur_hash3_64'], 'BIGINT', ['VARCHAR', '...'], ''], - [['murmur_hash3_64'], 'BIGINT', ['STRING', '...'], ''] + [['murmur_hash3_64'], 'BIGINT', ['STRING', '...'], ''], + [['xxhash_32'], 'INT', ['VARCHAR', '...'], ''], + [['xxhash_32'], 'INT', ['STRING', '...'], ''], + [['xxhash_64'], 'BIGINT', ['VARCHAR', '...'], ''], + [['xxhash_64'], 'BIGINT', ['STRING', '...'], ''] ], # aes and base64 function diff --git a/regression-test/data/nereids_p0/sql_functions/hash_functions/test_hash_function.out b/regression-test/data/nereids_p0/sql_functions/hash_functions/test_hash_function.out index 221936613d..984075ddef 100644 --- a/regression-test/data/nereids_p0/sql_functions/hash_functions/test_hash_function.out +++ b/regression-test/data/nereids_p0/sql_functions/hash_functions/test_hash_function.out @@ -17,3 +17,20 @@ -- !sql -- 3583109472027628045 +-- !sql -- +\N + +-- !sql -- +-83855367 + +-- !sql -- +-920844969 + +-- !sql -- +\N + +-- !sql -- +-7685981735718036227 + +-- !sql -- +7001965798170371843 diff --git a/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out b/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out index 221936613d..984075ddef 100644 --- a/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out +++ b/regression-test/data/query_p0/sql_functions/hash_functions/test_hash_function.out @@ -17,3 +17,20 @@ -- !sql -- 3583109472027628045 +-- !sql -- +\N + +-- !sql -- +-83855367 + +-- !sql -- +-920844969 + +-- !sql -- +\N + +-- 
!sql -- +-7685981735718036227 + +-- !sql -- +7001965798170371843 diff --git a/regression-test/suites/nereids_p0/sql_functions/hash_functions/test_hash_function.groovy b/regression-test/suites/nereids_p0/sql_functions/hash_functions/test_hash_function.groovy index ae805f904c..8cae71a279 100644 --- a/regression-test/suites/nereids_p0/sql_functions/hash_functions/test_hash_function.groovy +++ b/regression-test/suites/nereids_p0/sql_functions/hash_functions/test_hash_function.groovy @@ -26,4 +26,12 @@ suite("test_hash_function") { qt_sql "SELECT murmur_hash3_64(null);" qt_sql "SELECT murmur_hash3_64(\"hello\");" qt_sql "SELECT murmur_hash3_64(\"hello\", \"world\");" + + qt_sql "SELECT xxhash_32(null);" + qt_sql "SELECT xxhash_32(\"hello\");" + qt_sql "SELECT xxhash_32(\"hello\", \"world\");" + + qt_sql "SELECT xxhash_64(null);" + qt_sql "SELECT xxhash_64(\"hello\");" + qt_sql "SELECT xxhash_64(\"hello\", \"world\");" } diff --git a/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy b/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy index d44518509d..d547e9fb28 100644 --- a/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy @@ -25,4 +25,12 @@ suite("test_hash_function", "arrow_flight_sql") { qt_sql "SELECT murmur_hash3_64(null);" qt_sql "SELECT murmur_hash3_64(\"hello\");" qt_sql "SELECT murmur_hash3_64(\"hello\", \"world\");" + + qt_sql "SELECT xxhash_32(null);" + qt_sql "SELECT xxhash_32(\"hello\");" + qt_sql "SELECT xxhash_32(\"hello\", \"world\");" + + qt_sql "SELECT xxhash_64(null);" + qt_sql "SELECT xxhash_64(\"hello\");" + qt_sql "SELECT xxhash_64(\"hello\", \"world\");" }