[Function][Vectorized] Support least/greatest functions (#8107)
Co-authored-by: lihaopeng <lihaopeng@baidu.com>
This commit is contained in:
@ -148,6 +148,7 @@ set(VEC_FILES
|
||||
functions/function_grouping.cpp
|
||||
functions/function_rpc.cpp
|
||||
functions/function_convert_tz.cpp
|
||||
functions/least_greast.cpp
|
||||
olap/vgeneric_iterators.cpp
|
||||
olap/vcollect_iterator.cpp
|
||||
olap/block_reader.cpp
|
||||
|
||||
@ -27,6 +27,7 @@
|
||||
#include "util/binary_cast.hpp"
|
||||
|
||||
#include "vec/common/nan_utils.h"
|
||||
#include "vec/common/string_ref.h"
|
||||
#include "vec/common/uint128.h"
|
||||
#include "vec/core/types.h"
|
||||
#include "vec/runtime/vdatetime_value.h"
|
||||
@ -487,8 +488,8 @@ struct EqualsOp {
|
||||
};
|
||||
|
||||
template <>
|
||||
struct EqualsOp<VecDateTimeValue, VecDateTimeValue> {
|
||||
static UInt8 apply(const Int64& a, const Int64& b) {
|
||||
struct EqualsOp<DecimalV2Value, DecimalV2Value> {
|
||||
static UInt8 apply(const Int128& a, const Int128& b) {
|
||||
return a == b;
|
||||
}
|
||||
};
|
||||
@ -500,8 +501,8 @@ struct NotEqualsOp {
|
||||
};
|
||||
|
||||
template <>
|
||||
struct NotEqualsOp<VecDateTimeValue, VecDateTimeValue> {
|
||||
static UInt8 apply(const Int64& a, const Int64& b) {
|
||||
struct NotEqualsOp<DecimalV2Value, DecimalV2Value> {
|
||||
static UInt8 apply(const Int128& a, const Int128& b) {
|
||||
return a != b;
|
||||
}
|
||||
};
|
||||
@ -516,9 +517,16 @@ struct LessOp {
|
||||
};
|
||||
|
||||
template <>
|
||||
struct LessOp<VecDateTimeValue, VecDateTimeValue> {
|
||||
static UInt8 apply(Int64 a, Int64 b) {
|
||||
return binary_cast<Int64, VecDateTimeValue>(a) < binary_cast<Int64, VecDateTimeValue>(b);
|
||||
struct LessOp<DecimalV2Value, DecimalV2Value> {
|
||||
static UInt8 apply(Int128 a, Int128 b) {
|
||||
return binary_cast<Int128, DecimalV2Value>(a) < binary_cast<Int128, DecimalV2Value>(b);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct LessOp<StringRef, StringRef> {
|
||||
static UInt8 apply(StringRef a, StringRef b) {
|
||||
return a < b;
|
||||
}
|
||||
};
|
||||
|
||||
@ -529,9 +537,16 @@ struct GreaterOp {
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GreaterOp<VecDateTimeValue, VecDateTimeValue> {
|
||||
static UInt8 apply(Int64 a, Int64 b) {
|
||||
return binary_cast<Int64, VecDateTimeValue>(a) > binary_cast<Int64, VecDateTimeValue>(b);
|
||||
struct GreaterOp<DecimalV2Value, DecimalV2Value> {
|
||||
static UInt8 apply(Int128 a, Int128 b) {
|
||||
return binary_cast<Int128, DecimalV2Value>(a) > binary_cast<Int128, DecimalV2Value>(b);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GreaterOp<StringRef, StringRef> {
|
||||
static UInt8 apply(StringRef a, StringRef b) {
|
||||
return a > b;
|
||||
}
|
||||
};
|
||||
|
||||
@ -545,9 +560,9 @@ struct LessOrEqualsOp {
|
||||
};
|
||||
|
||||
template <>
|
||||
struct LessOrEqualsOp<VecDateTimeValue, VecDateTimeValue> {
|
||||
static UInt8 apply(Int64 a, Int64 b) {
|
||||
return binary_cast<Int64, VecDateTimeValue>(a) <= binary_cast<Int64, VecDateTimeValue>(b);
|
||||
struct LessOrEqualsOp<DecimalV2Value, DecimalV2Value> {
|
||||
static UInt8 apply(Int128 a, Int128 b) {
|
||||
return binary_cast<Int128, DecimalV2Value>(a) <= binary_cast<Int128, DecimalV2Value>(b);
|
||||
}
|
||||
};
|
||||
|
||||
@ -558,9 +573,9 @@ struct GreaterOrEqualsOp {
|
||||
};
|
||||
|
||||
template <>
|
||||
struct GreaterOrEqualsOp<VecDateTimeValue, VecDateTimeValue> {
|
||||
static UInt8 apply(Int64 a, Int64 b) {
|
||||
return binary_cast<Int64, VecDateTimeValue>(a) >= binary_cast<Int64, VecDateTimeValue>(b);
|
||||
struct GreaterOrEqualsOp<DecimalV2Value, DecimalV2Value> {
|
||||
static UInt8 apply(Int128 a, Int128 b) {
|
||||
return binary_cast<Int128, DecimalV2Value>(a) >= binary_cast<Int128, DecimalV2Value>(b);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -17,7 +17,6 @@
|
||||
|
||||
#include "udf/udf.h"
|
||||
#include "vec/data_types/get_least_supertype.h"
|
||||
#include "vec/functions/function_helpers.h"
|
||||
#include "vec/functions/simple_function_factory.h"
|
||||
#include "vec/utils/template_helpers.hpp"
|
||||
#include "vec/utils/util.hpp"
|
||||
|
||||
55
be/src/vec/functions/function_multi_same_args.h
Normal file
55
be/src/vec/functions/function_multi_same_args.h
Normal file
@ -0,0 +1,55 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "udf/udf.h"
|
||||
#include "vec/data_types/get_least_supertype.h"
|
||||
#include "vec/functions/function_helpers.h"
|
||||
#include "vec/functions/simple_function_factory.h"
|
||||
#include "vec/utils/template_helpers.hpp"
|
||||
#include "vec/utils/util.hpp"
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
// Generic variadic function wrapper: every piece of behaviour that differs between
// concrete functions (name, return type, evaluation) is forwarded to the Impl policy.
// Impl must provide: a static `name`, `get_return_type_impl(const DataTypes&)` and
// `execute(Block&, const ColumnNumbers&, size_t)`.
// NOTE(review): the visible part of this header has no `#pragma once` / include guard —
// confirm it is never included twice in one translation unit, or add one.
template <typename Impl>
|
||||
class FunctionMultiSameArgs : public IFunction {
|
||||
public:
|
||||
// Function name, taken verbatim from the Impl policy.
static constexpr auto name = Impl::name;
|
||||
|
||||
// Factory returning a shared instance of this wrapper.
static FunctionPtr create() { return std::make_shared<FunctionMultiSameArgs>(); }
|
||||
|
||||
String get_name() const override { return name; }
|
||||
|
||||
// Opts in to the framework's default handling of constant arguments
// (exact semantics are defined by IFunction — not visible here).
bool use_default_implementation_for_constants() const override { return true; }
|
||||
|
||||
// Opts in to the framework's default handling of nullable arguments
// (exact semantics are defined by IFunction — not visible here).
bool use_default_implementation_for_nulls() const override { return true; }
|
||||
|
||||
// The function accepts a variable number of arguments.
bool is_variadic() const override { return true; }
|
||||
|
||||
// Returns 0; presumably ignored for variadic functions — TODO confirm against IFunction.
size_t get_number_of_arguments() const override { return 0; }
|
||||
|
||||
// Result type is decided entirely by the Impl policy.
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
|
||||
return Impl::get_return_type_impl(arguments);
|
||||
}
|
||||
|
||||
// Evaluates the function: asserts (debug check) that at least one argument was passed,
// asks Impl to compute the result column and stores it at position `result` of `block`.
// `context` is not used by this wrapper. Always returns Status::OK().
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
|
||||
size_t result, size_t input_rows_count) override {
|
||||
DCHECK_GE(arguments.size(), 1);
|
||||
block.replace_by_position(result, Impl::execute(block, arguments, input_rows_count));
|
||||
return Status::OK();
|
||||
}
|
||||
};
|
||||
};
|
||||
127
be/src/vec/functions/least_greast.cpp
Normal file
127
be/src/vec/functions/least_greast.cpp
Normal file
@ -0,0 +1,127 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "udf/udf.h"
|
||||
#include "vec/core/accurate_comparison.h"
|
||||
#include "vec/data_types/get_least_supertype.h"
|
||||
#include "vec/functions/function_helpers.h"
|
||||
#include "vec/functions/function_multi_same_args.h"
|
||||
#include "vec/functions/simple_function_factory.h"
|
||||
#include "vec/utils/template_helpers.hpp"
|
||||
#include "vec/utils/util.hpp"
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
// Policy used with FunctionMultiSameArgs to pick, per row, one value out of N argument
// columns by repeatedly applying a binary comparison.
//   Op   - comparison functor template exposing `apply(a, b)`; a candidate replaces the
//          current pick whenever `Op::apply(candidate, current)` is true.
//   Impl - supplies the static function `name`.
template <template <typename, typename> class Op, typename Impl>
|
||||
struct CompareMultiImpl {
|
||||
static constexpr auto name = Impl::name;
|
||||
|
||||
// The result type is the type of the first argument.
// NOTE(review): execute() below uses the type of the LAST argument to create the result
// column; presumably all arguments share one type — confirm.
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
|
||||
return arguments[0];
|
||||
}
|
||||
|
||||
// Folds one argument column into the running result, in place.
// For each of the first `input_rows_count` rows, result[i] is overwritten with
// argument[i] when Op::apply(argument[i], result[i]) holds; otherwise it is kept.
// Both columns are reinterpreted as ColumnType without any runtime type check.
template <typename ColumnType>
|
||||
static void insert_result_data(MutableColumnPtr& result_column, ColumnPtr& argument_column,
|
||||
const size_t input_rows_count) {
|
||||
auto* __restrict result_raw_data =
|
||||
reinterpret_cast<ColumnType*>(result_column.get())->get_data().data();
|
||||
auto* __restrict column_raw_data =
|
||||
reinterpret_cast<const ColumnType*>(argument_column.get())->get_data().data();
|
||||
|
||||
// Decimal128 columns are compared through the DecimalV2Value specialisation of Op;
// every other column type is compared on its raw element type.
if constexpr (std::is_same_v<ColumnType, ColumnDecimal128>) {
|
||||
for (size_t i = 0; i < input_rows_count; ++i) {
|
||||
result_raw_data[i] =
|
||||
Op<DecimalV2Value, DecimalV2Value>::apply(column_raw_data[i], result_raw_data[i]) ? column_raw_data[i] :
|
||||
result_raw_data[i];
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < input_rows_count; ++i) {
|
||||
using type = std::decay_t<decltype(result_raw_data[0])>;
|
||||
result_raw_data[i] =
|
||||
Op<type, type>::apply(column_raw_data[i], result_raw_data[i]) ? column_raw_data[i] :
|
||||
result_raw_data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the result column for the argument columns of `block` listed in `arguments`.
// With a single argument the argument column itself is returned unchanged.
// NOTE(review): the loops below use `int` indices compared against size_t values
// (arguments.size(), input_rows_count) — signed/unsigned mismatch.
static ColumnPtr execute(Block& block, const ColumnNumbers& arguments, size_t input_rows_count) {
|
||||
if (arguments.size() == 1) return block.get_by_position(arguments.back()).column;
|
||||
|
||||
const auto& data_type = block.get_by_position(arguments.back()).type;
|
||||
MutableColumnPtr result_column = data_type->create_column();
|
||||
|
||||
// Expand constant columns so every argument can be indexed row by row.
Columns args;
|
||||
for (int i = 0; i < arguments.size(); ++i) {
|
||||
args.emplace_back(block.get_by_position(arguments[i]).column->convert_to_full_column_if_const());
|
||||
}
|
||||
// String columns do not currently support writing at an arbitrary row position,
|
||||
// so the result is built in one of two ways: row-by-row appends for strings, in-place updates for all other types.
|
||||
bool is_string_result = result_column->is_column_string();
|
||||
if (is_string_result) {
|
||||
result_column->reserve(input_rows_count);
|
||||
} else {
|
||||
// Seed the result with a copy of the first argument; later arguments are folded into it.
result_column->insert_range_from(
|
||||
*(args[0]), 0, input_rows_count);
|
||||
}
|
||||
|
||||
if (is_string_result) {
|
||||
const auto& column_string = reinterpret_cast<const ColumnString&>(*args[0]);
|
||||
auto& column_res = reinterpret_cast<ColumnString&>(*result_column);
|
||||
|
||||
// For each row, start from the first argument's value and keep whichever candidate
// Op prefers, then append the winner to the result.
for (int i = 0; i < input_rows_count; ++i) {
|
||||
auto str_data = column_string.get_data_at(i);
|
||||
for (int j = 1; j < arguments.size(); ++j) {
|
||||
auto temp_data =
|
||||
reinterpret_cast<const ColumnString&>(*args[j]).get_data_at(i);
|
||||
str_data = Op<StringRef, StringRef>::apply(temp_data, str_data) ? temp_data : str_data;
|
||||
}
|
||||
column_res.insert_data(str_data.data, str_data.size);
|
||||
}
|
||||
|
||||
} else {
|
||||
// Select the concrete column type from the result type index and fold arguments 1..N-1
// into the result. NOTE(review): if no DISPATCH entry matches, the result silently stays
// a copy of the first argument.
WhichDataType which(data_type);
|
||||
#define DISPATCH(TYPE, COLUMN_TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) { \
|
||||
for (int i = 1; i < arguments.size(); ++i) { \
|
||||
insert_result_data<COLUMN_TYPE>(result_column, args[i], input_rows_count); \
|
||||
} \
|
||||
}
|
||||
NUMERIC_TYPE_TO_COLUMN_TYPE(DISPATCH)
|
||||
DISPATCH(Decimal128, ColumnDecimal<Decimal128>)
|
||||
TIME_TYPE_TO_COLUMN_TYPE(DISPATCH)
|
||||
#undef DISPATCH
|
||||
}
|
||||
|
||||
return result_column;
|
||||
}
|
||||
};
|
||||
|
||||
// Name tag for the "least" function; passed as the Impl parameter of CompareMultiImpl.
struct LeastName {
|
||||
static constexpr auto name = "least";
|
||||
};
|
||||
// Name tag for the "greatest" function; passed as the Impl parameter of CompareMultiImpl.
// NOTE(review): the identifier is spelled "Greast"; the registered name is "greatest".
struct GreastName {
|
||||
static constexpr auto name = "greatest";
|
||||
};
|
||||
// "least": a candidate replaces the current pick when LessOp says it is smaller.
using FunctionLeast = FunctionMultiSameArgs<CompareMultiImpl<LessOp, LeastName>>;
|
||||
// "greatest": a candidate replaces the current pick when GreaterOp says it is larger.
// NOTE(review): alias name is spelled "Greaest".
using FunctionGreaest = FunctionMultiSameArgs<CompareMultiImpl<GreaterOp, GreastName>>;
|
||||
|
||||
|
||||
// Registers the vectorized "least" and "greatest" functions with the given factory.
void register_function_least_greast(SimpleFunctionFactory& factory) {
|
||||
factory.register_function<FunctionLeast>();
|
||||
factory.register_function<FunctionGreaest>();
|
||||
}
|
||||
};
|
||||
@ -69,6 +69,7 @@ void register_function_coalesce(SimpleFunctionFactory& factory);
|
||||
void register_function_grouping(SimpleFunctionFactory& factory);
|
||||
void register_function_datetime_floor_ceil(SimpleFunctionFactory& factory);
|
||||
void register_function_convert_tz(SimpleFunctionFactory& factory);
|
||||
void register_function_least_greast(SimpleFunctionFactory& factory);
|
||||
|
||||
class SimpleFunctionFactory {
|
||||
using Creator = std::function<FunctionBuilderPtr()>;
|
||||
@ -187,6 +188,7 @@ public:
|
||||
register_function_grouping(instance);
|
||||
register_function_datetime_floor_ceil(instance);
|
||||
register_function_convert_tz(instance);
|
||||
register_function_least_greast(instance);
|
||||
});
|
||||
return instance;
|
||||
}
|
||||
|
||||
@ -379,6 +379,28 @@ TEST(MathFunctionTest, round_test) {
|
||||
}
|
||||
}
|
||||
|
||||
// Checks "least" on pairs of Int32 values: the smaller value is expected for each row,
// equal inputs yield that value, and a row containing Null is expected to yield Null.
TEST(MathFunctionTest, least_test) {
|
||||
std::string func_name = "least";
|
||||
|
||||
InputTypeSet input_types = {TypeIndex::Int32, TypeIndex::Int32};
|
||||
|
||||
// Each entry is {{arg0, arg1}, expected result}.
DataSet data_set = {{{3, 2}, 2}, {{3, 3}, 3}, {{Null(), -2}, Null()},
|
||||
{{193, -2}, -2}, {{193, -1}, -1}};
|
||||
|
||||
check_function<DataTypeInt32, true>(func_name, input_types, data_set);
|
||||
}
|
||||
|
||||
// Checks "greatest" on pairs of Int32 values: the larger value is expected for each row,
// equal inputs yield that value, and a row containing Null is expected to yield Null.
TEST(MathFunctionTest, greatest_test) {
|
||||
std::string func_name = "greatest";
|
||||
|
||||
InputTypeSet input_types = {TypeIndex::Int32, TypeIndex::Int32};
|
||||
|
||||
// Each entry is {{arg0, arg1}, expected result}.
DataSet data_set = {{{3, 2}, 3}, {{3, 3}, 3}, {{Null(), -2}, Null()},
|
||||
{{193, -2}, 193}, {{193, -1}, 193}};
|
||||
|
||||
check_function<DataTypeInt32, true>(func_name, input_types, data_set);
|
||||
}
|
||||
|
||||
TEST(MathFunctionTest, bin_test) {
|
||||
std::string func_name = "bin";
|
||||
|
||||
|
||||
@ -687,71 +687,71 @@ visible_functions = [
|
||||
|
||||
[['least'], 'TINYINT', ['TINYINT', '...'],
|
||||
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_10TinyIntValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['least'], 'SMALLINT', ['SMALLINT', '...'],
|
||||
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_11SmallIntValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['least'], 'INT', ['INT', '...'],
|
||||
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_6IntValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['least'], 'BIGINT', ['BIGINT', '...'],
|
||||
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_9BigIntValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['least'], 'LARGEINT', ['LARGEINT', '...'],
|
||||
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_11LargeIntValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['least'], 'FLOAT', ['FLOAT', '...'],
|
||||
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_8FloatValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['least'], 'DOUBLE', ['DOUBLE', '...'],
|
||||
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_9DoubleValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['least'], 'DATETIME', ['DATETIME', '...'],
|
||||
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_11DateTimeValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['least'], 'DECIMALV2', ['DECIMALV2', '...'],
|
||||
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_12DecimalV2ValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['least'], 'VARCHAR', ['VARCHAR', '...'],
|
||||
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['least'], 'STRING', ['STRING', '...'],
|
||||
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
|
||||
[['greatest'], 'TINYINT', ['TINYINT', '...'],
|
||||
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_10TinyIntValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['greatest'], 'SMALLINT', ['SMALLINT', '...'],
|
||||
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_11SmallIntValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['greatest'], 'INT', ['INT', '...'],
|
||||
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_6IntValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['greatest'], 'BIGINT', ['BIGINT', '...'],
|
||||
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_9BigIntValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['greatest'], 'LARGEINT', ['LARGEINT', '...'],
|
||||
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_11LargeIntValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['greatest'], 'FLOAT', ['FLOAT', '...'],
|
||||
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_8FloatValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['greatest'], 'DOUBLE', ['DOUBLE', '...'],
|
||||
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_9DoubleValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['greatest'], 'DECIMALV2', ['DECIMALV2', '...'],
|
||||
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_12DecimalV2ValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['greatest'], 'DATETIME', ['DATETIME', '...'],
|
||||
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_11DateTimeValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['greatest'], 'VARCHAR', ['VARCHAR', '...'],
|
||||
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
[['greatest'], 'STRING', ['STRING', '...'],
|
||||
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
|
||||
'', '', '', ''],
|
||||
'', '', 'vec', ''],
|
||||
|
||||
# Conditional Functions
|
||||
# Some of these have empty symbols because the BE special-cases them based on the
|
||||
|
||||
Reference in New Issue
Block a user