[Function][Vectorized] Support least/greatest function (#8107)

Co-authored-by: lihaopeng <lihaopeng@baidu.com>
This commit is contained in:
HappenLee
2022-02-18 11:57:07 +08:00
committed by GitHub
parent 68b24d608f
commit bcde1f265a
8 changed files with 260 additions and 39 deletions

View File

@ -148,6 +148,7 @@ set(VEC_FILES
functions/function_grouping.cpp
functions/function_rpc.cpp
functions/function_convert_tz.cpp
functions/least_greast.cpp
olap/vgeneric_iterators.cpp
olap/vcollect_iterator.cpp
olap/block_reader.cpp

View File

@ -27,6 +27,7 @@
#include "util/binary_cast.hpp"
#include "vec/common/nan_utils.h"
#include "vec/common/string_ref.h"
#include "vec/common/uint128.h"
#include "vec/core/types.h"
#include "vec/runtime/vdatetime_value.h"
@ -487,8 +488,8 @@ struct EqualsOp {
};
template <>
struct EqualsOp<VecDateTimeValue, VecDateTimeValue> {
static UInt8 apply(const Int64& a, const Int64& b) {
struct EqualsOp<DecimalV2Value, DecimalV2Value> {
static UInt8 apply(const Int128& a, const Int128& b) {
return a == b;
}
};
@ -500,8 +501,8 @@ struct NotEqualsOp {
};
template <>
struct NotEqualsOp<VecDateTimeValue, VecDateTimeValue> {
static UInt8 apply(const Int64& a, const Int64& b) {
struct NotEqualsOp<DecimalV2Value, DecimalV2Value> {
static UInt8 apply(const Int128& a, const Int128& b) {
return a != b;
}
};
@ -516,9 +517,16 @@ struct LessOp {
};
template <>
struct LessOp<VecDateTimeValue, VecDateTimeValue> {
static UInt8 apply(Int64 a, Int64 b) {
return binary_cast<Int64, VecDateTimeValue>(a) < binary_cast<Int64, VecDateTimeValue>(b);
struct LessOp<DecimalV2Value, DecimalV2Value> {
static UInt8 apply(Int128 a, Int128 b) {
return binary_cast<Int128, DecimalV2Value>(a) < binary_cast<Int128, DecimalV2Value>(b);
}
};
/// LessOp specialization for string references.
/// Relies on the `<` comparison available for StringRef and converts the
/// outcome to UInt8 through the return type.
template <>
struct LessOp<StringRef, StringRef> {
    static UInt8 apply(StringRef a, StringRef b) {
        const auto lhs_is_smaller = a < b;
        return lhs_is_smaller;
    }
};
@ -529,9 +537,16 @@ struct GreaterOp {
};
template <>
struct GreaterOp<VecDateTimeValue, VecDateTimeValue> {
static UInt8 apply(Int64 a, Int64 b) {
return binary_cast<Int64, VecDateTimeValue>(a) > binary_cast<Int64, VecDateTimeValue>(b);
struct GreaterOp<DecimalV2Value, DecimalV2Value> {
static UInt8 apply(Int128 a, Int128 b) {
return binary_cast<Int128, DecimalV2Value>(a) > binary_cast<Int128, DecimalV2Value>(b);
}
};
/// GreaterOp specialization for string references.
/// Relies on the `>` comparison available for StringRef and converts the
/// outcome to UInt8 through the return type.
template <>
struct GreaterOp<StringRef, StringRef> {
    static UInt8 apply(StringRef a, StringRef b) {
        const auto lhs_is_larger = a > b;
        return lhs_is_larger;
    }
};
@ -545,9 +560,9 @@ struct LessOrEqualsOp {
};
template <>
struct LessOrEqualsOp<VecDateTimeValue, VecDateTimeValue> {
static UInt8 apply(Int64 a, Int64 b) {
return binary_cast<Int64, VecDateTimeValue>(a) <= binary_cast<Int64, VecDateTimeValue>(b);
struct LessOrEqualsOp<DecimalV2Value, DecimalV2Value> {
static UInt8 apply(Int128 a, Int128 b) {
return binary_cast<Int128, DecimalV2Value>(a) <= binary_cast<Int128, DecimalV2Value>(b);
}
};
@ -558,9 +573,9 @@ struct GreaterOrEqualsOp {
};
template <>
struct GreaterOrEqualsOp<VecDateTimeValue, VecDateTimeValue> {
static UInt8 apply(Int64 a, Int64 b) {
return binary_cast<Int64, VecDateTimeValue>(a) >= binary_cast<Int64, VecDateTimeValue>(b);
struct GreaterOrEqualsOp<DecimalV2Value, DecimalV2Value> {
static UInt8 apply(Int128 a, Int128 b) {
return binary_cast<Int128, DecimalV2Value>(a) >= binary_cast<Int128, DecimalV2Value>(b);
}
};

View File

@ -17,7 +17,6 @@
#include "udf/udf.h"
#include "vec/data_types/get_least_supertype.h"
#include "vec/functions/function_helpers.h"
#include "vec/functions/simple_function_factory.h"
#include "vec/utils/template_helpers.hpp"
#include "vec/utils/util.hpp"

View File

@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "udf/udf.h"
#include "vec/data_types/get_least_supertype.h"
#include "vec/functions/function_helpers.h"
#include "vec/functions/simple_function_factory.h"
#include "vec/utils/template_helpers.hpp"
#include "vec/utils/util.hpp"
namespace doris::vectorized {
/// Variadic function wrapper that forwards all real work to `Impl`.
///
/// `Impl` must provide:
///   - `static constexpr name`                         : the function name,
///   - `get_return_type_impl(const DataTypes&)`        : the result type,
///   - `execute(Block&, const ColumnNumbers&, size_t)` : the result column.
///
/// The class opts into the default implementations for constant and null
/// arguments (both hooks below return true).
template <typename Impl>
class FunctionMultiSameArgs : public IFunction {
public:
    static constexpr auto name = Impl::name;

    static FunctionPtr create() {
        return std::make_shared<FunctionMultiSameArgs<Impl>>();
    }

    String get_name() const override {
        return Impl::name;
    }

    bool use_default_implementation_for_constants() const override {
        return true;
    }

    bool use_default_implementation_for_nulls() const override {
        return true;
    }

    // Variadic: the fixed argument count reported below is 0.
    bool is_variadic() const override {
        return true;
    }

    size_t get_number_of_arguments() const override {
        return 0;
    }

    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        return Impl::get_return_type_impl(arguments);
    }

    /// Computes the result column via `Impl::execute` and stores it at
    /// position `result` of `block`. Always returns Status::OK().
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) override {
        // At least one argument is required (checked via DCHECK).
        DCHECK_GE(arguments.size(), 1);
        block.replace_by_position(result, Impl::execute(block, arguments, input_rows_count));
        return Status::OK();
    }
};
};

View File

@ -0,0 +1,127 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "udf/udf.h"
#include "vec/core/accurate_comparison.h"
#include "vec/data_types/get_least_supertype.h"
#include "vec/functions/function_helpers.h"
#include "vec/functions/function_multi_same_args.h"
#include "vec/functions/simple_function_factory.h"
#include "vec/utils/template_helpers.hpp"
#include "vec/utils/util.hpp"
namespace doris::vectorized {
template <template <typename, typename> class Op, typename Impl>
struct CompareMultiImpl {
static constexpr auto name = Impl::name;
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
return arguments[0];
}
template <typename ColumnType>
static void insert_result_data(MutableColumnPtr& result_column, ColumnPtr& argument_column,
const size_t input_rows_count) {
auto* __restrict result_raw_data =
reinterpret_cast<ColumnType*>(result_column.get())->get_data().data();
auto* __restrict column_raw_data =
reinterpret_cast<const ColumnType*>(argument_column.get())->get_data().data();
if constexpr (std::is_same_v<ColumnType, ColumnDecimal128>) {
for (size_t i = 0; i < input_rows_count; ++i) {
result_raw_data[i] =
Op<DecimalV2Value, DecimalV2Value>::apply(column_raw_data[i], result_raw_data[i]) ? column_raw_data[i] :
result_raw_data[i];
}
} else {
for (size_t i = 0; i < input_rows_count; ++i) {
using type = std::decay_t<decltype(result_raw_data[0])>;
result_raw_data[i] =
Op<type, type>::apply(column_raw_data[i], result_raw_data[i]) ? column_raw_data[i] :
result_raw_data[i];
}
}
}
static ColumnPtr execute(Block& block, const ColumnNumbers& arguments, size_t input_rows_count) {
if (arguments.size() == 1) return block.get_by_position(arguments.back()).column;
const auto& data_type = block.get_by_position(arguments.back()).type;
MutableColumnPtr result_column = data_type->create_column();
Columns args;
for (int i = 0; i < arguments.size(); ++i) {
args.emplace_back(block.get_by_position(arguments[i]).column->convert_to_full_column_if_const());
}
// because now the string types does not support random position writing,
// so insert into result data have two methods, one is for string types, one is for others type remaining
bool is_string_result = result_column->is_column_string();
if (is_string_result) {
result_column->reserve(input_rows_count);
} else {
result_column->insert_range_from(
*(args[0]), 0, input_rows_count);
}
if (is_string_result) {
const auto& column_string = reinterpret_cast<const ColumnString&>(*args[0]);
auto& column_res = reinterpret_cast<ColumnString&>(*result_column);
for (int i = 0; i < input_rows_count; ++i) {
auto str_data = column_string.get_data_at(i);
for (int j = 1; j < arguments.size(); ++j) {
auto temp_data =
reinterpret_cast<const ColumnString&>(*args[j]).get_data_at(i);
str_data = Op<StringRef, StringRef>::apply(temp_data, str_data) ? temp_data : str_data;
}
column_res.insert_data(str_data.data, str_data.size);
}
} else {
WhichDataType which(data_type);
#define DISPATCH(TYPE, COLUMN_TYPE) \
if (which.idx == TypeIndex::TYPE) { \
for (int i = 1; i < arguments.size(); ++i) { \
insert_result_data<COLUMN_TYPE>(result_column, args[i], input_rows_count); \
} \
}
NUMERIC_TYPE_TO_COLUMN_TYPE(DISPATCH)
DISPATCH(Decimal128, ColumnDecimal<Decimal128>)
TIME_TYPE_TO_COLUMN_TYPE(DISPATCH)
#undef DISPATCH
}
return result_column;
}
};
/// Name tag for the `least` function.
struct LeastName {
    static constexpr auto name = "least";
};

/// Name tag for the `greatest` function.
struct GreatestName {
    static constexpr auto name = "greatest";
};

// `least` keeps a candidate when LessOp holds, `greatest` when GreaterOp holds.
using FunctionLeast = FunctionMultiSameArgs<CompareMultiImpl<LessOp, LeastName>>;
using FunctionGreatest = FunctionMultiSameArgs<CompareMultiImpl<GreaterOp, GreatestName>>;

/// Registers the `least` and `greatest` functions with `factory`.
///
/// NOTE: the spelling "greast" in this function's name is kept on purpose;
/// the symbol is declared and called by the function factory under that name.
void register_function_least_greast(SimpleFunctionFactory& factory) {
    factory.register_function<FunctionLeast>();
    factory.register_function<FunctionGreatest>();
}
};

View File

@ -69,6 +69,7 @@ void register_function_coalesce(SimpleFunctionFactory& factory);
void register_function_grouping(SimpleFunctionFactory& factory);
void register_function_datetime_floor_ceil(SimpleFunctionFactory& factory);
void register_function_convert_tz(SimpleFunctionFactory& factory);
void register_function_least_greast(SimpleFunctionFactory& factory);
class SimpleFunctionFactory {
using Creator = std::function<FunctionBuilderPtr()>;
@ -187,6 +188,7 @@ public:
register_function_grouping(instance);
register_function_datetime_floor_ceil(instance);
register_function_convert_tz(instance);
register_function_least_greast(instance);
});
return instance;
}

View File

@ -379,6 +379,28 @@ TEST(MathFunctionTest, round_test) {
}
}
// least(Int32, Int32): expects the smaller argument per row, and Null() as
// soon as one argument is Null().
TEST(MathFunctionTest, least_test) {
    std::string function_name = "least";

    InputTypeSet argument_types = {TypeIndex::Int32, TypeIndex::Int32};

    // Each entry is {{arguments...}, expected result}.
    DataSet cases = {{{3, 2}, 2},
                     {{3, 3}, 3},
                     {{Null(), -2}, Null()},
                     {{193, -2}, -2},
                     {{193, -1}, -1}};

    check_function<DataTypeInt32, true>(function_name, argument_types, cases);
}
// greatest(Int32, Int32): expects the larger argument per row, and Null() as
// soon as one argument is Null().
TEST(MathFunctionTest, greatest_test) {
    std::string function_name = "greatest";

    InputTypeSet argument_types = {TypeIndex::Int32, TypeIndex::Int32};

    // Each entry is {{arguments...}, expected result}.
    DataSet cases = {{{3, 2}, 3},
                     {{3, 3}, 3},
                     {{Null(), -2}, Null()},
                     {{193, -2}, 193},
                     {{193, -1}, 193}};

    check_function<DataTypeInt32, true>(function_name, argument_types, cases);
}
TEST(MathFunctionTest, bin_test) {
std::string func_name = "bin";

View File

@ -687,71 +687,71 @@ visible_functions = [
[['least'], 'TINYINT', ['TINYINT', '...'],
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_10TinyIntValE',
'', '', '', ''],
'', '', 'vec', ''],
[['least'], 'SMALLINT', ['SMALLINT', '...'],
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_11SmallIntValE',
'', '', '', ''],
'', '', 'vec', ''],
[['least'], 'INT', ['INT', '...'],
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_6IntValE',
'', '', '', ''],
'', '', 'vec', ''],
[['least'], 'BIGINT', ['BIGINT', '...'],
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_9BigIntValE',
'', '', '', ''],
'', '', 'vec', ''],
[['least'], 'LARGEINT', ['LARGEINT', '...'],
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_11LargeIntValE',
'', '', '', ''],
'', '', 'vec', ''],
[['least'], 'FLOAT', ['FLOAT', '...'],
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_8FloatValE',
'', '', '', ''],
'', '', 'vec', ''],
[['least'], 'DOUBLE', ['DOUBLE', '...'],
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_9DoubleValE',
'', '', '', ''],
'', '', 'vec', ''],
[['least'], 'DATETIME', ['DATETIME', '...'],
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_11DateTimeValE',
'', '', '', ''],
'', '', 'vec', ''],
[['least'], 'DECIMALV2', ['DECIMALV2', '...'],
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_12DecimalV2ValE',
'', '', '', ''],
'', '', 'vec', ''],
[['least'], 'VARCHAR', ['VARCHAR', '...'],
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
'', '', '', ''],
'', '', 'vec', ''],
[['least'], 'STRING', ['STRING', '...'],
'_ZN5doris13MathFunctions5leastEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
'', '', '', ''],
'', '', 'vec', ''],
[['greatest'], 'TINYINT', ['TINYINT', '...'],
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_10TinyIntValE',
'', '', '', ''],
'', '', 'vec', ''],
[['greatest'], 'SMALLINT', ['SMALLINT', '...'],
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_11SmallIntValE',
'', '', '', ''],
'', '', 'vec', ''],
[['greatest'], 'INT', ['INT', '...'],
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_6IntValE',
'', '', '', ''],
'', '', 'vec', ''],
[['greatest'], 'BIGINT', ['BIGINT', '...'],
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_9BigIntValE',
'', '', '', ''],
'', '', 'vec', ''],
[['greatest'], 'LARGEINT', ['LARGEINT', '...'],
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_11LargeIntValE',
'', '', '', ''],
'', '', 'vec', ''],
[['greatest'], 'FLOAT', ['FLOAT', '...'],
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_8FloatValE',
'', '', '', ''],
'', '', 'vec', ''],
[['greatest'], 'DOUBLE', ['DOUBLE', '...'],
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_9DoubleValE',
'', '', '', ''],
'', '', 'vec', ''],
[['greatest'], 'DECIMALV2', ['DECIMALV2', '...'],
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_12DecimalV2ValE',
'', '', '', ''],
'', '', 'vec', ''],
[['greatest'], 'DATETIME', ['DATETIME', '...'],
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_11DateTimeValE',
'', '', '', ''],
'', '', 'vec', ''],
[['greatest'], 'VARCHAR', ['VARCHAR', '...'],
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
'', '', '', ''],
'', '', 'vec', ''],
[['greatest'], 'STRING', ['STRING', '...'],
'_ZN5doris13MathFunctions8greatestEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
'', '', '', ''],
'', '', 'vec', ''],
# Conditional Functions
# Some of these have empty symbols because the BE special-cases them based on the