Files
doris/be/src/vec/functions/functions_comparison.h
chenlinzhong c9961c9bb9 [style] clang-format all c++ code (#9305)
- sh build-support/clang-format.sh  to  clang-format all c++ code
2022-04-29 16:14:22 +08:00

426 lines
20 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h
// and modified by Doris
#pragma once
#include <limits>
#include <type_traits>
#include "common/logging.h"
#include "vec/columns/column_const.h"
#include "vec/columns/column_decimal.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
#include "vec/common/memcmp_small.h"
#include "vec/core/accurate_comparison.h"
#include "vec/core/decimal_comparison.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
#include "vec/data_types/get_least_supertype.h"
#include "vec/functions/function.h"
#include "vec/functions/function_helpers.h"
#include "vec/functions/functions_logical.h"
#include "vec/runtime/vdatetime_value.h"
namespace doris::vectorized {
/** Comparison functions: ==, !=, <, >, <=, >=.
* The comparison functions always return 0 or 1 (UInt8).
*
* You can compare the following types:
* - numbers and decimals;
* - strings and fixed strings;
* - dates;
* - datetimes;
* within each group, but not from different groups;
* - tuples (lexicographic comparison).
*
* Exception: You can compare the date and datetime with a constant string. Example: EventDate = '2015-01-01'.
*/
template <typename A, typename B, typename Op>
struct NumComparisonImpl {
/// If you don't specify NO_INLINE, the compiler will inline this function, but we don't need this as this function contains tight loop inside.
static void NO_INLINE vector_vector(const PaddedPODArray<A>& a, const PaddedPODArray<B>& b,
PaddedPODArray<UInt8>& c) {
/** GCC 4.8.2 vectorizes a loop only if it is written in this form.
* In this case, if you loop through the array index (the code will look simpler),
* the loop will not be vectorized.
*/
size_t size = a.size();
const A* a_pos = a.data();
const B* b_pos = b.data();
UInt8* c_pos = c.data();
const A* a_end = a_pos + size;
while (a_pos < a_end) {
*c_pos = Op::apply(*a_pos, *b_pos);
++a_pos;
++b_pos;
++c_pos;
}
}
static void NO_INLINE vector_constant(const PaddedPODArray<A>& a, B b,
PaddedPODArray<UInt8>& c) {
size_t size = a.size();
const A* a_pos = a.data();
UInt8* c_pos = c.data();
const A* a_end = a_pos + size;
while (a_pos < a_end) {
*c_pos = Op::apply(*a_pos, b);
++a_pos;
++c_pos;
}
}
static void constant_vector(A a, const PaddedPODArray<B>& b, PaddedPODArray<UInt8>& c) {
NumComparisonImpl<B, A, typename Op::SymmetricOp>::vector_constant(b, a, c);
}
static void constant_constant(A a, B b, UInt8& c) { c = Op::apply(a, b); }
};
/// Generic version, implemented for columns of same type.
///
/// Every kernel asks the left column for the ordering of a row pair via
/// IColumn::compare_at and then feeds that ordering, together with 0, to Op::apply.
/// The trailing `1` passed to compare_at is presumably the NaN direction hint
/// inherited from ClickHouse - confirm against IColumn::compare_at.
///
/// The output array is never resized here; the caller sizes `c` beforehand.
template <typename Op>
struct GenericComparisonImpl {
    /// Row i of `a` against row i of `b`, for every row of `a`.
    static void NO_INLINE vector_vector(const IColumn& a, const IColumn& b,
                                        PaddedPODArray<UInt8>& c) {
        const size_t rows = a.size();
        for (size_t row = 0; row < rows; ++row) {
            const auto ordering = a.compare_at(row, row, b, 1);
            c[row] = Op::apply(ordering, 0);
        }
    }

    /// Every row of `a` against the single value carried by `b`.
    static void NO_INLINE vector_constant(const IColumn& a, const IColumn& b,
                                          PaddedPODArray<UInt8>& c) {
        // Take a one-row copy of `b` and expand it to a full column if it is const,
        // so that row 0 can be addressed directly inside the loop.
        auto single_row = b.clone_resized(1)->convert_to_full_column_if_const();
        const size_t rows = a.size();
        for (size_t row = 0; row < rows; ++row) {
            const auto ordering = a.compare_at(row, 0, *single_row, 1);
            c[row] = Op::apply(ordering, 0);
        }
    }

    /// Constant on the left: swap the operands and run vector_constant with the
    /// mirrored operation Op::SymmetricOp.
    static void constant_vector(const IColumn& a, const IColumn& b, PaddedPODArray<UInt8>& c) {
        using Mirrored = GenericComparisonImpl<typename Op::SymmetricOp>;
        Mirrored::vector_constant(b, a, c);
    }

    /// Row 0 of `a` against row 0 of `b`; the single result is stored in `c`.
    static void constant_constant(const IColumn& a, const IColumn& b, UInt8& c) {
        const auto ordering = a.compare_at(0, 0, b, 1);
        c = Op::apply(ordering, 0);
    }
};
/// Name tag carrying the identifier "eq"; meant to be used as the `Name` template
/// argument of FunctionComparison, which reports it from get_name().
struct NameEquals {
    static constexpr const char* name = "eq";
};
/// Name tag carrying the identifier "ne"; meant to be used as the `Name` template
/// argument of FunctionComparison, which reports it from get_name().
struct NameNotEquals {
    static constexpr const char* name = "ne";
};
/// Name tag carrying the identifier "lt"; meant to be used as the `Name` template
/// argument of FunctionComparison, which reports it from get_name().
struct NameLess {
    static constexpr const char* name = "lt";
};
/// Name tag carrying the identifier "gt"; meant to be used as the `Name` template
/// argument of FunctionComparison, which reports it from get_name().
struct NameGreater {
    static constexpr const char* name = "gt";
};
/// Name tag carrying the identifier "le"; meant to be used as the `Name` template
/// argument of FunctionComparison, which reports it from get_name().
struct NameLessOrEquals {
    static constexpr const char* name = "le";
};
/// Name tag carrying the identifier "ge"; meant to be used as the `Name` template
/// argument of FunctionComparison, which reports it from get_name().
struct NameGreaterOrEquals {
    static constexpr const char* name = "ge";
};
template <template <typename, typename> class Op, typename Name>
class FunctionComparison : public IFunction {
public:
static constexpr auto name = Name::name;
static FunctionPtr create() { return std::make_shared<FunctionComparison>(); }
FunctionComparison() = default;
private:
template <typename T0, typename T1>
bool execute_num_right_type(Block& block, size_t result, const ColumnVector<T0>* col_left,
const IColumn* col_right_untyped) {
if (const ColumnVector<T1>* col_right =
check_and_get_column<ColumnVector<T1>>(col_right_untyped)) {
auto col_res = ColumnUInt8::create();
ColumnUInt8::Container& vec_res = col_res->get_data();
vec_res.resize(col_left->get_data().size());
NumComparisonImpl<T0, T1, Op<T0, T1>>::vector_vector(col_left->get_data(),
col_right->get_data(), vec_res);
block.replace_by_position(result, std::move(col_res));
return true;
} else if (auto col_right_const =
check_and_get_column_const<ColumnVector<T1>>(col_right_untyped)) {
auto col_res = ColumnUInt8::create();
ColumnUInt8::Container& vec_res = col_res->get_data();
vec_res.resize(col_left->size());
NumComparisonImpl<T0, T1, Op<T0, T1>>::vector_constant(
col_left->get_data(), col_right_const->template get_value<T1>(), vec_res);
block.replace_by_position(result, std::move(col_res));
return true;
}
return false;
}
template <typename T0, typename T1>
bool execute_num_const_right_type(Block& block, size_t result, const ColumnConst* col_left,
const IColumn* col_right_untyped) {
if (const ColumnVector<T1>* col_right =
check_and_get_column<ColumnVector<T1>>(col_right_untyped)) {
auto col_res = ColumnUInt8::create();
ColumnUInt8::Container& vec_res = col_res->get_data();
vec_res.resize(col_left->size());
NumComparisonImpl<T0, T1, Op<T0, T1>>::constant_vector(
col_left->template get_value<T0>(), col_right->get_data(), vec_res);
block.replace_by_position(result, std::move(col_res));
return true;
} else if (auto col_right_const =
check_and_get_column_const<ColumnVector<T1>>(col_right_untyped)) {
UInt8 res = 0;
NumComparisonImpl<T0, T1, Op<T0, T1>>::constant_constant(
col_left->template get_value<T0>(), col_right_const->template get_value<T1>(),
res);
block.replace_by_position(
result, DataTypeUInt8().create_column_const(col_left->size(), to_field(res)));
return true;
}
return false;
}
template <typename T0>
bool execute_num_left_type(Block& block, size_t result, const IColumn* col_left_untyped,
const IColumn* col_right_untyped) {
if (const ColumnVector<T0>* col_left =
check_and_get_column<ColumnVector<T0>>(col_left_untyped)) {
if (execute_num_right_type<T0, UInt8>(block, result, col_left, col_right_untyped) ||
execute_num_right_type<T0, UInt16>(block, result, col_left, col_right_untyped) ||
execute_num_right_type<T0, UInt32>(block, result, col_left, col_right_untyped) ||
execute_num_right_type<T0, UInt64>(block, result, col_left, col_right_untyped) ||
execute_num_right_type<T0, Int8>(block, result, col_left, col_right_untyped) ||
execute_num_right_type<T0, Int16>(block, result, col_left, col_right_untyped) ||
execute_num_right_type<T0, Int32>(block, result, col_left, col_right_untyped) ||
execute_num_right_type<T0, Int64>(block, result, col_left, col_right_untyped) ||
execute_num_right_type<T0, Int128>(block, result, col_left, col_right_untyped) ||
execute_num_right_type<T0, Float32>(block, result, col_left, col_right_untyped) ||
execute_num_right_type<T0, Float64>(block, result, col_left, col_right_untyped))
return true;
else {
LOG(FATAL) << "Illegal column " << col_right_untyped->get_name()
<< " of second argument of function " << get_name();
}
} else if (auto col_left_const =
check_and_get_column_const<ColumnVector<T0>>(col_left_untyped)) {
if (execute_num_const_right_type<T0, UInt8>(block, result, col_left_const,
col_right_untyped) ||
execute_num_const_right_type<T0, UInt16>(block, result, col_left_const,
col_right_untyped) ||
execute_num_const_right_type<T0, UInt32>(block, result, col_left_const,
col_right_untyped) ||
execute_num_const_right_type<T0, UInt64>(block, result, col_left_const,
col_right_untyped) ||
execute_num_const_right_type<T0, Int8>(block, result, col_left_const,
col_right_untyped) ||
execute_num_const_right_type<T0, Int16>(block, result, col_left_const,
col_right_untyped) ||
execute_num_const_right_type<T0, Int32>(block, result, col_left_const,
col_right_untyped) ||
execute_num_const_right_type<T0, Int64>(block, result, col_left_const,
col_right_untyped) ||
execute_num_const_right_type<T0, Int128>(block, result, col_left_const,
col_right_untyped) ||
execute_num_const_right_type<T0, Float32>(block, result, col_left_const,
col_right_untyped) ||
execute_num_const_right_type<T0, Float64>(block, result, col_left_const,
col_right_untyped))
return true;
else {
LOG(FATAL) << "Illegal column " << col_right_untyped->get_name()
<< " of second argument of function " << get_name();
}
}
return false;
}
Status execute_decimal(Block& block, size_t result, const ColumnWithTypeAndName& col_left,
const ColumnWithTypeAndName& col_right) {
TypeIndex left_number = col_left.type->get_type_id();
TypeIndex right_number = col_right.type->get_type_id();
auto call = [&](const auto& types) -> bool {
using Types = std::decay_t<decltype(types)>;
using LeftDataType = typename Types::LeftType;
using RightDataType = typename Types::RightType;
DecimalComparison<LeftDataType, RightDataType, Op, false>(block, result, col_left,
col_right);
return true;
};
if (!call_on_basic_types<true, false, true, false>(left_number, right_number, call)) {
return Status::RuntimeError(fmt::format("Wrong call for {} with {} and {}", get_name(),
col_left.type->get_name(),
col_right.type->get_name()));
}
return Status::OK();
}
void execute_generic_identical_types(Block& block, size_t result, const IColumn* c0,
const IColumn* c1) {
bool c0_const = is_column_const(*c0);
bool c1_const = is_column_const(*c1);
if (c0_const && c1_const) {
UInt8 res = 0;
GenericComparisonImpl<Op<int, int>>::constant_constant(*c0, *c1, res);
block.replace_by_position(
result, DataTypeUInt8().create_column_const(c0->size(), to_field(res)));
} else {
auto c_res = ColumnUInt8::create();
ColumnUInt8::Container& vec_res = c_res->get_data();
vec_res.resize(c0->size());
if (c0_const)
GenericComparisonImpl<Op<int, int>>::constant_vector(*c0, *c1, vec_res);
else if (c1_const)
GenericComparisonImpl<Op<int, int>>::vector_constant(*c0, *c1, vec_res);
else
GenericComparisonImpl<Op<int, int>>::vector_vector(*c0, *c1, vec_res);
block.replace_by_position(result, std::move(c_res));
}
}
Status execute_generic(Block& block, size_t result, const ColumnWithTypeAndName& c0,
const ColumnWithTypeAndName& c1) {
execute_generic_identical_types(block, result, c0.column.get(), c1.column.get());
return Status::OK();
}
public:
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
/// Get result types by argument types. If the function does not apply to these arguments, throw an exception.
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return std::make_shared<DataTypeUInt8>();
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
const auto& col_with_type_and_name_left = block.get_by_position(arguments[0]);
const auto& col_with_type_and_name_right = block.get_by_position(arguments[1]);
const IColumn* col_left_untyped = col_with_type_and_name_left.column.get();
const IColumn* col_right_untyped = col_with_type_and_name_right.column.get();
const DataTypePtr& left_type = col_with_type_and_name_left.type;
const DataTypePtr& right_type = col_with_type_and_name_right.type;
/// The case when arguments are the same (tautological comparison). Return constant.
/// NOTE: Nullable types are special case. (BTW, this function use default implementation for Nullable, so Nullable types cannot be here. Check just in case.)
/// NOTE: We consider NaN comparison to be implementation specific (and in our implementation NaNs are sometimes equal sometimes not).
if (left_type->equals(*right_type) && !left_type->is_nullable() &&
col_left_untyped == col_right_untyped) {
/// Always true: =, <=, >=
// TODO: Return const column in the future
if constexpr (std::is_same_v<Op<int, int>, EqualsOp<int, int>> ||
std::is_same_v<Op<int, int>, LessOrEqualsOp<int, int>> ||
std::is_same_v<Op<int, int>, GreaterOrEqualsOp<int, int>>) {
block.get_by_position(result).column =
DataTypeUInt8()
.create_column_const(input_rows_count, 1u)
->convert_to_full_column_if_const();
return Status::OK();
} else {
block.get_by_position(result).column =
DataTypeUInt8()
.create_column_const(input_rows_count, 0u)
->convert_to_full_column_if_const();
return Status::OK();
}
}
WhichDataType which_left {left_type};
WhichDataType which_right {right_type};
const bool left_is_num = col_left_untyped->is_numeric();
const bool right_is_num = col_right_untyped->is_numeric();
// Compare date and datetime direct use the Int64 compare. Keep the comment
// may we should refactor the code.
// bool date_and_datetime = (left_type != right_type) && which_left.is_date_or_datetime() &&
// which_right.is_date_or_datetime();
if (left_is_num && right_is_num) {
if (!(execute_num_left_type<UInt8>(block, result, col_left_untyped,
col_right_untyped) ||
execute_num_left_type<UInt16>(block, result, col_left_untyped,
col_right_untyped) ||
execute_num_left_type<UInt32>(block, result, col_left_untyped,
col_right_untyped) ||
execute_num_left_type<UInt64>(block, result, col_left_untyped,
col_right_untyped) ||
execute_num_left_type<Int8>(block, result, col_left_untyped, col_right_untyped) ||
execute_num_left_type<Int16>(block, result, col_left_untyped,
col_right_untyped) ||
execute_num_left_type<Int32>(block, result, col_left_untyped,
col_right_untyped) ||
execute_num_left_type<Int64>(block, result, col_left_untyped,
col_right_untyped) ||
execute_num_left_type<Int128>(block, result, col_left_untyped,
col_right_untyped) ||
execute_num_left_type<Float32>(block, result, col_left_untyped,
col_right_untyped) ||
execute_num_left_type<Float64>(block, result, col_left_untyped,
col_right_untyped)))
return Status::RuntimeError(
fmt::format("Illegal column {} of first argument of function {}",
col_left_untyped->get_name(), get_name()));
} else if (is_decimal(left_type) || is_decimal(right_type)) {
if (!allow_decimal_comparison(left_type, right_type)) {
return Status::RuntimeError(fmt::format("No operation {} between {} and {}",
get_name(), left_type->get_name(),
right_type->get_name()));
}
return execute_decimal(block, result, col_with_type_and_name_left,
col_with_type_and_name_right);
} else {
// TODO: varchar and string maybe need a quickly way
return execute_generic(block, result, col_with_type_and_name_left,
col_with_type_and_name_right);
}
return Status::OK();
}
};
} // namespace doris::vectorized