Files
doris/be/src/vec/functions/functions_logical.cpp
chenlinzhong c9961c9bb9 [style] clang-format all c++ code (#9305)
- sh build-support/clang-format.sh  to  clang-format all c++ code
2022-04-29 16:14:22 +08:00

532 lines
21 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsLogical.cpp
// and modified by Doris
#include "vec/functions/functions_logical.h"
#include <algorithm>
#include "vec/columns/column.h"
#include "vec/columns/column_const.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_vector.h"
#include "vec/columns/columns_number.h"
#include "vec/common/field_visitors.h"
#include "vec/common/typeid_cast.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/functions/function_helpers.h"
#include "vec/functions/simple_function_factory.h"
namespace doris::vectorized {
namespace {
using namespace FunctionsLogicalDetail;
/// Storage container of a UInt8 column (the type ColumnUInt8::get_data() is bound to in this file).
using UInt8Container = ColumnUInt8::Container;
/// A list of raw, const pointers to UInt8 columns.
using UInt8ColumnPtrs = std::vector<const ColumnUInt8*>;
/// Turns a buffer of ternary-encoded values into a UInt8 column.
/// Every output value is 1 where the input equals Ternary::True and 0 otherwise.
/// When `make_nullable` is set, the values are wrapped into a ColumnNullable whose
/// null map is 1 exactly where the input equals Ternary::Null.
MutableColumnPtr convert_from_ternary_data(const UInt8Container& ternary_data,
                                           const bool make_nullable) {
    const size_t rows_count = ternary_data.size();

    auto values = ColumnUInt8::create(rows_count);
    auto& values_data = values->get_data();
    for (size_t row = 0; row < rows_count; ++row) {
        values_data[row] = (ternary_data[row] == Ternary::True);
    }
    if (!make_nullable) {
        return values;
    }

    auto null_map = ColumnUInt8::create(rows_count);
    auto& null_map_data = null_map->get_data();
    for (size_t row = 0; row < rows_count; ++row) {
        null_map_data[row] = (ternary_data[row] == Ternary::Null);
    }
    return ColumnNullable::create(std::move(values), std::move(null_map));
}
/// Attempts to interpret `column` as a ColumnVector<T>.
/// On success writes `value != 0` for every row into `res` (starting at res.begin(),
/// so `res` must already be large enough) and returns true.
/// Returns false, leaving `res` untouched, when the column is of another type.
template <typename T>
bool try_convert_column_to_uint8(const IColumn* column, UInt8Container& res) {
    const auto typed_column = check_and_get_column<ColumnVector<T>>(column);
    if (!typed_column) {
        return false;
    }

    const auto& source = typed_column->get_data();
    auto out = res.begin();
    for (const auto value : source) {
        *out = (value != 0);
        ++out;
    }
    return true;
}
/// Converts a numeric column into 0/1 values stored in `res`.
/// The candidate element types are tried in order and the first match wins;
/// if none matches, the process is aborted through LOG(FATAL).
void convert_column_to_uint8(const IColumn* column, UInt8Container& res) {
    const bool converted = try_convert_column_to_uint8<Int8>(column, res) ||
                           try_convert_column_to_uint8<Int16>(column, res) ||
                           try_convert_column_to_uint8<Int32>(column, res) ||
                           try_convert_column_to_uint8<Int64>(column, res) ||
                           try_convert_column_to_uint8<UInt16>(column, res) ||
                           try_convert_column_to_uint8<UInt32>(column, res) ||
                           try_convert_column_to_uint8<UInt64>(column, res) ||
                           try_convert_column_to_uint8<Float32>(column, res) ||
                           try_convert_column_to_uint8<Float64>(column, res);
    if (!converted) {
        LOG(FATAL) << "Unexpected type of column: " << column->get_name();
    }
}
/// Removes every constant column from `in` and folds their values into `res` with Op::apply.
/// `func` maps the first Field of a constant column to a UInt8 value.
/// Columns are visited from the back to the front, so erasing an element never
/// shifts a column that has not been inspected yet.
/// Returns true when at least one constant column was found (only then is `res` written).
template <class Op, typename Func>
static bool extract_const_columns(ColumnRawPtrs& in, UInt8& res, Func&& func) {
    bool found_const = false;
    int idx = static_cast<int>(in.size());
    while (--idx >= 0) {
        const auto& column = *in[idx];
        if (is_column_const(column)) {
            const UInt8 value = func(column[0]);
            if (!found_const) {
                res = value;
                found_const = true;
            } else {
                res = Op::apply(res, value);
            }
            in.erase(in.begin() + idx);
        }
    }
    return found_const;
}
/// Two-valued variant: a constant counts as true only when it is not NULL and
/// converts to a true boolean.
template <class Op>
inline bool extract_const_columns(ColumnRawPtrs& in, UInt8& res) {
    const auto field_to_bool = [](const Field& value) -> bool {
        if (value.is_null()) {
            return false;
        }
        return apply_visitor(FieldVisitorConvertToNumber<bool>(), value);
    };
    return extract_const_columns<Op>(in, res, field_to_bool);
}
/// Three-valued variant: every constant is mapped through Ternary::make_value,
/// with NULL constants passed as make_value(false, true), before being folded into `res_3v`.
template <class Op>
inline bool extract_const_columns_ternary(ColumnRawPtrs& in, UInt8& res_3v) {
    const auto field_to_ternary = [](const Field& value) {
        return value.is_null()
                       ? Ternary::make_value(false, true)
                       : Ternary::make_value(
                                 apply_visitor(FieldVisitorConvertToNumber<bool>(), value));
    };
    return extract_const_columns<Op>(in, res_3v, field_to_ternary);
}
/// Combines, row by row, the values of the last N UInt8 columns of a list using `Op`.
/// The object keeps a reference to the data of the N-th column from the end and
/// delegates the remaining N - 1 columns to a nested applier.
template <typename Op, size_t N>
class AssociativeApplierImpl {
    using ResultValueType = typename Op::ResultType;

public:
    /// Remembers the last N columns from `in`.
    /// Explicit, so that a column list is never silently converted into an applier.
    explicit AssociativeApplierImpl(const UInt8ColumnPtrs& in)
            : vec(in[in.size() - N]->get_data()), next(in) {}

    /// Returns a combination of values in the i-th row of all columns stored in the constructor.
    ResultValueType apply(const size_t i) const {
        const auto& a = vec[i];
        if constexpr (Op::is_saturable()) {
            /// A saturated value already fixes the result, so the remaining columns are skipped.
            return Op::is_saturated_value(a) ? a : Op::apply(a, next.apply(i));
        } else {
            return Op::apply(a, next.apply(i));
        }
    }

private:
    const UInt8Container& vec;
    const AssociativeApplierImpl<Op, N - 1> next;
};
/// End of the recursion: wraps only the last column of the list.
template <typename Op>
class AssociativeApplierImpl<Op, 1> {
    using ResultValueType = typename Op::ResultType;

public:
    /// Remembers the last column from `in`.
    /// Explicit, so that a column list is never silently converted into an applier.
    explicit AssociativeApplierImpl(const UInt8ColumnPtrs& in)
            : vec(in[in.size() - 1]->get_data()) {}

    /// Returns the value stored in the i-th row of the wrapped column.
    ResultValueType apply(const size_t i) const { return vec[i]; }

private:
    const UInt8Container& vec;
};
/// A type-erased row accessor used by AssociativeGenericApplierImpl: given a row index
/// it returns that row's value as a Ternary::ResultType.
/// Allows for on-the-fly conversion of any data type into intermediate ternary representation
using ValueGetter = std::function<Ternary::ResultType(size_t)>;
/// Builds a ValueGetter for a column by trying the element types `Types...` one after another.
/// Primary template: only the specializations that follow are defined.
template <typename... Types>
struct ValueGetterBuilderImpl;
/// Recursive step: handles columns whose element type is `Type` and defers every
/// other column to the builder for the remaining `Types...`.
template <typename Type, typename... Types>
struct ValueGetterBuilderImpl<Type, Types...> {
    /// Returns a getter for `x` when it is a ColumnVector<Type>, or a ColumnNullable whose
    /// nested column is a ColumnVector<Type>; otherwise the next candidate type is tried.
    /// The getter captures the column buffers by reference, so it must not outlive `x`.
    static ValueGetter build(const IColumn* x) {
        using TypedColumn = ColumnVector<Type>;
        using NextBuilder = ValueGetterBuilderImpl<Types...>;

        if (const auto* nullable_column = typeid_cast<const ColumnNullable*>(x)) {
            const auto* nested_column = typeid_cast<const TypedColumn*>(
                    nullable_column->get_nested_column_ptr().get());
            if (nested_column == nullptr) {
                return NextBuilder::build(x);
            }
            return [&null_data = nullable_column->get_null_map_data(),
                    &column_data = nested_column->get_data()](size_t i) {
                return Ternary::make_value(column_data[i], null_data[i]);
            };
        }

        if (const auto* column = typeid_cast<const TypedColumn*>(x)) {
            return [&column_data = column->get_data()](size_t i) {
                return Ternary::make_value(column_data[i]);
            };
        }
        return NextBuilder::build(x);
    }
};
/// Terminal case: none of the candidate element types matched the column.
template <>
struct ValueGetterBuilderImpl<> {
    /// Aborts through LOG(FATAL), naming the concrete class of the offending column.
    [[noreturn]] static ValueGetter build(const IColumn* x) {
        /// The pointer is dereferenced on purpose: `typeid(x)` would always report the
        /// static pointer type (`const IColumn*`) instead of the actual column class.
        LOG(FATAL) << "Unknown numeric column of type: " << demangle(typeid(*x).name());
    }
};
/// The builder instantiated with every element type the generic applier accepts;
/// the types are tried in the order listed.
using ValueGetterBuilder = ValueGetterBuilderImpl<UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32,
Int64, Float32, Float64>;
/// This class together with helper class ValueGetterBuilder can be used with columns of arbitrary data type
/// Allows for on-the-fly conversion of any type of data into intermediate ternary representation
/// and eliminates the need to materialize data columns in intermediate representation
template <typename Op, size_t N>
class AssociativeGenericApplierImpl {
    using ResultValueType = typename Op::ResultType;

public:
    /// Remembers the last N columns from `in`.
    /// Explicit, so that a column list is never silently converted into an applier.
    explicit AssociativeGenericApplierImpl(const ColumnRawPtrs& in)
            : val_getter {ValueGetterBuilder::build(in[in.size() - N])}, next {in} {}

    /// Returns a combination of values in the i-th row of all columns stored in the constructor.
    ResultValueType apply(const size_t i) const {
        const auto a = val_getter(i);
        if constexpr (Op::is_saturable()) {
            /// A saturated value already fixes the result, so the remaining columns are skipped.
            return Op::is_saturated_value(a) ? a : Op::apply(a, next.apply(i));
        } else {
            return Op::apply(a, next.apply(i));
        }
    }

private:
    const ValueGetter val_getter;
    const AssociativeGenericApplierImpl<Op, N - 1> next;
};
/// End of the recursion: wraps only the last column of the list.
template <typename Op>
class AssociativeGenericApplierImpl<Op, 1> {
    using ResultValueType = typename Op::ResultType;

public:
    /// Remembers the last column from `in`.
    /// Explicit, so that a column list is never silently converted into an applier.
    explicit AssociativeGenericApplierImpl(const ColumnRawPtrs& in)
            : val_getter {ValueGetterBuilder::build(in[in.size() - 1])} {}

    /// Returns the ternary value of the i-th row of the wrapped column.
    inline ResultValueType apply(const size_t i) const { return val_getter(i); }

private:
    const ValueGetter val_getter;
};
/// Apply target function by feeding it "batches" of N columns
/// Combining 10 columns per pass is the fastest for large block sizes.
/// For small block sizes - more columns is faster.
template <typename Op, template <typename, size_t> typename OperationApplierImpl, size_t N = 10>
struct OperationApplier {
    /// Folds the trailing columns of `in` into `result` until at most one column is left.
    /// After each pass the consumed columns are dropped and `result` itself is appended
    /// to `in`, so it takes part in the next pass as an ordinary input.
    template <typename Columns, typename ResultColumn>
    static void apply(Columns& in, ResultColumn& result) {
        while (in.size() > 1) {
            auto& result_data = result->get_data();
            do_batched_apply(in, result_data);
            in.push_back(result.get());
        }
    }

    /// Combines the last N columns of `in` into `result_data` and removes them from `in`.
    /// When fewer than N columns are available, the call is forwarded to the applier
    /// with the next smaller batch size.
    template <typename Columns, typename ResultData>
    static void NO_INLINE do_batched_apply(Columns& in, ResultData& result_data) {
        if (in.size() < N) {
            OperationApplier<Op, OperationApplierImpl, N - 1>::do_batched_apply(in, result_data);
            return;
        }

        const OperationApplierImpl<Op, N> batch_applier(in);
        size_t row = 0;
        for (auto it = result_data.begin(); it != result_data.end(); ++it, ++row) {
            *it = batch_applier.apply(row);
        }
        in.erase(in.end() - N, in.end());
    }
};
/// Terminal case of the batch-size recursion (N == 1).
/// It is reached only when do_batched_apply is asked to combine fewer than two columns,
/// which OperationApplier::apply never does because it loops only while in.size() > 1.
template <typename Op, template <typename, size_t> typename OperationApplierImpl>
struct OperationApplier<Op, OperationApplierImpl, 1> {
/// Always aborts through LOG(FATAL); both arguments are ignored.
template <typename Columns, typename Result>
static void NO_INLINE do_batched_apply(Columns&, Result&) {
LOG(FATAL) << "OperationApplier<...>::apply(...): not enough arguments to run this method";
}
};
/// Evaluates `Op` over `arguments` in three-valued (NULL-aware) logic and stores the
/// outcome in `result_info.column`. The result is made nullable exactly when
/// `result_info.type` is nullable.
/// NOTE(review): when more than one batch is needed, the intermediate result column
/// (holding ternary codes) is fed back through ValueGetterBuilder like an ordinary UInt8
/// column - confirm that Ternary::make_value keeps Null codes intact in that case.
template <class Op>
static void execute_for_ternary_logic_impl(ColumnRawPtrs arguments,
                                           ColumnWithTypeAndName& result_info,
                                           size_t input_rows_count) {
    /// Fold every constant argument into one ternary value.
    UInt8 const_3v_value = 0;
    const bool has_consts = extract_const_columns_ternary<Op>(arguments, const_3v_value);

    /// The constant alone settles the result when nothing else is left,
    /// or when it saturates the operation.
    if (has_consts) {
        bool const_decides_result = arguments.empty();
        if (!const_decides_result) {
            const_decides_result = Op::is_saturable() && Op::is_saturated_value(const_3v_value);
        }
        if (const_decides_result) {
            auto single_row = convert_from_ternary_data(UInt8Container({const_3v_value}),
                                                        result_info.type->is_nullable());
            result_info.column = ColumnConst::create(std::move(single_row), input_rows_count);
            return;
        }
    }

    const auto result_column = ColumnUInt8::create(input_rows_count);

    /// Keeps the materialized constant alive while `arguments` points at it.
    MutableColumnPtr const_column_holder;
    if (has_consts) {
        const bool const_is_null = (const_3v_value == Ternary::Null);
        const_column_holder = convert_from_ternary_data(
                UInt8Container(input_rows_count, const_3v_value), const_is_null);
        arguments.push_back(const_column_holder.get());
    }

    OperationApplier<Op, AssociativeGenericApplierImpl>::apply(arguments, result_column);

    result_info.column =
            convert_from_ternary_data(result_column->get_data(), result_info.type->is_nullable());
}
/// Dispatches a binary operation on the concrete element types of its two column
/// arguments by trying `Types...` one after another.
/// Primary template: only the specializations that follow are defined.
template <typename Op, typename... Types>
struct TypedExecutorInvoker;
/// The invoker instantiated with every element type the two-column fast path accepts.
template <typename Op>
using FastApplierImpl = TypedExecutorInvoker<Op, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32,
Int64, Float32, Float64>;
/// Recursive step: handles `Type` and defers to the remaining `Types...` otherwise.
template <typename Op, typename Type, typename... Types>
struct TypedExecutorInvoker<Op, Type, Types...> {
    /// The left column type is already known. If `y` is a ColumnVector<Type>, writes
    /// Op::apply(bool(x[i]), bool(y[i])) for every row of `x` into `result`;
    /// otherwise the next candidate type is tried for `y`.
    template <typename T, typename Result>
    static void apply(const ColumnVector<T>& x, const IColumn& y, Result& result) {
        const auto* typed_y = typeid_cast<const ColumnVector<Type>*>(&y);
        if (typed_y == nullptr) {
            TypedExecutorInvoker<Op, Types...>::template apply<T>(x, y, result);
            return;
        }

        const auto& lhs = x.get_data();
        const auto& rhs = typed_y->get_data();
        auto rhs_it = rhs.cbegin();
        auto out = result.begin();
        for (auto lhs_it = lhs.cbegin(); lhs_it != lhs.cend(); ++lhs_it, ++rhs_it, ++out) {
            *out = Op::apply(!!*lhs_it, !!*rhs_it);
        }
    }

    /// Resolves the element type of `x`. On a match, the resolution of `y` restarts
    /// from the full type list; otherwise the next candidate type is tried for `x`.
    template <typename Result>
    static void apply(const IColumn& x, const IColumn& y, Result& result) {
        const auto* typed_x = typeid_cast<const ColumnVector<Type>*>(&x);
        if (typed_x != nullptr) {
            FastApplierImpl<Op>::template apply<Type>(*typed_x, y, result);
        } else {
            TypedExecutorInvoker<Op, Types...>::apply(x, y, result);
        }
    }
};
/// Terminal case: the candidate type list is exhausted without a match.
template <typename Op>
struct TypedExecutorInvoker<Op> {
/// Reached when the right-hand column `y` has none of the supported element types;
/// aborts through LOG(FATAL) with the type name of `y`.
template <typename T, typename Result>
static void apply(const ColumnVector<T>&, const IColumn& y, Result&) {
LOG(FATAL) << "Unknown numeric column y of type: " << demangle(typeid(y).name());
}
/// Reached when the left-hand column `x` has none of the supported element types;
/// aborts through LOG(FATAL) with the type name of `x`.
template <typename Result>
static void apply(const IColumn& x, const IColumn&, Result&) {
LOG(FATAL) << "Unknown numeric column x of type: " << demangle(typeid(x).name());
}
};
/// Evaluates `Op` over `arguments` in plain two-valued logic and stores a UInt8 result
/// in `result_info.column`.
///
/// Steps:
///  1. all constant arguments are folded into a single value;
///  2. if that value alone fixes the result, a constant column is returned;
///  3. a neutral constant is dropped;
///  4. two remaining inputs are handled by the typed fast path;
///  5. otherwise every input is converted to UInt8 and combined in batches.
template <class Op>
static void basic_execute_impl(ColumnRawPtrs arguments, ColumnWithTypeAndName& result_info,
                               size_t input_rows_count) {
    /// Combine all constant columns into a single constant value.
    UInt8 const_val = 0;
    bool has_consts = extract_const_columns<Op>(arguments, const_val);

    /// If the constant value uniquely determines the result, return it.
    if (has_consts && (arguments.empty() || Op::apply(const_val, 0) == Op::apply(const_val, 1))) {
        if (!arguments.empty()) {
            const_val = Op::apply(const_val, 0);
        }
        result_info.column =
                DataTypeUInt8().create_column_const(input_rows_count, to_field(const_val));
        return;
    }

    /// If the constant value is a neutral element, let's forget about it.
    if (has_consts && Op::apply(const_val, 0) == 0 && Op::apply(const_val, 1) == 1) {
        has_consts = false;
    }

    auto col_res = ColumnUInt8::create();
    UInt8Container& vec_res = col_res->get_data();
    if (has_consts) {
        /// The surviving constant is materialized directly inside the result column.
        vec_res.assign(input_rows_count, const_val);
    } else {
        vec_res.resize(input_rows_count);
    }

    /// FastPath detection goes in here
    const size_t fast_path_arity = has_consts ? 1 : 2;
    if (arguments.size() == fast_path_arity) {
        if (has_consts) {
            FastApplierImpl<Op>::apply(*arguments[0], *col_res, col_res->get_data());
        } else {
            FastApplierImpl<Op>::apply(*arguments[0], *arguments[1], col_res->get_data());
        }
        result_info.column = std::move(col_res);
        return;
    }

    /// Convert all columns to UInt8
    UInt8ColumnPtrs uint8_args;
    Columns converted_columns;
    for (const IColumn* column : arguments) {
        if (auto uint8_column = check_and_get_column<ColumnUInt8>(column)) {
            uint8_args.push_back(uint8_column);
        } else {
            auto converted_column = ColumnUInt8::create(input_rows_count);
            convert_column_to_uint8(column, converted_column->get_data());
            uint8_args.push_back(converted_column.get());
            /// Keep the converted column alive while `uint8_args` points at it.
            converted_columns.emplace_back(std::move(converted_column));
        }
    }

    /// The constant lives in the result column, and every batch overwrites the result
    /// column. Batches consume columns from the back of the list, so the result column
    /// must be the LAST input: the first batch then reads the constant before it is
    /// overwritten. Placing it first would lose the constant whenever more columns
    /// are present than fit into a single batch.
    /// Assumes Op is commutative (as AND/OR/XOR are), so the position is irrelevant.
    if (has_consts) {
        uint8_args.push_back(col_res.get());
    }

    OperationApplier<Op, AssociativeApplierImpl>::apply(uint8_args, col_res);

    /// This is possible if there is exactly one non-constant among the arguments, and it is of type UInt8.
    if (uint8_args[0] != col_res.get()) {
        vec_res.assign(uint8_args[0]->get_data());
    }
    result_info.column = std::move(col_res);
}
} // namespace
/// Validates the argument types and derives the result type.
/// - fewer than two arguments is fatal;
/// - each argument must be a native number; when Impl has a special implementation for
///   nulls, an only-null type or a nullable native number is accepted as well;
/// - the first nullable argument triggers a warning when Impl has no special
///   implementation for nulls.
/// Returns UInt8, wrapped into Nullable when at least one argument is nullable.
template <typename Impl, typename Name>
DataTypePtr FunctionAnyArityLogical<Impl, Name>::get_return_type_impl(
        const DataTypes& arguments) const {
    if (arguments.size() < 2) {
        LOG(FATAL) << fmt::format(
                "Number of arguments for function \"{}\" should be at least 2: passed {}",
                get_name(), arguments.size());
    }

    bool has_nullable_arguments = false;
    size_t position = 0; /// 1-based index used in diagnostics
    for (const auto& arg_type : arguments) {
        ++position;

        if (!has_nullable_arguments && arg_type->is_nullable()) {
            has_nullable_arguments = true;
            if (!Impl::special_implementation_for_nulls()) {
                LOG(WARNING) << fmt::format(
                        "Logical error: Unexpected type of argument for function \"{}\" argument "
                        "{} is of type {}",
                        get_name(), position, arg_type->get_name());
            }
        }

        const bool acceptable =
                is_native_number(arg_type) ||
                (Impl::special_implementation_for_nulls() &&
                 (arg_type->only_null() || is_native_number(remove_nullable(arg_type))));
        if (!acceptable) {
            LOG(FATAL) << fmt::format("Illegal type ({}) of {} argument of function {}",
                                      arg_type->get_name(), position, get_name());
        }
    }

    auto result_type = std::make_shared<DataTypeUInt8>();
    if (has_nullable_arguments) {
        return make_nullable(result_type);
    }
    return result_type;
}
/// Gathers raw pointers to the argument columns of `block` and runs the operation:
/// the ternary (NULL-aware) implementation when the result type is nullable,
/// the plain two-valued one otherwise. Always returns Status::OK().
template <typename Impl, typename Name>
Status FunctionAnyArityLogical<Impl, Name>::execute_impl(FunctionContext* context, Block& block,
                                                         const ColumnNumbers& arguments,
                                                         size_t result_index,
                                                         size_t input_rows_count) {
    ColumnRawPtrs args_in;
    for (const auto position : arguments) {
        const auto& argument = block.get_by_position(position);
        args_in.push_back(argument.column.get());
    }

    auto& result_info = block.get_by_position(result_index);
    const bool use_ternary_logic = result_info.type->is_nullable();
    if (use_ternary_logic) {
        execute_for_ternary_logic_impl<Impl>(std::move(args_in), result_info, input_rows_count);
    } else {
        basic_execute_impl<Impl>(std::move(args_in), result_info, input_rows_count);
    }
    return Status::OK();
}
template <typename A, typename Op>
struct UnaryOperationImpl {
using ResultType = typename Op::ResultType;
using ArrayA = typename ColumnVector<A>::Container;
using ArrayC = typename ColumnVector<ResultType>::Container;
static void NO_INLINE vector(const ArrayA& a, ArrayC& c) {
std::transform(a.cbegin(), a.cend(), c.begin(), [](const auto x) { return Op::apply(x); });
}
};
/// Checks that the first argument is a native number (fatal otherwise)
/// and reports UInt8 as the result type.
template <template <typename> class Impl, typename Name>
DataTypePtr FunctionUnaryLogical<Impl, Name>::get_return_type_impl(
        const DataTypes& arguments) const {
    const auto& arg_type = arguments[0];
    if (!is_native_number(arg_type)) {
        LOG(FATAL) << fmt::format("Illegal type ({}) of argument of function {}",
                                  arg_type->get_name(), get_name());
    }
    return std::make_shared<DataTypeUInt8>();
}
/// Runs the unary operation Impl<T> when the first argument column is a ColumnVector<T>.
/// On a match, a new UInt8 column of the same length is computed and stored at position
/// `result` of `block`, and true is returned. Returns false, without touching `block`,
/// when the column has another type.
template <template <typename> class Impl, typename T>
bool functionUnaryExecuteType(Block& block, const ColumnNumbers& arguments, size_t result) {
    const auto typed_column = check_and_get_column<ColumnVector<T>>(
            block.get_by_position(arguments[0]).column.get());
    if (!typed_column) {
        return false;
    }

    const auto& input = typed_column->get_data();
    auto col_res = ColumnUInt8::create();
    auto& vec_res = col_res->get_data();
    vec_res.resize(input.size());
    UnaryOperationImpl<T, Impl<T>>::vector(input, vec_res);

    block.replace_by_position(result, std::move(col_res));
    return true;
}
/// Executes the unary operation by trying each supported element type in turn;
/// the first type that matches the argument column produces the result.
/// Aborts through LOG(FATAL) when no type matches; otherwise returns Status::OK().
template <template <typename> class Impl, typename Name>
Status FunctionUnaryLogical<Impl, Name>::execute_impl(FunctionContext* context, Block& block,
                                                      const ColumnNumbers& arguments, size_t result,
                                                      size_t /*input_rows_count*/) {
    const bool handled = functionUnaryExecuteType<Impl, UInt8>(block, arguments, result) ||
                         functionUnaryExecuteType<Impl, UInt16>(block, arguments, result) ||
                         functionUnaryExecuteType<Impl, UInt32>(block, arguments, result) ||
                         functionUnaryExecuteType<Impl, UInt64>(block, arguments, result) ||
                         functionUnaryExecuteType<Impl, Int8>(block, arguments, result) ||
                         functionUnaryExecuteType<Impl, Int16>(block, arguments, result) ||
                         functionUnaryExecuteType<Impl, Int32>(block, arguments, result) ||
                         functionUnaryExecuteType<Impl, Int64>(block, arguments, result) ||
                         functionUnaryExecuteType<Impl, Float32>(block, arguments, result) ||
                         functionUnaryExecuteType<Impl, Float64>(block, arguments, result);
    if (!handled) {
        LOG(FATAL) << fmt::format("Illegal column {} of argument of function {}",
                                  block.get_by_position(arguments[0]).column->get_name(),
                                  get_name());
    }
    return Status::OK();
}
/// Registers the logical functions FunctionAnd, FunctionOr, FunctionXor and FunctionNot
/// with the given function factory.
void register_function_logical(SimpleFunctionFactory& instance) {
instance.register_function<FunctionAnd>();
instance.register_function<FunctionOr>();
instance.register_function<FunctionXor>();
instance.register_function<FunctionNot>();
}
} // namespace doris::vectorized