// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. // This file is copied from // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsLogical.cpp // and modified by Doris #include "vec/functions/functions_logical.h" #include #include "vec/columns/column.h" #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_vector.h" #include "vec/columns/columns_number.h" #include "vec/common/field_visitors.h" #include "vec/common/typeid_cast.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/functions/function_helpers.h" #include "vec/functions/simple_function_factory.h" namespace doris::vectorized { namespace { using namespace FunctionsLogicalDetail; using UInt8Container = ColumnUInt8::Container; using UInt8ColumnPtrs = std::vector; MutableColumnPtr convert_from_ternary_data(const UInt8Container& ternary_data, const bool make_nullable) { const size_t rows_count = ternary_data.size(); auto new_column = ColumnUInt8::create(rows_count); std::transform(ternary_data.cbegin(), ternary_data.cend(), new_column->get_data().begin(), [](const auto x) { return x == Ternary::True; }); if (!make_nullable) return new_column; auto null_column = ColumnUInt8::create(rows_count); std::transform(ternary_data.cbegin(), ternary_data.cend(), null_column->get_data().begin(), [](const auto x) { return x == Ternary::Null; }); return ColumnNullable::create(std::move(new_column), std::move(null_column)); } template bool try_convert_column_to_uint8(const IColumn* column, UInt8Container& res) { const auto col = check_and_get_column>(column); if (!col) return false; std::transform(col->get_data().cbegin(), col->get_data().cend(), res.begin(), [](const auto x) { return x != 0; }); return true; } void convert_column_to_uint8(const IColumn* column, UInt8Container& res) { if (!try_convert_column_to_uint8(column, res) && !try_convert_column_to_uint8(column, res) && !try_convert_column_to_uint8(column, res) && !try_convert_column_to_uint8(column, res) && !try_convert_column_to_uint8(column, res) && !try_convert_column_to_uint8(column, res) && !try_convert_column_to_uint8(column, res) && !try_convert_column_to_uint8(column, res) && !try_convert_column_to_uint8(column, res)) LOG(FATAL) << "Unexpected type of column: " << column->get_name(); } template static bool extract_const_columns(ColumnRawPtrs& in, UInt8& res, Func&& func) { bool has_res = false; for (int i = static_cast(in.size()) - 1; i >= 0; --i) { if (!is_column_const(*in[i])) continue; UInt8 x = func((*in[i])[0]); if (has_res) { res = Op::apply(res, x); } else { res = x; has_res = true; } in.erase(in.begin() + i); } return has_res; } template inline bool extract_const_columns(ColumnRawPtrs& in, UInt8& res) { return extract_const_columns(in, res, [](const Field& value) { return !value.is_null() && apply_visitor(FieldVisitorConvertToNumber(), value); }); } template inline bool extract_const_columns_ternary(ColumnRawPtrs& in, UInt8& res_3v) { return extract_const_columns(in, res_3v, [](const Field& value) { return value.is_null() ? Ternary::make_value(false, true) : Ternary::make_value( apply_visitor(FieldVisitorConvertToNumber(), value)); }); } template class AssociativeApplierImpl { using ResultValueType = typename Op::ResultType; public: /// Remembers the last N columns from `in`. AssociativeApplierImpl(const UInt8ColumnPtrs& in) : vec(in[in.size() - N]->get_data()), next(in) {} /// Returns a combination of values in the i-th row of all columns stored in the constructor. ResultValueType apply(const size_t i) const { const auto& a = vec[i]; if constexpr (Op::is_saturable()) return Op::is_saturated_value(a) ? a : Op::apply(a, next.apply(i)); else return Op::apply(a, next.apply(i)); } private: const UInt8Container& vec; const AssociativeApplierImpl next; }; template class AssociativeApplierImpl { using ResultValueType = typename Op::ResultType; public: AssociativeApplierImpl(const UInt8ColumnPtrs& in) : vec(in[in.size() - 1]->get_data()) {} ResultValueType apply(const size_t i) const { return vec[i]; } private: const UInt8Container& vec; }; /// A helper class used by AssociativeGenericApplierImpl /// Allows for on-the-fly conversion of any data type into intermediate ternary representation using ValueGetter = std::function; template struct ValueGetterBuilderImpl; template struct ValueGetterBuilderImpl { static ValueGetter build(const IColumn* x) { if (const auto nullable_column = typeid_cast(x)) { if (const auto nested_column = typeid_cast*>( nullable_column->get_nested_column_ptr().get())) { return [&null_data = nullable_column->get_null_map_data(), &column_data = nested_column->get_data()](size_t i) { return Ternary::make_value(column_data[i], null_data[i]); }; } else return ValueGetterBuilderImpl::build(x); } else if (const auto column = typeid_cast*>(x)) return [&column_data = column->get_data()](size_t i) { return Ternary::make_value(column_data[i]); }; else return ValueGetterBuilderImpl::build(x); } }; template <> struct ValueGetterBuilderImpl<> { [[noreturn]] static ValueGetter build(const IColumn* x) { LOG(FATAL) << "Unknown numeric column of type: " << demangle(typeid(x).name()); } }; using ValueGetterBuilder = ValueGetterBuilderImpl; /// This class together with helper class ValueGetterBuilder can be used with columns of arbitrary data type /// Allows for on-the-fly conversion of any type of data into intermediate ternary representation /// and eliminates the need to materialize data columns in intermediate representation template class AssociativeGenericApplierImpl { using ResultValueType = typename Op::ResultType; public: /// Remembers the last N columns from `in`. AssociativeGenericApplierImpl(const ColumnRawPtrs& in) : val_getter {ValueGetterBuilder::build(in[in.size() - N])}, next {in} {} /// Returns a combination of values in the i-th row of all columns stored in the constructor. ResultValueType apply(const size_t i) const { const auto a = val_getter(i); if constexpr (Op::is_saturable()) return Op::is_saturated_value(a) ? a : Op::apply(a, next.apply(i)); else return Op::apply(a, next.apply(i)); } private: const ValueGetter val_getter; const AssociativeGenericApplierImpl next; }; template class AssociativeGenericApplierImpl { using ResultValueType = typename Op::ResultType; public: /// Remembers the last N columns from `in`. AssociativeGenericApplierImpl(const ColumnRawPtrs& in) : val_getter {ValueGetterBuilder::build(in[in.size() - 1])} {} inline ResultValueType apply(const size_t i) const { return val_getter(i); } private: const ValueGetter val_getter; }; /// Apply target function by feeding it "batches" of N columns /// Combining 10 columns per pass is the fastest for large block sizes. /// For small block sizes - more columns is faster. template typename OperationApplierImpl, size_t N = 10> struct OperationApplier { template static void apply(Columns& in, ResultColumn& result) { while (in.size() > 1) { do_batched_apply(in, result->get_data()); in.push_back(result.get()); } } template static void NO_INLINE do_batched_apply(Columns& in, ResultData& result_data) { if (N > in.size()) { OperationApplier::do_batched_apply(in, result_data); return; } const OperationApplierImpl operationApplierImpl(in); size_t i = 0; for (auto& res : result_data) res = operationApplierImpl.apply(i++); in.erase(in.end() - N, in.end()); } }; template typename OperationApplierImpl> struct OperationApplier { template static void NO_INLINE do_batched_apply(Columns&, Result&) { LOG(FATAL) << "OperationApplier<...>::apply(...): not enough arguments to run this method"; } }; template static void execute_for_ternary_logic_impl(ColumnRawPtrs arguments, ColumnWithTypeAndName& result_info, size_t input_rows_count) { /// Combine all constant columns into a single constant value. UInt8 const_3v_value = 0; const bool has_consts = extract_const_columns_ternary(arguments, const_3v_value); /// If the constant value uniquely determines the result, return it. if (has_consts && (arguments.empty() || (Op::is_saturable() && Op::is_saturated_value(const_3v_value)))) { result_info.column = ColumnConst::create(convert_from_ternary_data(UInt8Container({const_3v_value}), result_info.type->is_nullable()), input_rows_count); return; } const auto result_column = ColumnUInt8::create(input_rows_count); MutableColumnPtr const_column_holder; if (has_consts) { const_column_holder = convert_from_ternary_data( UInt8Container(input_rows_count, const_3v_value), const_3v_value == Ternary::Null); arguments.push_back(const_column_holder.get()); } OperationApplier::apply(arguments, result_column); result_info.column = convert_from_ternary_data(result_column->get_data(), result_info.type->is_nullable()); } template struct TypedExecutorInvoker; template using FastApplierImpl = TypedExecutorInvoker; template struct TypedExecutorInvoker { template static void apply(const ColumnVector& x, const IColumn& y, Result& result) { if (const auto column = typeid_cast*>(&y)) std::transform(x.get_data().cbegin(), x.get_data().cend(), column->get_data().cbegin(), result.begin(), [](const auto a, const auto b) { return Op::apply(!!a, !!b); }); else TypedExecutorInvoker::template apply(x, y, result); } template static void apply(const IColumn& x, const IColumn& y, Result& result) { if (const auto column = typeid_cast*>(&x)) FastApplierImpl::template apply(*column, y, result); else TypedExecutorInvoker::apply(x, y, result); } }; template struct TypedExecutorInvoker { template static void apply(const ColumnVector&, const IColumn& y, Result&) { LOG(FATAL) << "Unknown numeric column y of type: " << demangle(typeid(y).name()); } template static void apply(const IColumn& x, const IColumn&, Result&) { LOG(FATAL) << "Unknown numeric column x of type: " << demangle(typeid(x).name()); } }; template static void basic_execute_impl(ColumnRawPtrs arguments, ColumnWithTypeAndName& result_info, size_t input_rows_count) { /// Combine all constant columns into a single constant value. UInt8 const_val = 0; bool has_consts = extract_const_columns(arguments, const_val); /// If the constant value uniquely determines the result, return it. if (has_consts && (arguments.empty() || Op::apply(const_val, 0) == Op::apply(const_val, 1))) { if (!arguments.empty()) const_val = Op::apply(const_val, 0); result_info.column = DataTypeUInt8().create_column_const(input_rows_count, to_field(const_val)); return; } /// If the constant value is a neutral element, let's forget about it. if (has_consts && Op::apply(const_val, 0) == 0 && Op::apply(const_val, 1) == 1) has_consts = false; UInt8ColumnPtrs uint8_args; auto col_res = ColumnUInt8::create(); UInt8Container& vec_res = col_res->get_data(); if (has_consts) { vec_res.assign(input_rows_count, const_val); uint8_args.push_back(col_res.get()); } else { vec_res.resize(input_rows_count); } /// FastPath detection goes in here if (arguments.size() == (has_consts ? 1 : 2)) { if (has_consts) FastApplierImpl::apply(*arguments[0], *col_res, col_res->get_data()); else FastApplierImpl::apply(*arguments[0], *arguments[1], col_res->get_data()); result_info.column = std::move(col_res); return; } /// Convert all columns to UInt8 Columns converted_columns; for (const IColumn* column : arguments) { if (auto uint8_column = check_and_get_column(column)) uint8_args.push_back(uint8_column); else { auto converted_column = ColumnUInt8::create(input_rows_count); convert_column_to_uint8(column, converted_column->get_data()); uint8_args.push_back(converted_column.get()); converted_columns.emplace_back(std::move(converted_column)); } } OperationApplier::apply(uint8_args, col_res); /// This is possible if there is exactly one non-constant among the arguments, and it is of type UInt8. if (uint8_args[0] != col_res.get()) vec_res.assign(uint8_args[0]->get_data()); result_info.column = std::move(col_res); } } // namespace template DataTypePtr FunctionAnyArityLogical::get_return_type_impl( const DataTypes& arguments) const { if (arguments.size() < 2) { LOG(FATAL) << fmt::format( "Number of arguments for function \"{}\" should be at least 2: passed {}", get_name(), arguments.size()); } bool has_nullable_arguments = false; for (size_t i = 0; i < arguments.size(); ++i) { const auto& arg_type = arguments[i]; if (!has_nullable_arguments) { has_nullable_arguments = arg_type->is_nullable(); if (has_nullable_arguments && !Impl::special_implementation_for_nulls()) { LOG(WARNING) << fmt::format( "Logical error: Unexpected type of argument for function \"{}\" argument " "{} is of type {}", get_name(), i + 1, arg_type->get_name()); } } if (!(is_native_number(arg_type) || (Impl::special_implementation_for_nulls() && (arg_type->only_null() || is_native_number(remove_nullable(arg_type)))))) { LOG(FATAL) << fmt::format("Illegal type ({}) of {} argument of function {}", arg_type->get_name(), i + 1, get_name()); } } auto result_type = std::make_shared(); return has_nullable_arguments ? make_nullable(result_type) : result_type; } template Status FunctionAnyArityLogical::execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result_index, size_t input_rows_count) { ColumnRawPtrs args_in; for (const auto arg_index : arguments) args_in.push_back(block.get_by_position(arg_index).column.get()); auto& result_info = block.get_by_position(result_index); if (result_info.type->is_nullable()) execute_for_ternary_logic_impl(std::move(args_in), result_info, input_rows_count); else basic_execute_impl(std::move(args_in), result_info, input_rows_count); return Status::OK(); } template struct UnaryOperationImpl { using ResultType = typename Op::ResultType; using ArrayA = typename ColumnVector::Container; using ArrayC = typename ColumnVector::Container; static void NO_INLINE vector(const ArrayA& a, ArrayC& c) { std::transform(a.cbegin(), a.cend(), c.begin(), [](const auto x) { return Op::apply(x); }); } }; template