// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. // This file is copied from // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionBitmap.h // and modified by Doris #include "util/string_parser.hpp" #include "vec/columns/column.h" #include "vec/columns/column_array.h" #include "vec/columns/columns_number.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_quantilestate.h" #include "vec/functions/function_always_not_nullable.h" #include "vec/functions/function_const.h" #include "vec/functions/function_string.h" #include "vec/functions/simple_function_factory.h" namespace doris::vectorized { template struct QuantileStateEmpty { static constexpr auto name = "quantile_state_empty"; using ReturnColVec = ColumnQuantileState; static DataTypePtr get_return_type() { return std::make_shared>(); } static auto init_value() { return QuantileState {}; } }; template class FunctionToQuantileState : public IFunction { public: static constexpr auto name = "to_quantile_state"; String get_name() const override { return name; } static FunctionPtr create() { return std::make_shared>(); } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return std::make_shared>(); } size_t get_number_of_arguments() const override { return 2; } bool use_default_implementation_for_nulls() const override { return false; } bool use_default_implementation_for_constants() const override { return true; } template Status execute_internal(const ColumnPtr& column, const DataTypePtr& data_type, MutableColumnPtr& column_result) { auto type_error = [&]() { return Status::RuntimeError("Illegal column {} of argument of function {}", column->get_name(), get_name()); }; const ColumnNullable* col_nullable = nullptr; const ColumnUInt8* col_nullmap = nullptr; const ColumnType* col = nullptr; const NullMap* nullmap = nullptr; if constexpr (is_nullable) { col_nullable = check_and_get_column(column.get()); col_nullmap = check_and_get_column( col_nullable->get_null_map_column_ptr().get()); col = check_and_get_column(col_nullable->get_nested_column_ptr().get()); if (col == nullptr || col_nullmap == nullptr) { return type_error(); } nullmap = &col_nullmap->get_data(); } else { col = check_and_get_column(column.get()); } auto* res_column = reinterpret_cast*>(column_result.get()); auto& res_data = res_column->get_data(); size_t size = col->size(); for (size_t i = 0; i < size; ++i) { if constexpr (is_nullable) { if ((*nullmap)[i]) { continue; } } if constexpr (std::is_same_v) { const ColumnString::Chars& data = col->get_chars(); const ColumnString::Offsets& offsets = col->get_offsets(); const char* raw_str = reinterpret_cast(&data[offsets[i - 1]]); size_t str_size = offsets[i] - offsets[i - 1]; StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; InternalType value = StringParser::string_to_float(raw_str, str_size, &parse_result); if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { res_data[i].add_value(value); } else { std::stringstream ss; ss << "The input column content: " << std::string(raw_str, str_size) << " is not valid in function: " << get_name(); LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } } else if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) { // InternalType only can be double or float, so we can cast directly InternalType value = (InternalType)col->get_data()[i]; res_data[i].set_compression(compression); res_data[i].add_value(value); } else { type_error(); } } return Status::OK(); } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { if constexpr (!(std::is_same_v || std::is_same_v)) { std::stringstream ss; ss << "The InternalType of quantile_state must be float or double"; return Status::InternalError(ss.str()); } const ColumnPtr& column = block.get_by_position(arguments[0]).column; const DataTypePtr& data_type = block.get_by_position(arguments[0]).type; auto compression_arg = check_and_get_column_const( block.get_by_position(arguments.back()).column); if (compression_arg) { auto compression_arg_val = compression_arg->get_value(); if (compression_arg_val && compression_arg_val >= QUANTILE_STATE_COMPRESSION_MIN && compression_arg_val <= QUANTILE_STATE_COMPRESSION_MAX) { this->compression = compression_arg_val; } } WhichDataType which(data_type); MutableColumnPtr column_result = get_return_type_impl({})->create_column(); column_result->resize(input_rows_count); auto type_error = [&]() { return Status::RuntimeError("Illegal column {} of argument of function {}", block.get_by_position(arguments[0]).column->get_name(), get_name()); }; Status status = Status::OK(); if (which.is_nullable()) { const DataTypePtr& nested_data_type = static_cast(data_type.get())->get_nested_type(); WhichDataType nested_which(nested_data_type); if (nested_which.is_string_or_fixed_string()) { status = execute_internal(column, data_type, column_result); } else if (nested_which.is_int64()) { status = execute_internal(column, data_type, column_result); } else if (which.is_float32()) { status = execute_internal(column, data_type, column_result); } else if (which.is_float64()) { status = execute_internal(column, data_type, column_result); } else { return type_error(); } } else { if (which.is_string_or_fixed_string()) { status = execute_internal(column, data_type, column_result); } else if (which.is_int64()) { status = execute_internal(column, data_type, column_result); } else if (which.is_float32()) { status = execute_internal(column, data_type, column_result); } else if (which.is_float64()) { status = execute_internal(column, data_type, column_result); } else { return type_error(); } } if (status.ok()) { block.replace_by_position(result, std::move(column_result)); } return status; } private: float compression = 2048; }; template class FunctionQuantileStatePercent : public IFunction { public: static constexpr auto name = "quantile_percent"; String get_name() const override { return name; } static FunctionPtr create() { return std::make_shared>(); } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return std::make_shared(); } size_t get_number_of_arguments() const override { return 2; } bool use_default_implementation_for_nulls() const override { return false; } bool use_default_implementation_for_constants() const override { return true; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { auto res_data_column = ColumnFloat64::create(); auto& res = res_data_column->get_data(); auto data_null_map = ColumnUInt8::create(input_rows_count, 0); auto& null_map = data_null_map->get_data(); auto column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); if (auto* nullable = check_and_get_column(*column)) { VectorizedUtils::update_null_map(null_map, nullable->get_null_map_data()); column = nullable->get_nested_column_ptr(); } auto str_col = assert_cast*>(column.get()); auto& col_data = str_col->get_data(); auto percent_arg = check_and_get_column_const( block.get_by_position(arguments.back()).column); if (!percent_arg) { LOG(FATAL) << fmt::format( "Second argument to {} must be a constant string describing type", get_name()); } float percent_arg_value = percent_arg->get_value(); if (percent_arg_value < 0 || percent_arg_value > 1) { std::stringstream ss; ss << "the input argument of percentage: " << percent_arg_value << " is not valid, must be in range [0,1] "; LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } res.reserve(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { if (null_map[i]) { // if null push_back meaningless result to make sure idxs can be matched res.push_back(0); continue; } res.push_back(col_data[i].get_value_by_percentile(percent_arg_value)); } block.replace_by_position(result, std::move(res_data_column)); return Status::OK(); } }; using FunctionQuantileStateEmpty = FunctionConst, false>; using FunctionQuantileStatePercentDouble = FunctionQuantileStatePercent; using FunctionToQuantileStateDouble = FunctionToQuantileState; void register_function_quantile_state(SimpleFunctionFactory& factory) { factory.register_function(); factory.register_function(); factory.register_function(); } } // namespace doris::vectorized