[Chore](function) refactor of quantile_state (#23862)

refactor of quantile_state
This commit is contained in:
Pxl
2023-09-06 15:39:19 +08:00
committed by GitHub
parent 7625d1514a
commit a96adc01aa
28 changed files with 268 additions and 381 deletions

View File

@ -33,7 +33,6 @@
#include "common/compiler_util.h" // IWYU pragma: keep
#include "common/status.h"
#include "util/quantile_state.h"
#include "util/string_parser.hpp"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column.h"
#include "vec/columns/column_complex.h"
@ -63,35 +62,29 @@ class FunctionContext;
namespace doris::vectorized {
// Traits struct describing the "quantile_state_empty" function: it supplies the
// function name, the result column type, the result data type and the initial
// value. It is plugged into FunctionConst where the functions are registered.
// NOTE(review): this span is a rendered diff, so two variants of each member are
// shown back to back. The ones parameterised on InternalType appear to be the
// pre-refactor lines and the non-templated ones their replacements -- confirm
// against the actual file before relying on either.
template <typename InternalType>
struct QuantileStateEmpty {
// Name string of the function.
static constexpr auto name = "quantile_state_empty";
// Column type that holds the result (InternalType-parameterised variant).
using ReturnColVec = ColumnQuantileState<InternalType>;
// Data type of the result (InternalType-parameterised variant).
static DataTypePtr get_return_type() {
return std::make_shared<DataTypeQuantileState<InternalType>>();
}
// Value-initialised quantile state (InternalType-parameterised variant).
static auto init_value() { return QuantileState<InternalType> {}; }
// Column type that holds the result (non-templated variant).
using ReturnColVec = ColumnQuantileState;
// Data type of the result (non-templated variant).
static DataTypePtr get_return_type() { return std::make_shared<DataTypeQuantileState>(); }
// Value-initialised quantile state (non-templated variant).
static auto init_value() { return QuantileState {}; }
};
template <typename InternalType>
class FunctionToQuantileState : public IFunction {
public:
static constexpr auto name = "to_quantile_state";
String get_name() const override { return name; }
static FunctionPtr create() {
return std::make_shared<FunctionToQuantileState<InternalType>>();
}
static FunctionPtr create() { return std::make_shared<FunctionToQuantileState>(); }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return std::make_shared<DataTypeQuantileState<InternalType>>();
return std::make_shared<DataTypeQuantileState>();
}
size_t get_number_of_arguments() const override { return 2; }
bool use_default_implementation_for_nulls() const override { return false; }
template <typename ColumnType, bool is_nullable>
template <bool is_nullable>
Status execute_internal(const ColumnPtr& column, const DataTypePtr& data_type,
MutableColumnPtr& column_result) {
auto type_error = [&]() {
@ -100,75 +93,41 @@ public:
};
const ColumnNullable* col_nullable = nullptr;
const ColumnUInt8* col_nullmap = nullptr;
const ColumnType* col = nullptr;
const ColumnFloat64* col = nullptr;
const NullMap* nullmap = nullptr;
if constexpr (is_nullable) {
col_nullable = check_and_get_column<ColumnNullable>(column.get());
col_nullmap = check_and_get_column<ColumnUInt8>(
col_nullable->get_null_map_column_ptr().get());
col = check_and_get_column<ColumnType>(col_nullable->get_nested_column_ptr().get());
col = check_and_get_column<ColumnFloat64>(col_nullable->get_nested_column_ptr().get());
if (col == nullptr || col_nullmap == nullptr) {
return type_error();
}
nullmap = &col_nullmap->get_data();
} else {
col = check_and_get_column<ColumnType>(column.get());
col = check_and_get_column<ColumnFloat64>(column.get());
}
auto* res_column =
reinterpret_cast<ColumnQuantileState<InternalType>*>(column_result.get());
auto* res_column = reinterpret_cast<ColumnQuantileState*>(column_result.get());
auto& res_data = res_column->get_data();
size_t size = col->size();
for (size_t i = 0; i < size; ++i) {
if constexpr (is_nullable) {
if ((*nullmap)[i]) {
res_data[i].clear();
continue;
}
}
if constexpr (std::is_same_v<ColumnType, ColumnString>) {
const ColumnString::Chars& data = col->get_chars();
const ColumnString::Offsets& offsets = col->get_offsets();
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t str_size = offsets[i] - offsets[i - 1];
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
InternalType value = StringParser::string_to_float<InternalType>(raw_str, str_size,
&parse_result);
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
res_data[i].add_value(value);
} else {
std::stringstream ss;
ss << "The input column content: " << std::string(raw_str, str_size)
<< " is not valid in function: " << get_name();
LOG(WARNING) << ss.str();
return Status::InternalError(ss.str());
}
} else if constexpr (std::is_same_v<ColumnType, ColumnInt64> ||
std::is_same_v<ColumnType, ColumnFloat32> ||
std::is_same_v<ColumnType, ColumnFloat64>) {
// InternalType only can be double or float, so we can cast directly
InternalType value = (InternalType)col->get_data()[i];
res_data[i].set_compression(compression);
res_data[i].add_value(value);
} else {
type_error();
}
double value = (double)col->get_data()[i];
res_data[i].set_compression(compression);
res_data[i].add_value(value);
}
return Status::OK();
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
if constexpr (!(std::is_same_v<InternalType, float> ||
std::is_same_v<InternalType, double>)) {
std::stringstream ss;
ss << "The InternalType of quantile_state must be float or double";
return Status::InternalError(ss.str());
}
const ColumnPtr& column = block.get_by_position(arguments[0]).column;
const DataTypePtr& data_type = block.get_by_position(arguments[0]).type;
auto compression_arg = check_and_get_column_const<ColumnFloat32>(
@ -184,39 +143,14 @@ public:
MutableColumnPtr column_result = get_return_type_impl({})->create_column();
column_result->resize(input_rows_count);
auto type_error = [&]() {
return Status::RuntimeError("Illegal column {} of argument of function {}",
block.get_by_position(arguments[0]).column->get_name(),
get_name());
};
Status status = Status::OK();
if (which.is_nullable()) {
const DataTypePtr& nested_data_type =
static_cast<const DataTypeNullable*>(data_type.get())->get_nested_type();
WhichDataType nested_which(nested_data_type);
if (nested_which.is_string_or_fixed_string()) {
status = execute_internal<ColumnString, true>(column, data_type, column_result);
} else if (nested_which.is_int64()) {
status = execute_internal<ColumnInt64, true>(column, data_type, column_result);
} else if (which.is_float32()) {
status = execute_internal<ColumnFloat32, true>(column, data_type, column_result);
} else if (which.is_float64()) {
status = execute_internal<ColumnFloat64, true>(column, data_type, column_result);
} else {
return type_error();
}
execute_internal<true>(column, data_type, column_result);
} else {
if (which.is_string_or_fixed_string()) {
status = execute_internal<ColumnString, false>(column, data_type, column_result);
} else if (which.is_int64()) {
status = execute_internal<ColumnInt64, false>(column, data_type, column_result);
} else if (which.is_float32()) {
status = execute_internal<ColumnFloat32, false>(column, data_type, column_result);
} else if (which.is_float64()) {
status = execute_internal<ColumnFloat64, false>(column, data_type, column_result);
} else {
return type_error();
}
execute_internal<false>(column, data_type, column_result);
}
if (status.ok()) {
block.replace_by_position(result, std::move(column_result));
@ -228,15 +162,12 @@ private:
float compression = 2048;
};
template <typename InternalType>
class FunctionQuantileStatePercent : public IFunction {
public:
static constexpr auto name = "quantile_percent";
String get_name() const override { return name; }
static FunctionPtr create() {
return std::make_shared<FunctionQuantileStatePercent<InternalType>>();
}
static FunctionPtr create() { return std::make_shared<FunctionQuantileStatePercent>(); }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return std::make_shared<DataTypeFloat64>();
@ -258,7 +189,7 @@ public:
VectorizedUtils::update_null_map(null_map, nullable->get_null_map_data());
column = nullable->get_nested_column_ptr();
}
auto str_col = assert_cast<const ColumnQuantileState<InternalType>*>(column.get());
auto str_col = assert_cast<const ColumnQuantileState*>(column.get());
auto& col_data = str_col->get_data();
auto percent_arg = check_and_get_column_const<ColumnFloat32>(
block.get_by_position(arguments.back()).column);
@ -290,14 +221,10 @@ public:
}
};
// Aliases that instantiate the quantile-state function templates with double.
// NOTE(review): in this rendered diff these appear to be pre-refactor lines; the
// registration function also contains lines that name the classes directly,
// without these aliases -- confirm which set the real file keeps.
using FunctionQuantileStateEmpty = FunctionConst<QuantileStateEmpty<double>, false>;
using FunctionQuantileStatePercentDouble = FunctionQuantileStatePercent<double>;
using FunctionToQuantileStateDouble = FunctionToQuantileState<double>;
// Registers the quantile-state functions (the FunctionConst wrapper around
// QuantileStateEmpty, FunctionQuantileStatePercent and FunctionToQuantileState)
// with the supplied factory.
// NOTE(review): this span is a rendered diff, so two sets of three registrations
// are shown. The first set goes through the double-instantiated aliases and
// appears to be the pre-refactor code; the second set names the classes directly
// and appears to be the replacement -- confirm against the actual file.
void register_function_quantile_state(SimpleFunctionFactory& factory) {
// Registrations through the double-instantiated aliases.
factory.register_function<FunctionQuantileStateEmpty>();
factory.register_function<FunctionQuantileStatePercentDouble>();
factory.register_function<FunctionToQuantileStateDouble>();
// Registrations naming the classes directly.
factory.register_function<FunctionConst<QuantileStateEmpty, false>>();
factory.register_function<FunctionQuantileStatePercent>();
factory.register_function<FunctionToQuantileState>();
}
} // namespace doris::vectorized