[Refactor](function) opt the exec of function with null column (#16256)
This commit is contained in:
@ -41,6 +41,8 @@ doris::Status VectorizedFnCall::prepare(doris::RuntimeState* state,
|
||||
argument_template.reserve(_children.size());
|
||||
std::vector<std::string_view> child_expr_name;
|
||||
for (auto child : _children) {
|
||||
// TODO: rethink whether we really need to create a column here; maybe a nullptr is enough just to
|
||||
// get the function
|
||||
auto column = child->data_type()->create_column();
|
||||
argument_template.emplace_back(std::move(column), child->data_type(), child->expr_name());
|
||||
child_expr_name.emplace_back(child->expr_name());
|
||||
|
||||
@ -217,11 +217,12 @@ Status PreparedFunctionImpl::default_implementation_for_nulls(
|
||||
}
|
||||
|
||||
if (null_presence.has_nullable) {
|
||||
Block temporary_block = create_block_with_nested_columns(block, args, result);
|
||||
auto [temporary_block, new_args, new_result] =
|
||||
create_block_with_nested_columns(block, args, result);
|
||||
RETURN_IF_ERROR(execute_without_low_cardinality_columns(
|
||||
context, temporary_block, args, result, temporary_block.rows(), dry_run));
|
||||
context, temporary_block, new_args, new_result, temporary_block.rows(), dry_run));
|
||||
block.get_by_position(result).column =
|
||||
wrap_in_nullable(temporary_block.get_by_position(result).column, block, args,
|
||||
wrap_in_nullable(temporary_block.get_by_position(new_result).column, block, args,
|
||||
result, input_rows_count);
|
||||
*executed = true;
|
||||
return Status::OK();
|
||||
@ -295,10 +296,9 @@ DataTypePtr FunctionBuilderImpl::get_return_type_without_low_cardinality(
|
||||
}
|
||||
if (null_presence.has_nullable) {
|
||||
ColumnNumbers numbers(arguments.size());
|
||||
for (size_t i = 0; i < arguments.size(); i++) {
|
||||
numbers[i] = i;
|
||||
}
|
||||
Block nested_block = create_block_with_nested_columns(Block(arguments), numbers);
|
||||
std::iota(numbers.begin(), numbers.end(), 0);
|
||||
auto [nested_block, _] =
|
||||
create_block_with_nested_columns(Block(arguments), numbers, false);
|
||||
auto return_type = get_return_type_impl(
|
||||
ColumnsWithTypeAndName(nested_block.begin(), nested_block.end()));
|
||||
return make_nullable(return_type);
|
||||
|
||||
@ -1592,7 +1592,9 @@ private:
|
||||
Block tmp_block;
|
||||
size_t tmp_res_index = 0;
|
||||
if (source_is_nullable) {
|
||||
tmp_block = create_block_with_nested_columns_only_args(block, arguments);
|
||||
auto [t_block, tmp_args] =
|
||||
create_block_with_nested_columns(block, arguments, true);
|
||||
tmp_block = std::move(t_block);
|
||||
tmp_res_index = tmp_block.columns();
|
||||
tmp_block.insert({nullptr, nested_type, ""});
|
||||
|
||||
@ -1624,7 +1626,8 @@ private:
|
||||
return [wrapper, skip_not_null_check](FunctionContext* context, Block& block,
|
||||
const ColumnNumbers& arguments,
|
||||
const size_t result, size_t input_rows_count) {
|
||||
Block tmp_block = create_block_with_nested_columns(block, arguments, result);
|
||||
auto [tmp_block, tmp_args, tmp_res] =
|
||||
create_block_with_nested_columns(block, arguments, result);
|
||||
|
||||
/// Check that all values are not-NULL.
|
||||
/// Check can be skipped in case if LowCardinality dictionary is transformed.
|
||||
@ -1640,8 +1643,8 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
RETURN_IF_ERROR(wrapper(context, tmp_block, arguments, result, input_rows_count));
|
||||
block.get_by_position(result).column = tmp_block.get_by_position(result).column;
|
||||
RETURN_IF_ERROR(wrapper(context, tmp_block, tmp_args, tmp_res, input_rows_count));
|
||||
block.get_by_position(result).column = tmp_block.get_by_position(tmp_res).column;
|
||||
return Status::OK();
|
||||
};
|
||||
} else {
|
||||
|
||||
@ -20,88 +20,83 @@
|
||||
|
||||
#include "vec/functions/function_helpers.h"
|
||||
|
||||
#include "common/consts.h"
|
||||
#include "vec/columns/column_nullable.h"
|
||||
#include "vec/data_types/data_type_nullable.h"
|
||||
#include "vec/functions/function.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
Block create_block_with_nested_columns_only_args(const Block& block, const ColumnNumbers& args) {
|
||||
std::set<size_t> args_set(args.begin(), args.end());
|
||||
std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(const Block& block,
|
||||
const ColumnNumbers& args,
|
||||
const bool need_check_same) {
|
||||
Block res;
|
||||
ColumnNumbers res_args(args.size());
|
||||
|
||||
for (auto i : args_set) {
|
||||
const auto& col = block.get_by_position(i);
|
||||
// only build temp block by args column, if args[i] == args[j]
|
||||
// just keep one
|
||||
for (size_t i = 0; i < args.size(); ++i) {
|
||||
bool is_in_res = false;
|
||||
size_t pre_loc = 0;
|
||||
|
||||
if (col.type->is_nullable()) {
|
||||
const DataTypePtr& nested_type =
|
||||
static_cast<const DataTypeNullable&>(*col.type).get_nested_type();
|
||||
|
||||
if (!col.column) {
|
||||
res.insert({nullptr, nested_type, col.name});
|
||||
} else if (auto* nullable = check_and_get_column<ColumnNullable>(*col.column)) {
|
||||
const auto& nested_col = nullable->get_nested_column_ptr();
|
||||
res.insert({nested_col, nested_type, col.name});
|
||||
} else if (auto* const_column = check_and_get_column<ColumnConst>(*col.column)) {
|
||||
const auto& nested_col =
|
||||
check_and_get_column<ColumnNullable>(const_column->get_data_column())
|
||||
->get_nested_column_ptr();
|
||||
res.insert({ColumnConst::create(nested_col, col.column->size()), nested_type,
|
||||
col.name});
|
||||
} else {
|
||||
LOG(FATAL) << "Illegal column for DataTypeNullable";
|
||||
if (need_check_same) {
|
||||
for (int j = 0; j < i; ++j) {
|
||||
if (args[j] == args[i]) {
|
||||
is_in_res = true;
|
||||
pre_loc = res_args[j];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_in_res) {
|
||||
const auto& col = block.get_by_position(args[i]);
|
||||
if (col.type->is_nullable()) {
|
||||
const DataTypePtr& nested_type =
|
||||
static_cast<const DataTypeNullable&>(*col.type).get_nested_type();
|
||||
|
||||
if (!col.column) {
|
||||
res.insert({nullptr, nested_type, col.name});
|
||||
} else if (auto* nullable = check_and_get_column<ColumnNullable>(*col.column)) {
|
||||
const auto& nested_col = nullable->get_nested_column_ptr();
|
||||
res.insert({nested_col, nested_type, col.name});
|
||||
} else if (auto* const_column = check_and_get_column<ColumnConst>(*col.column)) {
|
||||
const auto& nested_col =
|
||||
check_and_get_column<ColumnNullable>(const_column->get_data_column())
|
||||
->get_nested_column_ptr();
|
||||
res.insert({ColumnConst::create(nested_col, col.column->size()), nested_type,
|
||||
col.name});
|
||||
} else {
|
||||
LOG(FATAL) << "Illegal column for DataTypeNullable";
|
||||
}
|
||||
} else {
|
||||
res.insert(col);
|
||||
}
|
||||
|
||||
res_args[i] = res.columns() - 1;
|
||||
} else {
|
||||
res.insert(col);
|
||||
res_args[i] = pre_loc;
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static Block create_block_with_nested_columns_impl(const Block& block,
|
||||
const std::unordered_set<size_t>& args) {
|
||||
Block res;
|
||||
size_t columns = block.columns();
|
||||
|
||||
for (size_t i = 0; i < columns; ++i) {
|
||||
const auto& col = block.get_by_position(i);
|
||||
|
||||
if (args.count(i) && col.type->is_nullable()) {
|
||||
const DataTypePtr& nested_type =
|
||||
static_cast<const DataTypeNullable&>(*col.type).get_nested_type();
|
||||
|
||||
if (!col.column) {
|
||||
res.insert({nullptr, nested_type, col.name});
|
||||
} else if (auto* nullable = check_and_get_column<ColumnNullable>(*col.column)) {
|
||||
const auto& nested_col = nullable->get_nested_column_ptr();
|
||||
res.insert({nested_col, nested_type, col.name});
|
||||
} else if (auto* const_column = check_and_get_column<ColumnConst>(*col.column)) {
|
||||
const auto& nested_col =
|
||||
check_and_get_column<ColumnNullable>(const_column->get_data_column())
|
||||
->get_nested_column_ptr();
|
||||
res.insert({ColumnConst::create(nested_col, col.column->size()), nested_type,
|
||||
col.name});
|
||||
} else {
|
||||
LOG(FATAL) << "Illegal column for DataTypeNullable";
|
||||
}
|
||||
} else
|
||||
res.insert(col);
|
||||
// TODO: only support match function, rethink the logic
|
||||
for (const auto& ctn : block) {
|
||||
if (ctn.name.size() > BeConsts::BLOCK_TEMP_COLUMN_PREFIX.size() &&
|
||||
starts_with(ctn.name, BeConsts::BLOCK_TEMP_COLUMN_PREFIX)) {
|
||||
res.insert(ctn);
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
return {res, res_args};
|
||||
}
|
||||
|
||||
Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args) {
|
||||
std::unordered_set<size_t> args_set(args.begin(), args.end());
|
||||
return create_block_with_nested_columns_impl(block, args_set);
|
||||
}
|
||||
|
||||
Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args,
|
||||
size_t result) {
|
||||
std::unordered_set<size_t> args_set(args.begin(), args.end());
|
||||
args_set.insert(result);
|
||||
return create_block_with_nested_columns_impl(block, args_set);
|
||||
std::tuple<Block, ColumnNumbers, size_t> create_block_with_nested_columns(const Block& block,
|
||||
const ColumnNumbers& args,
|
||||
size_t result) {
|
||||
auto [res, res_args] = create_block_with_nested_columns(block, args, true);
|
||||
// insert result column in temp block
|
||||
res.insert(block.get_by_position(result));
|
||||
return {res, res_args, res.columns() - 1};
|
||||
}
|
||||
|
||||
void validate_argument_type(const IFunction& func, const DataTypes& arguments,
|
||||
|
||||
@ -86,21 +86,17 @@ inline std::enable_if_t<IsDecimalNumber<T>, Field> to_field(const T& x, UInt32 s
|
||||
|
||||
Columns convert_const_tuple_to_constant_elements(const ColumnConst& column);
|
||||
|
||||
/// Returns the copy of a given block in which each column specified in
|
||||
/// the "arguments" parameter is replaced with its respective nested
|
||||
/// column if it is nullable.
|
||||
Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args);
|
||||
/// Returns a temporary block together with the temporary argument positions, which are in the
|
||||
/// same order as "args". Only the args columns are kept in the temporary block, and each column
|
||||
/// specified in the "args" parameter is replaced with its respective nested column if it is nullable.
|
||||
std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(const Block& block,
|
||||
const ColumnNumbers& args,
|
||||
const bool need_check_same);
|
||||
|
||||
/// Similar function as above. Additionally transform the result type if needed.
|
||||
Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args,
|
||||
size_t result);
|
||||
|
||||
/// Returns a copy of the given block that keeps only the columns specified in
|
||||
/// the "arguments" parameter, each of which is replaced with its respective nested
|
||||
/// column if it is nullable.
|
||||
/// TODO: the old function `create_block_with_nested_columns` has a performance problem, replace all
|
||||
/// by the function and delete old one.
|
||||
Block create_block_with_nested_columns_only_args(const Block& block, const ColumnNumbers& args);
|
||||
// Same as above, and additionally returns the position of the result column in the new block as the last tuple element
|
||||
std::tuple<Block, ColumnNumbers, size_t> create_block_with_nested_columns(const Block& block,
|
||||
const ColumnNumbers& args,
|
||||
size_t result);
|
||||
|
||||
/// Checks argument type at specified index with predicate.
|
||||
/// throws if there is no argument at specified index or if predicate returns false.
|
||||
|
||||
Reference in New Issue
Block a user