[feature-wip](new-scan) refactor some interface about predicate push down in scan node (#12527)
This PR introduce a new enum type `PushDownType`:
```
enum class PushDownType {
// The predicate can not be pushed down to data source
UNACCEPTABLE,
// The predicate can be pushed down to data source
// and the data source can fully evaludate it
ACCEPTABLE,
// The predicate can be pushed down to data source
// but the data source can not fully evaluate it.
PARTIAL_ACCEPTABLE
};
```
And derived class of VScanNode can override following method to determine whether to accept
a bianry/in/bloom filter/is null predicate:
```
PushDownType _should_push_down_binary_predicate();
PushDownType _should_push_down_in_predicate();
PushDownType _should_push_down_function_filter();
PushDownType _should_push_down_bloom_filter();
PushDownType _should_push_down_is_null_predicate();
```
This commit is contained in:
@ -125,7 +125,7 @@ static std::string olap_filters_to_string(const std::vector<doris::TCondition>&
|
||||
filters_string += "[";
|
||||
for (auto it = filters.cbegin(); it != filters.cend(); it++) {
|
||||
if (it != filters.cbegin()) {
|
||||
filters_string += ",";
|
||||
filters_string += ", ";
|
||||
}
|
||||
filters_string += olap_filter_to_string(*it);
|
||||
}
|
||||
@ -181,6 +181,12 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() {
|
||||
}
|
||||
}
|
||||
|
||||
// Append value ranges in "_not_in_value_ranges"
|
||||
for (auto& range : _not_in_value_ranges) {
|
||||
std::visit([&](auto&& the_range) { the_range.to_in_condition(_olap_filters, false); },
|
||||
range);
|
||||
}
|
||||
|
||||
_runtime_profile->add_info_string("PushDownPredicates", olap_filters_to_string(_olap_filters));
|
||||
_runtime_profile->add_info_string("KeyRanges", _scan_keys.debug_string());
|
||||
VLOG_CRITICAL << _scan_keys.debug_string();
|
||||
@ -188,67 +194,12 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
bool NewOlapScanNode::_should_push_down_binary_predicate(
|
||||
VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val,
|
||||
int* slot_ref_child, const std::function<bool(const std::string&)>& fn_checker) {
|
||||
if (!fn_checker(fn_call->fn().name.function_name)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& children = fn_call->children();
|
||||
DCHECK(children.size() == 2);
|
||||
for (size_t i = 0; i < children.size(); i++) {
|
||||
if (VExpr::expr_without_cast(children[i])->node_type() != TExprNodeType::SLOT_REF) {
|
||||
// not a slot ref(column)
|
||||
continue;
|
||||
}
|
||||
if (!children[1 - i]->is_constant()) {
|
||||
// only handle constant value
|
||||
return false;
|
||||
} else {
|
||||
if (const ColumnConst* const_column = check_and_get_column<ColumnConst>(
|
||||
children[1 - i]->get_const_col(expr_ctx)->column_ptr)) {
|
||||
*slot_ref_child = i;
|
||||
*constant_val = const_column->get_data_at(0);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NewOlapScanNode::_should_push_down_in_predicate(VInPredicate* pred, VExprContext* expr_ctx,
|
||||
bool is_not_in) {
|
||||
if (pred->is_not_in() != is_not_in) {
|
||||
return false;
|
||||
}
|
||||
InState* state = reinterpret_cast<InState*>(
|
||||
expr_ctx->fn_context(pred->fn_context_index())
|
||||
->get_function_state(FunctionContext::FRAGMENT_LOCAL));
|
||||
HybridSetBase* set = state->hybrid_set.get();
|
||||
|
||||
// if there are too many elements in InPredicate, exceed the limit,
|
||||
// we will not push any condition of this column to storage engine.
|
||||
// because too many conditions pushed down to storage engine may even
|
||||
// slow down the query process.
|
||||
// ATTN: This is just an experience value. You may need to try
|
||||
// different thresholds to improve performance.
|
||||
if (set->size() > _max_pushdown_conditions_per_column) {
|
||||
VLOG_NOTICE << "Predicate value num " << set->size() << " exceed limit "
|
||||
<< _max_pushdown_conditions_per_column;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NewOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_call,
|
||||
VExprContext* expr_ctx,
|
||||
StringVal* constant_str,
|
||||
doris_udf::FunctionContext** fn_ctx) {
|
||||
VScanNode::PushDownType NewOlapScanNode::_should_push_down_function_filter(
|
||||
VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringVal* constant_str,
|
||||
doris_udf::FunctionContext** fn_ctx) {
|
||||
// Now only `like` function filters is supported to push down
|
||||
if (fn_call->fn().name.function_name != "like") {
|
||||
return false;
|
||||
return PushDownType::UNACCEPTABLE;
|
||||
}
|
||||
|
||||
const auto& children = fn_call->children();
|
||||
@ -262,19 +213,19 @@ bool NewOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_cal
|
||||
}
|
||||
if (!children[1 - i]->is_constant()) {
|
||||
// only handle constant value
|
||||
return false;
|
||||
return PushDownType::UNACCEPTABLE;
|
||||
} else {
|
||||
DCHECK(children[1 - i]->type().is_string_type());
|
||||
if (const ColumnConst* const_column = check_and_get_column<ColumnConst>(
|
||||
children[1 - i]->get_const_col(expr_ctx)->column_ptr)) {
|
||||
*constant_str = const_column->get_data_at(0).to_string_val();
|
||||
} else {
|
||||
return false;
|
||||
return PushDownType::UNACCEPTABLE;
|
||||
}
|
||||
}
|
||||
}
|
||||
*fn_ctx = func_cxt;
|
||||
return true;
|
||||
return PushDownType::ACCEPTABLE;
|
||||
}
|
||||
|
||||
// PlanFragmentExecutor will call this method to set scan range
|
||||
|
||||
@ -38,17 +38,13 @@ protected:
|
||||
Status _process_conjuncts() override;
|
||||
bool _is_key_column(const std::string& col_name) override;
|
||||
|
||||
bool _should_push_down_binary_predicate(
|
||||
VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val,
|
||||
int* slot_ref_child,
|
||||
const std::function<bool(const std::string&)>& fn_checker) override;
|
||||
PushDownType _should_push_down_function_filter(VectorizedFnCall* fn_call,
|
||||
VExprContext* expr_ctx, StringVal* constant_str,
|
||||
doris_udf::FunctionContext** fn_ctx) override;
|
||||
|
||||
bool _should_push_down_in_predicate(VInPredicate* in_pred, VExprContext* expr_ctx,
|
||||
bool is_not_in) override;
|
||||
PushDownType _should_push_down_bloom_filter() override { return PushDownType::ACCEPTABLE; }
|
||||
|
||||
bool _should_push_down_function_filter(VectorizedFnCall* fn_call, VExprContext* expr_ctx,
|
||||
StringVal* constant_str,
|
||||
doris_udf::FunctionContext** fn_ctx) override;
|
||||
PushDownType _should_push_down_is_null_predicate() override { return PushDownType::ACCEPTABLE; }
|
||||
|
||||
Status _init_scanners(std::list<VScanner*>* scanners) override;
|
||||
|
||||
|
||||
@ -30,11 +30,11 @@
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
#define RETURN_IF_PUSH_DOWN(stmt) \
|
||||
if (!push_down) { \
|
||||
stmt; \
|
||||
} else { \
|
||||
return; \
|
||||
#define RETURN_IF_PUSH_DOWN(stmt) \
|
||||
if (pdt == PushDownType::UNACCEPTABLE) { \
|
||||
stmt; \
|
||||
} else { \
|
||||
return; \
|
||||
}
|
||||
|
||||
static bool ignore_cast(SlotDescriptor* slot, VExpr* expr) {
|
||||
@ -403,43 +403,44 @@ VExpr* VScanNode::_normalize_predicate(VExpr* conjunct_expr_root) {
|
||||
if (conjunct_expr_root != nullptr) {
|
||||
if (is_leaf(conjunct_expr_root)) {
|
||||
auto impl = conjunct_expr_root->get_impl();
|
||||
// If impl is not null, which means this a conjuncts from runtime filter.
|
||||
VExpr* cur_expr = impl ? const_cast<VExpr*>(impl) : conjunct_expr_root;
|
||||
SlotDescriptor* slot;
|
||||
ColumnValueRangeType* range = nullptr;
|
||||
bool push_down = false;
|
||||
_eval_const_conjuncts(cur_expr, *(_vconjunct_ctx_ptr.get()), &push_down);
|
||||
if (!push_down &&
|
||||
PushDownType pdt = PushDownType::UNACCEPTABLE;
|
||||
_eval_const_conjuncts(cur_expr, *(_vconjunct_ctx_ptr.get()), &pdt);
|
||||
if (pdt == PushDownType::UNACCEPTABLE &&
|
||||
(_is_predicate_acting_on_slot(cur_expr, in_predicate_checker, &slot, &range) ||
|
||||
_is_predicate_acting_on_slot(cur_expr, eq_predicate_checker, &slot, &range))) {
|
||||
std::visit(
|
||||
[&](auto& value_range) {
|
||||
RETURN_IF_PUSH_DOWN(_normalize_in_and_eq_predicate(
|
||||
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range,
|
||||
&push_down));
|
||||
&pdt));
|
||||
RETURN_IF_PUSH_DOWN(_normalize_not_in_and_not_eq_predicate(
|
||||
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range,
|
||||
&push_down));
|
||||
&pdt));
|
||||
RETURN_IF_PUSH_DOWN(_normalize_is_null_predicate(
|
||||
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range,
|
||||
&push_down));
|
||||
&pdt));
|
||||
RETURN_IF_PUSH_DOWN(_normalize_noneq_binary_predicate(
|
||||
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range,
|
||||
&push_down));
|
||||
&pdt));
|
||||
if (_is_key_column(slot->col_name())) {
|
||||
RETURN_IF_PUSH_DOWN(_normalize_bloom_filter(
|
||||
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &push_down));
|
||||
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &pdt));
|
||||
if (_state->enable_function_pushdown()) {
|
||||
RETURN_IF_PUSH_DOWN(_normalize_function_filters(
|
||||
cur_expr, *(_vconjunct_ctx_ptr.get()), slot,
|
||||
&push_down));
|
||||
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &pdt));
|
||||
}
|
||||
}
|
||||
},
|
||||
*range);
|
||||
}
|
||||
if (push_down && _is_key_column(slot->col_name())) {
|
||||
if (pdt == PushDownType::ACCEPTABLE && _is_key_column(slot->col_name())) {
|
||||
return nullptr;
|
||||
} else {
|
||||
// for PARTIAL_ACCEPTABLE and UNACCEPTABLE, do not remove expr from the tree
|
||||
return conjunct_expr_root;
|
||||
}
|
||||
} else {
|
||||
@ -464,17 +465,20 @@ VExpr* VScanNode::_normalize_predicate(VExpr* conjunct_expr_root) {
|
||||
}
|
||||
|
||||
Status VScanNode::_normalize_bloom_filter(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot,
|
||||
bool* push_down) {
|
||||
PushDownType* pdt) {
|
||||
if (TExprNodeType::BLOOM_PRED == expr->node_type()) {
|
||||
DCHECK(expr->children().size() == 1);
|
||||
_bloom_filters_push_down.emplace_back(slot->col_name(), expr->get_bloom_filter_func());
|
||||
*push_down = true;
|
||||
PushDownType temp_pdt = _should_push_down_bloom_filter();
|
||||
if (temp_pdt != PushDownType::UNACCEPTABLE) {
|
||||
_bloom_filters_push_down.emplace_back(slot->col_name(), expr->get_bloom_filter_func());
|
||||
*pdt = temp_pdt;
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status VScanNode::_normalize_function_filters(VExpr* expr, VExprContext* expr_ctx,
|
||||
SlotDescriptor* slot, bool* push_down) {
|
||||
SlotDescriptor* slot, PushDownType* pdt) {
|
||||
bool opposite = false;
|
||||
VExpr* fn_expr = expr;
|
||||
if (TExprNodeType::COMPOUND_PRED == expr->node_type() &&
|
||||
@ -486,11 +490,12 @@ Status VScanNode::_normalize_function_filters(VExpr* expr, VExprContext* expr_ct
|
||||
if (TExprNodeType::FUNCTION_CALL == fn_expr->node_type()) {
|
||||
doris_udf::FunctionContext* fn_ctx = nullptr;
|
||||
StringVal val;
|
||||
if (_should_push_down_function_filter(reinterpret_cast<VectorizedFnCall*>(fn_expr),
|
||||
expr_ctx, &val, &fn_ctx)) {
|
||||
PushDownType temp_pdt = _should_push_down_function_filter(
|
||||
reinterpret_cast<VectorizedFnCall*>(fn_expr), expr_ctx, &val, &fn_ctx);
|
||||
if (temp_pdt != PushDownType::UNACCEPTABLE) {
|
||||
std::string col = slot->col_name();
|
||||
_push_down_functions.emplace_back(opposite, col, fn_ctx, val);
|
||||
*push_down = true;
|
||||
*pdt = temp_pdt;
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
@ -523,14 +528,14 @@ bool VScanNode::_is_predicate_acting_on_slot(
|
||||
return true;
|
||||
}
|
||||
|
||||
void VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, bool* push_down) {
|
||||
void VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, PushDownType* pdt) {
|
||||
char* constant_val = nullptr;
|
||||
if (vexpr->is_constant()) {
|
||||
if (const ColumnConst* const_column =
|
||||
check_and_get_column<ColumnConst>(vexpr->get_const_col(expr_ctx)->column_ptr)) {
|
||||
constant_val = const_cast<char*>(const_column->get_data_at(0).data);
|
||||
if (constant_val == nullptr || *reinterpret_cast<bool*>(constant_val) == false) {
|
||||
*push_down = true;
|
||||
*pdt = PushDownType::ACCEPTABLE;
|
||||
_eos = true;
|
||||
}
|
||||
} else {
|
||||
@ -543,14 +548,14 @@ void VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, bool
|
||||
template <PrimitiveType T>
|
||||
Status VScanNode::_normalize_in_and_eq_predicate(VExpr* expr, VExprContext* expr_ctx,
|
||||
SlotDescriptor* slot, ColumnValueRange<T>& range,
|
||||
bool* push_down) {
|
||||
PushDownType* pdt) {
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(slot->type().precision,
|
||||
slot->type().scale);
|
||||
bool effect = false;
|
||||
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
|
||||
if (TExprNodeType::IN_PRED == expr->node_type()) {
|
||||
VInPredicate* pred = static_cast<VInPredicate*>(expr);
|
||||
if (!_should_push_down_in_predicate(pred, expr_ctx, false)) {
|
||||
PushDownType temp_pdt = _should_push_down_in_predicate(pred, expr_ctx, false);
|
||||
if (temp_pdt == PushDownType::UNACCEPTABLE) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -573,43 +578,45 @@ Status VScanNode::_normalize_in_and_eq_predicate(VExpr* expr, VExprContext* expr
|
||||
fn_name, !state->hybrid_set->is_date_v2()));
|
||||
iter->next();
|
||||
}
|
||||
|
||||
range.intersection(temp_range);
|
||||
effect = true;
|
||||
*pdt = temp_pdt;
|
||||
} else if (TExprNodeType::BINARY_PRED == expr->node_type()) {
|
||||
DCHECK(expr->children().size() == 2);
|
||||
auto eq_checker = [](const std::string& fn_name) { return fn_name == "eq"; };
|
||||
|
||||
StringRef value;
|
||||
int slot_ref_child = -1;
|
||||
if (_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx,
|
||||
&value, &slot_ref_child, eq_checker)) {
|
||||
DCHECK(slot_ref_child >= 0);
|
||||
// where A = nullptr should return empty result set
|
||||
auto fn_name = std::string("");
|
||||
if (value.data != nullptr) {
|
||||
if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING ||
|
||||
T == TYPE_HLL) {
|
||||
auto val = StringValue(value.data, value.size);
|
||||
RETURN_IF_ERROR(_change_value_range<true>(
|
||||
temp_range, reinterpret_cast<void*>(&val),
|
||||
ColumnValueRange<T>::add_fixed_value_range, fn_name));
|
||||
} else {
|
||||
RETURN_IF_ERROR(_change_value_range<true>(
|
||||
temp_range, reinterpret_cast<void*>(const_cast<char*>(value.data)),
|
||||
ColumnValueRange<T>::add_fixed_value_range, fn_name));
|
||||
}
|
||||
range.intersection(temp_range);
|
||||
effect = true;
|
||||
}
|
||||
|
||||
PushDownType temp_pdt =
|
||||
_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr),
|
||||
expr_ctx, &value, &slot_ref_child, eq_checker);
|
||||
if (temp_pdt == PushDownType::UNACCEPTABLE) {
|
||||
return Status::OK();
|
||||
}
|
||||
DCHECK(slot_ref_child >= 0);
|
||||
// where A = nullptr should return empty result set
|
||||
auto fn_name = std::string("");
|
||||
if (value.data != nullptr) {
|
||||
if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING ||
|
||||
T == TYPE_HLL) {
|
||||
auto val = StringValue(value.data, value.size);
|
||||
RETURN_IF_ERROR(_change_value_range<true>(
|
||||
temp_range, reinterpret_cast<void*>(&val),
|
||||
ColumnValueRange<T>::add_fixed_value_range, fn_name));
|
||||
} else {
|
||||
RETURN_IF_ERROR(_change_value_range<true>(
|
||||
temp_range, reinterpret_cast<void*>(const_cast<char*>(value.data)),
|
||||
ColumnValueRange<T>::add_fixed_value_range, fn_name));
|
||||
}
|
||||
range.intersection(temp_range);
|
||||
}
|
||||
*pdt = temp_pdt;
|
||||
}
|
||||
|
||||
// exceed limit, no conditions will be pushed down to storage engine.
|
||||
if (range.get_fixed_value_size() > _max_pushdown_conditions_per_column) {
|
||||
range.set_whole_value_range();
|
||||
} else {
|
||||
*push_down = effect;
|
||||
*pdt = PushDownType::UNACCEPTABLE;
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -618,14 +625,15 @@ template <PrimitiveType T>
|
||||
Status VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprContext* expr_ctx,
|
||||
SlotDescriptor* slot,
|
||||
ColumnValueRange<T>& range,
|
||||
bool* push_down) {
|
||||
PushDownType* pdt) {
|
||||
bool is_fixed_range = range.is_fixed_value_range();
|
||||
auto not_in_range = ColumnValueRange<T>::create_empty_column_value_range(range.column_name());
|
||||
bool effect = false;
|
||||
PushDownType temp_pdt = PushDownType::UNACCEPTABLE;
|
||||
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
|
||||
if (TExprNodeType::IN_PRED == expr->node_type()) {
|
||||
VInPredicate* pred = static_cast<VInPredicate*>(expr);
|
||||
if (!_should_push_down_in_predicate(pred, expr_ctx, true)) {
|
||||
if ((temp_pdt = _should_push_down_in_predicate(pred, expr_ctx, true)) ==
|
||||
PushDownType::UNACCEPTABLE) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -652,55 +660,56 @@ Status VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprConte
|
||||
}
|
||||
iter->next();
|
||||
}
|
||||
effect = true;
|
||||
} else if (TExprNodeType::BINARY_PRED == expr->node_type()) {
|
||||
DCHECK(expr->children().size() == 2);
|
||||
|
||||
auto ne_checker = [](const std::string& fn_name) { return fn_name == "ne"; };
|
||||
StringRef value;
|
||||
int slot_ref_child = -1;
|
||||
if (_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx,
|
||||
&value, &slot_ref_child, ne_checker)) {
|
||||
DCHECK(slot_ref_child >= 0);
|
||||
// where A = nullptr should return empty result set
|
||||
if (value.data != nullptr) {
|
||||
auto fn_name = std::string("");
|
||||
if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING ||
|
||||
T == TYPE_HLL) {
|
||||
auto val = StringValue(value.data, value.size);
|
||||
if (is_fixed_range) {
|
||||
RETURN_IF_ERROR(_change_value_range<true>(
|
||||
range, reinterpret_cast<void*>(&val),
|
||||
ColumnValueRange<T>::remove_fixed_value_range, fn_name));
|
||||
} else {
|
||||
RETURN_IF_ERROR(_change_value_range<true>(
|
||||
not_in_range, reinterpret_cast<void*>(&val),
|
||||
ColumnValueRange<T>::add_fixed_value_range, fn_name));
|
||||
}
|
||||
if ((temp_pdt = _should_push_down_binary_predicate(
|
||||
reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx, &value, &slot_ref_child,
|
||||
ne_checker)) == PushDownType::UNACCEPTABLE) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
DCHECK(slot_ref_child >= 0);
|
||||
// where A = nullptr should return empty result set
|
||||
if (value.data != nullptr) {
|
||||
auto fn_name = std::string("");
|
||||
if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING ||
|
||||
T == TYPE_HLL) {
|
||||
auto val = StringValue(value.data, value.size);
|
||||
if (is_fixed_range) {
|
||||
RETURN_IF_ERROR(_change_value_range<true>(
|
||||
range, reinterpret_cast<void*>(&val),
|
||||
ColumnValueRange<T>::remove_fixed_value_range, fn_name));
|
||||
} else {
|
||||
if (is_fixed_range) {
|
||||
RETURN_IF_ERROR(_change_value_range<true>(
|
||||
range, reinterpret_cast<void*>(const_cast<char*>(value.data)),
|
||||
ColumnValueRange<T>::remove_fixed_value_range, fn_name));
|
||||
} else {
|
||||
RETURN_IF_ERROR(_change_value_range<true>(
|
||||
not_in_range,
|
||||
reinterpret_cast<void*>(const_cast<char*>(value.data)),
|
||||
ColumnValueRange<T>::add_fixed_value_range, fn_name));
|
||||
}
|
||||
RETURN_IF_ERROR(_change_value_range<true>(
|
||||
not_in_range, reinterpret_cast<void*>(&val),
|
||||
ColumnValueRange<T>::add_fixed_value_range, fn_name));
|
||||
}
|
||||
} else {
|
||||
if (is_fixed_range) {
|
||||
RETURN_IF_ERROR(_change_value_range<true>(
|
||||
range, reinterpret_cast<void*>(const_cast<char*>(value.data)),
|
||||
ColumnValueRange<T>::remove_fixed_value_range, fn_name));
|
||||
} else {
|
||||
RETURN_IF_ERROR(_change_value_range<true>(
|
||||
not_in_range, reinterpret_cast<void*>(const_cast<char*>(value.data)),
|
||||
ColumnValueRange<T>::add_fixed_value_range, fn_name));
|
||||
}
|
||||
effect = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
if (is_fixed_range ||
|
||||
not_in_range.get_fixed_value_size() <= _max_pushdown_conditions_per_column) {
|
||||
if (!is_fixed_range) {
|
||||
// push down not in condition to storage engine
|
||||
not_in_range.to_in_condition(_olap_filters, false);
|
||||
_not_in_value_ranges.push_back(not_in_range);
|
||||
}
|
||||
*push_down = effect;
|
||||
*pdt = temp_pdt;
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -708,21 +717,26 @@ Status VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprConte
|
||||
template <PrimitiveType T>
|
||||
Status VScanNode::_normalize_is_null_predicate(VExpr* expr, VExprContext* expr_ctx,
|
||||
SlotDescriptor* slot, ColumnValueRange<T>& range,
|
||||
bool* push_down) {
|
||||
PushDownType* pdt) {
|
||||
PushDownType temp_pdt = _should_push_down_is_null_predicate();
|
||||
if (temp_pdt == PushDownType::UNACCEPTABLE) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
if (TExprNodeType::FUNCTION_CALL == expr->node_type()) {
|
||||
if (reinterpret_cast<VectorizedFnCall*>(expr)->fn().name.function_name == "is_null_pred") {
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
|
||||
slot->type().precision, slot->type().scale);
|
||||
temp_range.set_contain_null(true);
|
||||
range.intersection(temp_range);
|
||||
*push_down = true;
|
||||
*pdt = temp_pdt;
|
||||
} else if (reinterpret_cast<VectorizedFnCall*>(expr)->fn().name.function_name ==
|
||||
"is_not_null_pred") {
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
|
||||
slot->type().precision, slot->type().scale);
|
||||
temp_range.set_contain_null(false);
|
||||
range.intersection(temp_range);
|
||||
*push_down = true;
|
||||
*pdt = temp_pdt;
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
@ -731,7 +745,7 @@ Status VScanNode::_normalize_is_null_predicate(VExpr* expr, VExprContext* expr_c
|
||||
template <PrimitiveType T>
|
||||
Status VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext* expr_ctx,
|
||||
SlotDescriptor* slot,
|
||||
ColumnValueRange<T>& range, bool* push_down) {
|
||||
ColumnValueRange<T>& range, PushDownType* pdt) {
|
||||
if (TExprNodeType::BINARY_PRED == expr->node_type()) {
|
||||
DCHECK(expr->children().size() == 2);
|
||||
|
||||
@ -740,15 +754,16 @@ Status VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext* e
|
||||
};
|
||||
StringRef value;
|
||||
int slot_ref_child = -1;
|
||||
if (_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx,
|
||||
&value, &slot_ref_child, noneq_checker)) {
|
||||
PushDownType temp_pdt = _should_push_down_binary_predicate(
|
||||
reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx, &value, &slot_ref_child,
|
||||
noneq_checker);
|
||||
if (temp_pdt != PushDownType::UNACCEPTABLE) {
|
||||
DCHECK(slot_ref_child >= 0);
|
||||
const std::string& fn_name =
|
||||
reinterpret_cast<VectorizedFnCall*>(expr)->fn().name.function_name;
|
||||
|
||||
// where A = nullptr should return empty result set
|
||||
if (value.data != nullptr) {
|
||||
*push_down = true;
|
||||
if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING ||
|
||||
T == TYPE_HLL) {
|
||||
auto val = StringValue(value.data, value.size);
|
||||
@ -760,6 +775,7 @@ Status VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext* e
|
||||
range, reinterpret_cast<void*>(const_cast<char*>(value.data)),
|
||||
ColumnValueRange<T>::add_value_range, fn_name, true, slot_ref_child));
|
||||
}
|
||||
*pdt = temp_pdt;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -906,4 +922,59 @@ Status VScanNode::clone_vconjunct_ctx(VExprContext** _vconjunct_ctx) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
VScanNode::PushDownType VScanNode::_should_push_down_binary_predicate(
|
||||
VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val,
|
||||
int* slot_ref_child, const std::function<bool(const std::string&)>& fn_checker) {
|
||||
if (!fn_checker(fn_call->fn().name.function_name)) {
|
||||
return PushDownType::UNACCEPTABLE;
|
||||
}
|
||||
|
||||
const auto& children = fn_call->children();
|
||||
DCHECK(children.size() == 2);
|
||||
for (size_t i = 0; i < children.size(); i++) {
|
||||
if (VExpr::expr_without_cast(children[i])->node_type() != TExprNodeType::SLOT_REF) {
|
||||
// not a slot ref(column)
|
||||
continue;
|
||||
}
|
||||
if (!children[1 - i]->is_constant()) {
|
||||
// only handle constant value
|
||||
return PushDownType::UNACCEPTABLE;
|
||||
} else {
|
||||
if (const ColumnConst* const_column = check_and_get_column<ColumnConst>(
|
||||
children[1 - i]->get_const_col(expr_ctx)->column_ptr)) {
|
||||
*slot_ref_child = i;
|
||||
*constant_val = const_column->get_data_at(0);
|
||||
} else {
|
||||
return PushDownType::UNACCEPTABLE;
|
||||
}
|
||||
}
|
||||
}
|
||||
return PushDownType::ACCEPTABLE;
|
||||
}
|
||||
|
||||
VScanNode::PushDownType VScanNode::_should_push_down_in_predicate(VInPredicate* pred,
|
||||
VExprContext* expr_ctx,
|
||||
bool is_not_in) {
|
||||
if (pred->is_not_in() != is_not_in) {
|
||||
return PushDownType::UNACCEPTABLE;
|
||||
}
|
||||
InState* state = reinterpret_cast<InState*>(
|
||||
expr_ctx->fn_context(pred->fn_context_index())
|
||||
->get_function_state(FunctionContext::FRAGMENT_LOCAL));
|
||||
HybridSetBase* set = state->hybrid_set.get();
|
||||
|
||||
// if there are too many elements in InPredicate, exceed the limit,
|
||||
// we will not push any condition of this column to storage engine.
|
||||
// because too many conditions pushed down to storage engine may even
|
||||
// slow down the query process.
|
||||
// ATTN: This is just an experience value. You may need to try
|
||||
// different thresholds to improve performance.
|
||||
if (set->size() > _max_pushdown_conditions_per_column) {
|
||||
VLOG_NOTICE << "Predicate value num " << set->size() << " exceed limit "
|
||||
<< _max_pushdown_conditions_per_column;
|
||||
return PushDownType::UNACCEPTABLE;
|
||||
}
|
||||
return PushDownType::ACCEPTABLE;
|
||||
}
|
||||
|
||||
} // namespace doris::vectorized
|
||||
|
||||
@ -73,6 +73,17 @@ public:
|
||||
const TupleDescriptor* input_tuple_desc() const { return _input_tuple_desc; }
|
||||
const TupleDescriptor* output_tuple_desc() const { return _output_tuple_desc; }
|
||||
|
||||
enum class PushDownType {
|
||||
// The predicate can not be pushed down to data source
|
||||
UNACCEPTABLE,
|
||||
// The predicate can be pushed down to data source
|
||||
// and the data source can fully evaludate it
|
||||
ACCEPTABLE,
|
||||
// The predicate can be pushed down to data source
|
||||
// but the data source can not fully evaluate it.
|
||||
PARTIAL_ACCEPTABLE
|
||||
};
|
||||
|
||||
protected:
|
||||
// Different data sources register different profiles by implementing this method
|
||||
virtual Status _init_profile();
|
||||
@ -105,21 +116,24 @@ protected:
|
||||
// 2. in/not in predicate
|
||||
// 3. function predicate
|
||||
// TODO: these interfaces should be change to become more common.
|
||||
virtual bool _should_push_down_binary_predicate(
|
||||
virtual PushDownType _should_push_down_binary_predicate(
|
||||
VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val,
|
||||
int* slot_ref_child, const std::function<bool(const std::string&)>& fn_checker) {
|
||||
return false;
|
||||
int* slot_ref_child, const std::function<bool(const std::string&)>& fn_checker);
|
||||
|
||||
virtual PushDownType _should_push_down_in_predicate(VInPredicate* in_pred,
|
||||
VExprContext* expr_ctx, bool is_not_in);
|
||||
|
||||
virtual PushDownType _should_push_down_function_filter(VectorizedFnCall* fn_call,
|
||||
VExprContext* expr_ctx,
|
||||
StringVal* constant_str,
|
||||
doris_udf::FunctionContext** fn_ctx) {
|
||||
return PushDownType::UNACCEPTABLE;
|
||||
}
|
||||
|
||||
virtual bool _should_push_down_in_predicate(VInPredicate* in_pred, VExprContext* expr_ctx,
|
||||
bool is_not_in) {
|
||||
return false;
|
||||
}
|
||||
virtual PushDownType _should_push_down_bloom_filter() { return PushDownType::UNACCEPTABLE; }
|
||||
|
||||
virtual bool _should_push_down_function_filter(VectorizedFnCall* fn_call,
|
||||
VExprContext* expr_ctx, StringVal* constant_str,
|
||||
doris_udf::FunctionContext** fn_ctx) {
|
||||
return false;
|
||||
virtual PushDownType _should_push_down_is_null_predicate() {
|
||||
return PushDownType::UNACCEPTABLE;
|
||||
}
|
||||
|
||||
// Return true if it is a key column.
|
||||
@ -175,11 +189,18 @@ protected:
|
||||
std::vector<FunctionFilter> _push_down_functions;
|
||||
|
||||
// slot id -> ColumnValueRange
|
||||
// Parsed from conjunts
|
||||
// Parsed from conjuncts
|
||||
phmap::flat_hash_map<int, std::pair<SlotDescriptor*, ColumnValueRangeType>>
|
||||
_slot_id_to_value_range;
|
||||
// column -> ColumnValueRange
|
||||
std::map<std::string, ColumnValueRangeType> _colname_to_value_range;
|
||||
// We use _colname_to_value_range to store a column and its conresponding value ranges.
|
||||
// But if a col is with value range, eg: 1 < col < 10, which is "!is_fixed_range",
|
||||
// in this case we can not merge "1 < col < 10" with "col not in (2)".
|
||||
// So we have to save "col not in (2)" to another structure: "_not_in_value_ranges".
|
||||
// When the data source try to use the value ranges, it should use both ranges in
|
||||
// "_colname_to_value_range" and in "_not_in_value_ranges"
|
||||
std::vector<ColumnValueRangeType> _not_in_value_ranges;
|
||||
|
||||
bool _need_agg_finalize = true;
|
||||
|
||||
@ -233,13 +254,13 @@ private:
|
||||
|
||||
Status _normalize_conjuncts();
|
||||
VExpr* _normalize_predicate(VExpr* conjunct_expr_root);
|
||||
void _eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, bool* push_down);
|
||||
void _eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, PushDownType* pdt);
|
||||
|
||||
Status _normalize_bloom_filter(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot,
|
||||
bool* push_down);
|
||||
PushDownType* pdt);
|
||||
|
||||
Status _normalize_function_filters(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot,
|
||||
bool* push_down);
|
||||
PushDownType* pdt);
|
||||
|
||||
bool _is_predicate_acting_on_slot(VExpr* expr,
|
||||
const std::function<bool(const std::vector<VExpr*>&,
|
||||
@ -249,21 +270,21 @@ private:
|
||||
template <PrimitiveType T>
|
||||
Status _normalize_in_and_eq_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx,
|
||||
SlotDescriptor* slot, ColumnValueRange<T>& range,
|
||||
bool* push_down);
|
||||
PushDownType* pdt);
|
||||
template <PrimitiveType T>
|
||||
Status _normalize_not_in_and_not_eq_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx,
|
||||
SlotDescriptor* slot, ColumnValueRange<T>& range,
|
||||
bool* push_down);
|
||||
PushDownType* pdt);
|
||||
|
||||
template <PrimitiveType T>
|
||||
Status _normalize_noneq_binary_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx,
|
||||
SlotDescriptor* slot, ColumnValueRange<T>& range,
|
||||
bool* push_down);
|
||||
PushDownType* pdt);
|
||||
|
||||
template <PrimitiveType T>
|
||||
Status _normalize_is_null_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx,
|
||||
SlotDescriptor* slot, ColumnValueRange<T>& range,
|
||||
bool* push_down);
|
||||
PushDownType* pdt);
|
||||
|
||||
template <bool IsFixed, PrimitiveType PrimitiveType, typename ChangeFixedValueRangeFunc>
|
||||
static Status _change_value_range(ColumnValueRange<PrimitiveType>& range, void* value,
|
||||
|
||||
Reference in New Issue
Block a user