[feature-wip](new-scan) refactor some interface about predicate push down in scan node (#12527)

This PR introduce a new enum type `PushDownType`:
```
enum class PushDownType {
        // The predicate can not be pushed down to data source
        UNACCEPTABLE,
        // The predicate can be pushed down to data source
        // and the data source can fully evaludate it
        ACCEPTABLE,
        // The predicate can be pushed down to data source
        // but the data source can not fully evaluate it.
        PARTIAL_ACCEPTABLE
    };
```

And derived class of VScanNode can override following method to determine whether to accept
a bianry/in/bloom filter/is null predicate:

```
PushDownType _should_push_down_binary_predicate();
PushDownType _should_push_down_in_predicate();
PushDownType _should_push_down_function_filter();
PushDownType _should_push_down_bloom_filter();
PushDownType _should_push_down_is_null_predicate();
```
This commit is contained in:
Mingyu Chen
2022-09-13 10:25:13 +08:00
committed by GitHub
parent 87439e227e
commit 8a274d7851
4 changed files with 225 additions and 186 deletions

View File

@ -125,7 +125,7 @@ static std::string olap_filters_to_string(const std::vector<doris::TCondition>&
filters_string += "[";
for (auto it = filters.cbegin(); it != filters.cend(); it++) {
if (it != filters.cbegin()) {
filters_string += ",";
filters_string += ", ";
}
filters_string += olap_filter_to_string(*it);
}
@ -181,6 +181,12 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() {
}
}
// Append value ranges in "_not_in_value_ranges"
for (auto& range : _not_in_value_ranges) {
std::visit([&](auto&& the_range) { the_range.to_in_condition(_olap_filters, false); },
range);
}
_runtime_profile->add_info_string("PushDownPredicates", olap_filters_to_string(_olap_filters));
_runtime_profile->add_info_string("KeyRanges", _scan_keys.debug_string());
VLOG_CRITICAL << _scan_keys.debug_string();
@ -188,67 +194,12 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() {
return Status::OK();
}
bool NewOlapScanNode::_should_push_down_binary_predicate(
VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val,
int* slot_ref_child, const std::function<bool(const std::string&)>& fn_checker) {
if (!fn_checker(fn_call->fn().name.function_name)) {
return false;
}
const auto& children = fn_call->children();
DCHECK(children.size() == 2);
for (size_t i = 0; i < children.size(); i++) {
if (VExpr::expr_without_cast(children[i])->node_type() != TExprNodeType::SLOT_REF) {
// not a slot ref(column)
continue;
}
if (!children[1 - i]->is_constant()) {
// only handle constant value
return false;
} else {
if (const ColumnConst* const_column = check_and_get_column<ColumnConst>(
children[1 - i]->get_const_col(expr_ctx)->column_ptr)) {
*slot_ref_child = i;
*constant_val = const_column->get_data_at(0);
} else {
return false;
}
}
}
return true;
}
bool NewOlapScanNode::_should_push_down_in_predicate(VInPredicate* pred, VExprContext* expr_ctx,
bool is_not_in) {
if (pred->is_not_in() != is_not_in) {
return false;
}
InState* state = reinterpret_cast<InState*>(
expr_ctx->fn_context(pred->fn_context_index())
->get_function_state(FunctionContext::FRAGMENT_LOCAL));
HybridSetBase* set = state->hybrid_set.get();
// if there are too many elements in InPredicate, exceed the limit,
// we will not push any condition of this column to storage engine.
// because too many conditions pushed down to storage engine may even
// slow down the query process.
// ATTN: This is just an experience value. You may need to try
// different thresholds to improve performance.
if (set->size() > _max_pushdown_conditions_per_column) {
VLOG_NOTICE << "Predicate value num " << set->size() << " exceed limit "
<< _max_pushdown_conditions_per_column;
return false;
}
return true;
}
bool NewOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_call,
VExprContext* expr_ctx,
StringVal* constant_str,
doris_udf::FunctionContext** fn_ctx) {
VScanNode::PushDownType NewOlapScanNode::_should_push_down_function_filter(
VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringVal* constant_str,
doris_udf::FunctionContext** fn_ctx) {
// Now only `like` function filters is supported to push down
if (fn_call->fn().name.function_name != "like") {
return false;
return PushDownType::UNACCEPTABLE;
}
const auto& children = fn_call->children();
@ -262,19 +213,19 @@ bool NewOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_cal
}
if (!children[1 - i]->is_constant()) {
// only handle constant value
return false;
return PushDownType::UNACCEPTABLE;
} else {
DCHECK(children[1 - i]->type().is_string_type());
if (const ColumnConst* const_column = check_and_get_column<ColumnConst>(
children[1 - i]->get_const_col(expr_ctx)->column_ptr)) {
*constant_str = const_column->get_data_at(0).to_string_val();
} else {
return false;
return PushDownType::UNACCEPTABLE;
}
}
}
*fn_ctx = func_cxt;
return true;
return PushDownType::ACCEPTABLE;
}
// PlanFragmentExecutor will call this method to set scan range

View File

@ -38,17 +38,13 @@ protected:
Status _process_conjuncts() override;
bool _is_key_column(const std::string& col_name) override;
bool _should_push_down_binary_predicate(
VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val,
int* slot_ref_child,
const std::function<bool(const std::string&)>& fn_checker) override;
PushDownType _should_push_down_function_filter(VectorizedFnCall* fn_call,
VExprContext* expr_ctx, StringVal* constant_str,
doris_udf::FunctionContext** fn_ctx) override;
bool _should_push_down_in_predicate(VInPredicate* in_pred, VExprContext* expr_ctx,
bool is_not_in) override;
PushDownType _should_push_down_bloom_filter() override { return PushDownType::ACCEPTABLE; }
bool _should_push_down_function_filter(VectorizedFnCall* fn_call, VExprContext* expr_ctx,
StringVal* constant_str,
doris_udf::FunctionContext** fn_ctx) override;
PushDownType _should_push_down_is_null_predicate() override { return PushDownType::ACCEPTABLE; }
Status _init_scanners(std::list<VScanner*>* scanners) override;

View File

@ -30,11 +30,11 @@
namespace doris::vectorized {
#define RETURN_IF_PUSH_DOWN(stmt) \
if (!push_down) { \
stmt; \
} else { \
return; \
#define RETURN_IF_PUSH_DOWN(stmt) \
if (pdt == PushDownType::UNACCEPTABLE) { \
stmt; \
} else { \
return; \
}
static bool ignore_cast(SlotDescriptor* slot, VExpr* expr) {
@ -403,43 +403,44 @@ VExpr* VScanNode::_normalize_predicate(VExpr* conjunct_expr_root) {
if (conjunct_expr_root != nullptr) {
if (is_leaf(conjunct_expr_root)) {
auto impl = conjunct_expr_root->get_impl();
// If impl is not null, which means this a conjuncts from runtime filter.
VExpr* cur_expr = impl ? const_cast<VExpr*>(impl) : conjunct_expr_root;
SlotDescriptor* slot;
ColumnValueRangeType* range = nullptr;
bool push_down = false;
_eval_const_conjuncts(cur_expr, *(_vconjunct_ctx_ptr.get()), &push_down);
if (!push_down &&
PushDownType pdt = PushDownType::UNACCEPTABLE;
_eval_const_conjuncts(cur_expr, *(_vconjunct_ctx_ptr.get()), &pdt);
if (pdt == PushDownType::UNACCEPTABLE &&
(_is_predicate_acting_on_slot(cur_expr, in_predicate_checker, &slot, &range) ||
_is_predicate_acting_on_slot(cur_expr, eq_predicate_checker, &slot, &range))) {
std::visit(
[&](auto& value_range) {
RETURN_IF_PUSH_DOWN(_normalize_in_and_eq_predicate(
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range,
&push_down));
&pdt));
RETURN_IF_PUSH_DOWN(_normalize_not_in_and_not_eq_predicate(
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range,
&push_down));
&pdt));
RETURN_IF_PUSH_DOWN(_normalize_is_null_predicate(
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range,
&push_down));
&pdt));
RETURN_IF_PUSH_DOWN(_normalize_noneq_binary_predicate(
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range,
&push_down));
&pdt));
if (_is_key_column(slot->col_name())) {
RETURN_IF_PUSH_DOWN(_normalize_bloom_filter(
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &push_down));
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &pdt));
if (_state->enable_function_pushdown()) {
RETURN_IF_PUSH_DOWN(_normalize_function_filters(
cur_expr, *(_vconjunct_ctx_ptr.get()), slot,
&push_down));
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &pdt));
}
}
},
*range);
}
if (push_down && _is_key_column(slot->col_name())) {
if (pdt == PushDownType::ACCEPTABLE && _is_key_column(slot->col_name())) {
return nullptr;
} else {
// for PARTIAL_ACCEPTABLE and UNACCEPTABLE, do not remove expr from the tree
return conjunct_expr_root;
}
} else {
@ -464,17 +465,20 @@ VExpr* VScanNode::_normalize_predicate(VExpr* conjunct_expr_root) {
}
Status VScanNode::_normalize_bloom_filter(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot,
bool* push_down) {
PushDownType* pdt) {
if (TExprNodeType::BLOOM_PRED == expr->node_type()) {
DCHECK(expr->children().size() == 1);
_bloom_filters_push_down.emplace_back(slot->col_name(), expr->get_bloom_filter_func());
*push_down = true;
PushDownType temp_pdt = _should_push_down_bloom_filter();
if (temp_pdt != PushDownType::UNACCEPTABLE) {
_bloom_filters_push_down.emplace_back(slot->col_name(), expr->get_bloom_filter_func());
*pdt = temp_pdt;
}
}
return Status::OK();
}
Status VScanNode::_normalize_function_filters(VExpr* expr, VExprContext* expr_ctx,
SlotDescriptor* slot, bool* push_down) {
SlotDescriptor* slot, PushDownType* pdt) {
bool opposite = false;
VExpr* fn_expr = expr;
if (TExprNodeType::COMPOUND_PRED == expr->node_type() &&
@ -486,11 +490,12 @@ Status VScanNode::_normalize_function_filters(VExpr* expr, VExprContext* expr_ct
if (TExprNodeType::FUNCTION_CALL == fn_expr->node_type()) {
doris_udf::FunctionContext* fn_ctx = nullptr;
StringVal val;
if (_should_push_down_function_filter(reinterpret_cast<VectorizedFnCall*>(fn_expr),
expr_ctx, &val, &fn_ctx)) {
PushDownType temp_pdt = _should_push_down_function_filter(
reinterpret_cast<VectorizedFnCall*>(fn_expr), expr_ctx, &val, &fn_ctx);
if (temp_pdt != PushDownType::UNACCEPTABLE) {
std::string col = slot->col_name();
_push_down_functions.emplace_back(opposite, col, fn_ctx, val);
*push_down = true;
*pdt = temp_pdt;
}
}
return Status::OK();
@ -523,14 +528,14 @@ bool VScanNode::_is_predicate_acting_on_slot(
return true;
}
void VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, bool* push_down) {
void VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, PushDownType* pdt) {
char* constant_val = nullptr;
if (vexpr->is_constant()) {
if (const ColumnConst* const_column =
check_and_get_column<ColumnConst>(vexpr->get_const_col(expr_ctx)->column_ptr)) {
constant_val = const_cast<char*>(const_column->get_data_at(0).data);
if (constant_val == nullptr || *reinterpret_cast<bool*>(constant_val) == false) {
*push_down = true;
*pdt = PushDownType::ACCEPTABLE;
_eos = true;
}
} else {
@ -543,14 +548,14 @@ void VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, bool
template <PrimitiveType T>
Status VScanNode::_normalize_in_and_eq_predicate(VExpr* expr, VExprContext* expr_ctx,
SlotDescriptor* slot, ColumnValueRange<T>& range,
bool* push_down) {
PushDownType* pdt) {
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(slot->type().precision,
slot->type().scale);
bool effect = false;
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
if (TExprNodeType::IN_PRED == expr->node_type()) {
VInPredicate* pred = static_cast<VInPredicate*>(expr);
if (!_should_push_down_in_predicate(pred, expr_ctx, false)) {
PushDownType temp_pdt = _should_push_down_in_predicate(pred, expr_ctx, false);
if (temp_pdt == PushDownType::UNACCEPTABLE) {
return Status::OK();
}
@ -573,43 +578,45 @@ Status VScanNode::_normalize_in_and_eq_predicate(VExpr* expr, VExprContext* expr
fn_name, !state->hybrid_set->is_date_v2()));
iter->next();
}
range.intersection(temp_range);
effect = true;
*pdt = temp_pdt;
} else if (TExprNodeType::BINARY_PRED == expr->node_type()) {
DCHECK(expr->children().size() == 2);
auto eq_checker = [](const std::string& fn_name) { return fn_name == "eq"; };
StringRef value;
int slot_ref_child = -1;
if (_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx,
&value, &slot_ref_child, eq_checker)) {
DCHECK(slot_ref_child >= 0);
// where A = nullptr should return empty result set
auto fn_name = std::string("");
if (value.data != nullptr) {
if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING ||
T == TYPE_HLL) {
auto val = StringValue(value.data, value.size);
RETURN_IF_ERROR(_change_value_range<true>(
temp_range, reinterpret_cast<void*>(&val),
ColumnValueRange<T>::add_fixed_value_range, fn_name));
} else {
RETURN_IF_ERROR(_change_value_range<true>(
temp_range, reinterpret_cast<void*>(const_cast<char*>(value.data)),
ColumnValueRange<T>::add_fixed_value_range, fn_name));
}
range.intersection(temp_range);
effect = true;
}
PushDownType temp_pdt =
_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr),
expr_ctx, &value, &slot_ref_child, eq_checker);
if (temp_pdt == PushDownType::UNACCEPTABLE) {
return Status::OK();
}
DCHECK(slot_ref_child >= 0);
// where A = nullptr should return empty result set
auto fn_name = std::string("");
if (value.data != nullptr) {
if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING ||
T == TYPE_HLL) {
auto val = StringValue(value.data, value.size);
RETURN_IF_ERROR(_change_value_range<true>(
temp_range, reinterpret_cast<void*>(&val),
ColumnValueRange<T>::add_fixed_value_range, fn_name));
} else {
RETURN_IF_ERROR(_change_value_range<true>(
temp_range, reinterpret_cast<void*>(const_cast<char*>(value.data)),
ColumnValueRange<T>::add_fixed_value_range, fn_name));
}
range.intersection(temp_range);
}
*pdt = temp_pdt;
}
// exceed limit, no conditions will be pushed down to storage engine.
if (range.get_fixed_value_size() > _max_pushdown_conditions_per_column) {
range.set_whole_value_range();
} else {
*push_down = effect;
*pdt = PushDownType::UNACCEPTABLE;
}
return Status::OK();
}
@ -618,14 +625,15 @@ template <PrimitiveType T>
Status VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprContext* expr_ctx,
SlotDescriptor* slot,
ColumnValueRange<T>& range,
bool* push_down) {
PushDownType* pdt) {
bool is_fixed_range = range.is_fixed_value_range();
auto not_in_range = ColumnValueRange<T>::create_empty_column_value_range(range.column_name());
bool effect = false;
PushDownType temp_pdt = PushDownType::UNACCEPTABLE;
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
if (TExprNodeType::IN_PRED == expr->node_type()) {
VInPredicate* pred = static_cast<VInPredicate*>(expr);
if (!_should_push_down_in_predicate(pred, expr_ctx, true)) {
if ((temp_pdt = _should_push_down_in_predicate(pred, expr_ctx, true)) ==
PushDownType::UNACCEPTABLE) {
return Status::OK();
}
@ -652,55 +660,56 @@ Status VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprConte
}
iter->next();
}
effect = true;
} else if (TExprNodeType::BINARY_PRED == expr->node_type()) {
DCHECK(expr->children().size() == 2);
auto ne_checker = [](const std::string& fn_name) { return fn_name == "ne"; };
StringRef value;
int slot_ref_child = -1;
if (_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx,
&value, &slot_ref_child, ne_checker)) {
DCHECK(slot_ref_child >= 0);
// where A = nullptr should return empty result set
if (value.data != nullptr) {
auto fn_name = std::string("");
if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING ||
T == TYPE_HLL) {
auto val = StringValue(value.data, value.size);
if (is_fixed_range) {
RETURN_IF_ERROR(_change_value_range<true>(
range, reinterpret_cast<void*>(&val),
ColumnValueRange<T>::remove_fixed_value_range, fn_name));
} else {
RETURN_IF_ERROR(_change_value_range<true>(
not_in_range, reinterpret_cast<void*>(&val),
ColumnValueRange<T>::add_fixed_value_range, fn_name));
}
if ((temp_pdt = _should_push_down_binary_predicate(
reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx, &value, &slot_ref_child,
ne_checker)) == PushDownType::UNACCEPTABLE) {
return Status::OK();
}
DCHECK(slot_ref_child >= 0);
// where A = nullptr should return empty result set
if (value.data != nullptr) {
auto fn_name = std::string("");
if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING ||
T == TYPE_HLL) {
auto val = StringValue(value.data, value.size);
if (is_fixed_range) {
RETURN_IF_ERROR(_change_value_range<true>(
range, reinterpret_cast<void*>(&val),
ColumnValueRange<T>::remove_fixed_value_range, fn_name));
} else {
if (is_fixed_range) {
RETURN_IF_ERROR(_change_value_range<true>(
range, reinterpret_cast<void*>(const_cast<char*>(value.data)),
ColumnValueRange<T>::remove_fixed_value_range, fn_name));
} else {
RETURN_IF_ERROR(_change_value_range<true>(
not_in_range,
reinterpret_cast<void*>(const_cast<char*>(value.data)),
ColumnValueRange<T>::add_fixed_value_range, fn_name));
}
RETURN_IF_ERROR(_change_value_range<true>(
not_in_range, reinterpret_cast<void*>(&val),
ColumnValueRange<T>::add_fixed_value_range, fn_name));
}
} else {
if (is_fixed_range) {
RETURN_IF_ERROR(_change_value_range<true>(
range, reinterpret_cast<void*>(const_cast<char*>(value.data)),
ColumnValueRange<T>::remove_fixed_value_range, fn_name));
} else {
RETURN_IF_ERROR(_change_value_range<true>(
not_in_range, reinterpret_cast<void*>(const_cast<char*>(value.data)),
ColumnValueRange<T>::add_fixed_value_range, fn_name));
}
effect = true;
}
}
} else {
return Status::OK();
}
if (is_fixed_range ||
not_in_range.get_fixed_value_size() <= _max_pushdown_conditions_per_column) {
if (!is_fixed_range) {
// push down not in condition to storage engine
not_in_range.to_in_condition(_olap_filters, false);
_not_in_value_ranges.push_back(not_in_range);
}
*push_down = effect;
*pdt = temp_pdt;
}
return Status::OK();
}
@ -708,21 +717,26 @@ Status VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprConte
template <PrimitiveType T>
Status VScanNode::_normalize_is_null_predicate(VExpr* expr, VExprContext* expr_ctx,
SlotDescriptor* slot, ColumnValueRange<T>& range,
bool* push_down) {
PushDownType* pdt) {
PushDownType temp_pdt = _should_push_down_is_null_predicate();
if (temp_pdt == PushDownType::UNACCEPTABLE) {
return Status::OK();
}
if (TExprNodeType::FUNCTION_CALL == expr->node_type()) {
if (reinterpret_cast<VectorizedFnCall*>(expr)->fn().name.function_name == "is_null_pred") {
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
slot->type().precision, slot->type().scale);
temp_range.set_contain_null(true);
range.intersection(temp_range);
*push_down = true;
*pdt = temp_pdt;
} else if (reinterpret_cast<VectorizedFnCall*>(expr)->fn().name.function_name ==
"is_not_null_pred") {
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
slot->type().precision, slot->type().scale);
temp_range.set_contain_null(false);
range.intersection(temp_range);
*push_down = true;
*pdt = temp_pdt;
}
}
return Status::OK();
@ -731,7 +745,7 @@ Status VScanNode::_normalize_is_null_predicate(VExpr* expr, VExprContext* expr_c
template <PrimitiveType T>
Status VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext* expr_ctx,
SlotDescriptor* slot,
ColumnValueRange<T>& range, bool* push_down) {
ColumnValueRange<T>& range, PushDownType* pdt) {
if (TExprNodeType::BINARY_PRED == expr->node_type()) {
DCHECK(expr->children().size() == 2);
@ -740,15 +754,16 @@ Status VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext* e
};
StringRef value;
int slot_ref_child = -1;
if (_should_push_down_binary_predicate(reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx,
&value, &slot_ref_child, noneq_checker)) {
PushDownType temp_pdt = _should_push_down_binary_predicate(
reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx, &value, &slot_ref_child,
noneq_checker);
if (temp_pdt != PushDownType::UNACCEPTABLE) {
DCHECK(slot_ref_child >= 0);
const std::string& fn_name =
reinterpret_cast<VectorizedFnCall*>(expr)->fn().name.function_name;
// where A = nullptr should return empty result set
if (value.data != nullptr) {
*push_down = true;
if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING ||
T == TYPE_HLL) {
auto val = StringValue(value.data, value.size);
@ -760,6 +775,7 @@ Status VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext* e
range, reinterpret_cast<void*>(const_cast<char*>(value.data)),
ColumnValueRange<T>::add_value_range, fn_name, true, slot_ref_child));
}
*pdt = temp_pdt;
}
}
}
@ -906,4 +922,59 @@ Status VScanNode::clone_vconjunct_ctx(VExprContext** _vconjunct_ctx) {
return Status::OK();
}
VScanNode::PushDownType VScanNode::_should_push_down_binary_predicate(
VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val,
int* slot_ref_child, const std::function<bool(const std::string&)>& fn_checker) {
if (!fn_checker(fn_call->fn().name.function_name)) {
return PushDownType::UNACCEPTABLE;
}
const auto& children = fn_call->children();
DCHECK(children.size() == 2);
for (size_t i = 0; i < children.size(); i++) {
if (VExpr::expr_without_cast(children[i])->node_type() != TExprNodeType::SLOT_REF) {
// not a slot ref(column)
continue;
}
if (!children[1 - i]->is_constant()) {
// only handle constant value
return PushDownType::UNACCEPTABLE;
} else {
if (const ColumnConst* const_column = check_and_get_column<ColumnConst>(
children[1 - i]->get_const_col(expr_ctx)->column_ptr)) {
*slot_ref_child = i;
*constant_val = const_column->get_data_at(0);
} else {
return PushDownType::UNACCEPTABLE;
}
}
}
return PushDownType::ACCEPTABLE;
}
VScanNode::PushDownType VScanNode::_should_push_down_in_predicate(VInPredicate* pred,
VExprContext* expr_ctx,
bool is_not_in) {
if (pred->is_not_in() != is_not_in) {
return PushDownType::UNACCEPTABLE;
}
InState* state = reinterpret_cast<InState*>(
expr_ctx->fn_context(pred->fn_context_index())
->get_function_state(FunctionContext::FRAGMENT_LOCAL));
HybridSetBase* set = state->hybrid_set.get();
// if there are too many elements in InPredicate, exceed the limit,
// we will not push any condition of this column to storage engine.
// because too many conditions pushed down to storage engine may even
// slow down the query process.
// ATTN: This is just an experience value. You may need to try
// different thresholds to improve performance.
if (set->size() > _max_pushdown_conditions_per_column) {
VLOG_NOTICE << "Predicate value num " << set->size() << " exceed limit "
<< _max_pushdown_conditions_per_column;
return PushDownType::UNACCEPTABLE;
}
return PushDownType::ACCEPTABLE;
}
} // namespace doris::vectorized

View File

@ -73,6 +73,17 @@ public:
const TupleDescriptor* input_tuple_desc() const { return _input_tuple_desc; }
const TupleDescriptor* output_tuple_desc() const { return _output_tuple_desc; }
enum class PushDownType {
// The predicate can not be pushed down to data source
UNACCEPTABLE,
// The predicate can be pushed down to data source
// and the data source can fully evaludate it
ACCEPTABLE,
// The predicate can be pushed down to data source
// but the data source can not fully evaluate it.
PARTIAL_ACCEPTABLE
};
protected:
// Different data sources register different profiles by implementing this method
virtual Status _init_profile();
@ -105,21 +116,24 @@ protected:
// 2. in/not in predicate
// 3. function predicate
// TODO: these interfaces should be change to become more common.
virtual bool _should_push_down_binary_predicate(
virtual PushDownType _should_push_down_binary_predicate(
VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringRef* constant_val,
int* slot_ref_child, const std::function<bool(const std::string&)>& fn_checker) {
return false;
int* slot_ref_child, const std::function<bool(const std::string&)>& fn_checker);
virtual PushDownType _should_push_down_in_predicate(VInPredicate* in_pred,
VExprContext* expr_ctx, bool is_not_in);
virtual PushDownType _should_push_down_function_filter(VectorizedFnCall* fn_call,
VExprContext* expr_ctx,
StringVal* constant_str,
doris_udf::FunctionContext** fn_ctx) {
return PushDownType::UNACCEPTABLE;
}
virtual bool _should_push_down_in_predicate(VInPredicate* in_pred, VExprContext* expr_ctx,
bool is_not_in) {
return false;
}
virtual PushDownType _should_push_down_bloom_filter() { return PushDownType::UNACCEPTABLE; }
virtual bool _should_push_down_function_filter(VectorizedFnCall* fn_call,
VExprContext* expr_ctx, StringVal* constant_str,
doris_udf::FunctionContext** fn_ctx) {
return false;
virtual PushDownType _should_push_down_is_null_predicate() {
return PushDownType::UNACCEPTABLE;
}
// Return true if it is a key column.
@ -175,11 +189,18 @@ protected:
std::vector<FunctionFilter> _push_down_functions;
// slot id -> ColumnValueRange
// Parsed from conjunts
// Parsed from conjuncts
phmap::flat_hash_map<int, std::pair<SlotDescriptor*, ColumnValueRangeType>>
_slot_id_to_value_range;
// column -> ColumnValueRange
std::map<std::string, ColumnValueRangeType> _colname_to_value_range;
// We use _colname_to_value_range to store a column and its conresponding value ranges.
// But if a col is with value range, eg: 1 < col < 10, which is "!is_fixed_range",
// in this case we can not merge "1 < col < 10" with "col not in (2)".
// So we have to save "col not in (2)" to another structure: "_not_in_value_ranges".
// When the data source try to use the value ranges, it should use both ranges in
// "_colname_to_value_range" and in "_not_in_value_ranges"
std::vector<ColumnValueRangeType> _not_in_value_ranges;
bool _need_agg_finalize = true;
@ -233,13 +254,13 @@ private:
Status _normalize_conjuncts();
VExpr* _normalize_predicate(VExpr* conjunct_expr_root);
void _eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, bool* push_down);
void _eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, PushDownType* pdt);
Status _normalize_bloom_filter(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot,
bool* push_down);
PushDownType* pdt);
Status _normalize_function_filters(VExpr* expr, VExprContext* expr_ctx, SlotDescriptor* slot,
bool* push_down);
PushDownType* pdt);
bool _is_predicate_acting_on_slot(VExpr* expr,
const std::function<bool(const std::vector<VExpr*>&,
@ -249,21 +270,21 @@ private:
template <PrimitiveType T>
Status _normalize_in_and_eq_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx,
SlotDescriptor* slot, ColumnValueRange<T>& range,
bool* push_down);
PushDownType* pdt);
template <PrimitiveType T>
Status _normalize_not_in_and_not_eq_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx,
SlotDescriptor* slot, ColumnValueRange<T>& range,
bool* push_down);
PushDownType* pdt);
template <PrimitiveType T>
Status _normalize_noneq_binary_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx,
SlotDescriptor* slot, ColumnValueRange<T>& range,
bool* push_down);
PushDownType* pdt);
template <PrimitiveType T>
Status _normalize_is_null_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx,
SlotDescriptor* slot, ColumnValueRange<T>& range,
bool* push_down);
PushDownType* pdt);
template <bool IsFixed, PrimitiveType PrimitiveType, typename ChangeFixedValueRangeFunc>
static Status _change_value_range(ColumnValueRange<PrimitiveType>& range, void* value,