[Exec](expr) Opt the compound pred performance (#45414) (#46232)

cherry-pick #45414

before:
```
 mysqlslap -hd3 -uroot -P9130  --create-schema=test_db2 -c 10 -i 500 -q "SELECT count(k) FROM sbtest1_dup WHERE k BETWEEN 4850578 AND 8454295 OR k BETWEEN 8776291 AND 29749077;"
Benchmark
	Average number of seconds to run all queries: 0.041 seconds
	Minimum number of seconds to run all queries: 0.037 seconds
	Maximum number of seconds to run all queries: 0.115 seconds
	Number of clients running queries: 10
	Average number of queries per client: 1
```

after:
```
mysqlslap -hd3 -uroot -P9030  --create-schema=test_db -c 10 -i 500 -q "SELECT count(k) FROM sbtest1 WHERE k BETWEEN 4850578 AND 8454295 OR k BETWEEN 8776291 AND 29749077;"
Benchmark
	Average number of seconds to run all queries: 0.029 seconds
	Minimum number of seconds to run all queries: 0.027 seconds
	Maximum number of seconds to run all queries: 0.034 seconds
	Number of clients running queries: 10
	Average number of queries per client: 1
```

### What problem does this PR solve?

Issue Number: close #xxx

Related PR: #xxx

Problem Summary:

### Release note

None

### Check List (For Author)

- Test <!-- At least one of them must be included. -->
    - [ ] Regression test
    - [ ] Unit Test
    - [ ] Manual test (add detailed scripts or steps below)
    - [ ] No need to test or manual test. Explain why:
        - [ ] This is a refactor/code format and no logic has been changed.
        - [ ] Previous test can cover this change.
        - [ ] No code files have been changed.
        - [ ] Other reason <!-- Add your reason?  -->

- Behavior changed:
    - [ ] No.
    - [ ] Yes. <!-- Explain the behavior change -->

- Does this need documentation?
    - [ ] No.
    - [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->

### Check List (For Reviewer who merge this PR)

- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
This commit is contained in:
HappenLee
2025-01-01 16:19:42 +08:00
committed by GitHub
parent 726e1c8c80
commit 8e730faec5

View File

@ -153,12 +153,15 @@ public:
if (fast_execute(context, block, result_column_id)) {
return Status::OK();
}
if (children().size() == 1 || !_all_child_is_compound_and_not_const()) {
if (children().size() == 1 || _has_const_child()) {
return VectorizedFnCall::execute(context, block, result_column_id);
}
int lhs_id = -1;
int rhs_id = -1;
bool lhs_mem_can_reuse = _children[0]->is_compound_predicate();
bool rhs_mem_can_reuse = _children[1]->is_compound_predicate();
RETURN_IF_ERROR(_children[0]->execute(context, block, &lhs_id));
ColumnPtr lhs_column =
block->get_by_position(lhs_id).column->convert_to_full_column_if_const();
@ -205,13 +208,22 @@ public:
return Status::OK();
};
auto return_result_column_id = [&](ColumnPtr res_column, int res_id) -> int {
auto return_result_column_id = [&](ColumnPtr res_column, int res_id,
bool mem_reuse) -> int {
if (!mem_reuse) {
res_column = res_column->clone_resized(size);
}
if (result_is_nullable && !res_column->is_nullable()) {
auto result_column =
ColumnNullable::create(res_column, ColumnUInt8::create(size, 0));
res_id = block->columns();
block->insert({std::move(result_column), _data_type, _expr_name});
} else if (!mem_reuse) {
res_id = block->columns();
block->insert({std::move(res_column), _data_type, _expr_name});
}
return res_id;
};
@ -226,6 +238,33 @@ public:
return null_map_data;
};
auto vector_vector = [&]<bool is_and_op>() {
if (lhs_mem_can_reuse) {
*result_column_id = lhs_id;
} else if (rhs_mem_can_reuse) {
*result_column_id = rhs_id;
auto tmp_column = rhs_data_column;
rhs_data_column = lhs_data_column;
lhs_data_column = tmp_column;
} else {
*result_column_id = block->columns();
auto col_res = lhs_column->clone_resized(size);
lhs_data_column = assert_cast<ColumnUInt8*>(col_res.get())->get_data().data();
block->insert({std::move(col_res), _data_type, _expr_name});
}
if constexpr (is_and_op) {
for (size_t i = 0; i < size; ++i) {
lhs_data_column[i] &= rhs_data_column[i];
}
} else {
for (size_t i = 0; i < size; ++i) {
lhs_data_column[i] |= rhs_data_column[i];
}
}
};
auto vector_vector_null = [&]<bool is_and_op>() {
auto col_res = ColumnUInt8::create(size);
auto col_nulls = ColumnUInt8::create(size);
@ -261,28 +300,28 @@ public:
//2. nullable column: null map all is not null
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) {
// false and any = false, return lhs
*result_column_id = return_result_column_id(lhs_column, lhs_id);
*result_column_id = return_result_column_id(lhs_column, lhs_id, lhs_mem_can_reuse);
} else {
RETURN_IF_ERROR(get_rhs_colum());
if ((lhs_all_true && !lhs_is_nullable) || //not null column
(lhs_all_true && lhs_all_is_not_null)) { //nullable column
// true and any = any, return rhs
*result_column_id = return_result_column_id(rhs_column, rhs_id);
*result_column_id =
return_result_column_id(rhs_column, rhs_id, rhs_mem_can_reuse);
} else if ((rhs_all_false && !rhs_is_nullable) ||
(rhs_all_false && rhs_all_is_not_null)) {
// any and false = false, return rhs
*result_column_id = return_result_column_id(rhs_column, rhs_id);
*result_column_id =
return_result_column_id(rhs_column, rhs_id, rhs_mem_can_reuse);
} else if ((rhs_all_true && !rhs_is_nullable) ||
(rhs_all_true && rhs_all_is_not_null)) {
// any and true = any, return lhs
*result_column_id = return_result_column_id(lhs_column, lhs_id);
*result_column_id =
return_result_column_id(lhs_column, lhs_id, lhs_mem_can_reuse);
} else {
if (!result_is_nullable) {
*result_column_id = lhs_id;
for (size_t i = 0; i < size; i++) {
lhs_data_column[i] &= rhs_data_column[i];
}
vector_vector.template operator()<true>();
} else {
vector_vector_null.template operator()<true>();
}
@ -293,26 +332,26 @@ public:
// false or NULL ----> NULL
if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true && lhs_all_is_not_null)) {
// true or any = true, return lhs
*result_column_id = return_result_column_id(lhs_column, lhs_id);
*result_column_id = return_result_column_id(lhs_column, lhs_id, lhs_mem_can_reuse);
} else {
RETURN_IF_ERROR(get_rhs_colum());
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) {
// false or any = any, return rhs
*result_column_id = return_result_column_id(rhs_column, rhs_id);
*result_column_id =
return_result_column_id(rhs_column, rhs_id, rhs_mem_can_reuse);
} else if ((rhs_all_true && !rhs_is_nullable) ||
(rhs_all_true && rhs_all_is_not_null)) {
// any or true = true, return rhs
*result_column_id = return_result_column_id(rhs_column, rhs_id);
*result_column_id =
return_result_column_id(rhs_column, rhs_id, rhs_mem_can_reuse);
} else if ((rhs_all_false && !rhs_is_nullable) ||
(rhs_all_false && rhs_all_is_not_null)) {
// any or false = any, return lhs
*result_column_id = return_result_column_id(lhs_column, lhs_id);
*result_column_id =
return_result_column_id(lhs_column, lhs_id, lhs_mem_can_reuse);
} else {
if (!result_is_nullable) {
*result_column_id = lhs_id;
for (size_t i = 0; i < size; i++) {
lhs_data_column[i] |= rhs_data_column[i];
}
vector_vector.template operator()<false>();
} else {
vector_vector_null.template operator()<false>();
}
@ -337,14 +376,9 @@ private:
return (l_null & r_null) | (r_null & (r_null ^ a)) | (l_null & (l_null ^ b));
}
bool _all_child_is_compound_and_not_const() const {
for (auto child : _children) {
// we can make sure a non-const compound predicate's return column is allowed to be modified locally.
if (child->is_constant() || !child->is_compound_predicate()) {
return false;
}
}
return true;
bool _has_const_child() const {
return std::ranges::any_of(_children,
[](const VExprSPtr& arg) -> bool { return arg->is_constant(); });
}
std::pair<uint8*, uint8*> _get_raw_data_and_null_map(ColumnPtr column,