[fix](scan) Incorrect scan keys lead to wrong query results. (#40814) (#40971)

## Proposed changes
Cherry-pick of #40814.

Table contents:
```
mysql [doris_14555]>select * from table_9436528_3;
+------+------+------+------+------------------------+--------------------+------+
| col1 | col2 | col3 | col5 | col4                   | col6               | col7 |
+------+------+------+------+------------------------+--------------------+------+
| -100 |    1 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.904282 | NULL |
| -100 |    1 |   92 |    1 | 2024-02-16 04:37:37.00 |   23423423.0324234 | NULL |
| -100 |    0 |  -82 |    0 | 2023-11-11 10:49:43.00 |   840968969.872149 | NULL |
+------+------+------+------+------------------------+--------------------+------+
```
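Wrong result: the query below returns the row with `col3 = 92`, which does not satisfy `col3 = -82`, and omits the matching row with `col2 = 0` and `col3 = -82`: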
```
mysql [doris_14555]>select * from table_9436528_3 where col1 <= -100 and col2 in (true, false) and col3 = -82;
+------+------+------+------+------------------------+--------------------+------+
| col1 | col2 | col3 | col5 | col4                   | col6               | col7 |
+------+------+------+------+------------------------+--------------------+------+
| -100 |    1 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.904282 | NULL |
| -100 |    1 |   92 |    1 | 2024-02-16 04:37:37.00 |   23423423.0324234 | NULL |
+------+------+------+------+------------------------+--------------------+------+
```

## Proposed changes

Issue Number: close #xxx

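Add a `should_break` output flag to `OlapScanKeys::extend_scan_key`. The flag is set when the scan keys cannot be extended by a column's range, and `_build_key_ranges_and_filters` (in both `OlapScanLocalState` and `NewOlapScanNode`) stops iterating over the remaining key columns once it is set.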
This commit is contained in:
Jerry Hu
2024-09-19 22:01:02 +08:00
committed by GitHub
parent f2b93d5896
commit b8bc9b699c
5 changed files with 181 additions and 14 deletions

View File

@ -455,7 +455,7 @@ public:
template <PrimitiveType primitive_type>
Status extend_scan_key(ColumnValueRange<primitive_type>& range, int32_t max_scan_key_num,
bool* exact_value, bool* eos);
bool* exact_value, bool* eos, bool* should_break);
Status get_key_range(std::vector<std::unique_ptr<OlapScanRange>>* key_range);
@ -1100,7 +1100,8 @@ bool ColumnValueRange<primitive_type>::has_intersection(ColumnValueRange<primiti
template <PrimitiveType primitive_type>
Status OlapScanKeys::extend_scan_key(ColumnValueRange<primitive_type>& range,
int32_t max_scan_key_num, bool* exact_value, bool* eos) {
int32_t max_scan_key_num, bool* exact_value, bool* eos,
bool* should_break) {
using CppType = typename PrimitiveTypeTraits<primitive_type>::CppType;
using ConstIterator = typename std::set<CppType>::const_iterator;
@ -1124,6 +1125,7 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange<primitive_type>& range,
range.convert_to_range_value();
*exact_value = false;
} else {
*should_break = true;
return Status::OK();
}
}

View File

@ -451,9 +451,13 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() {
// we use `exact_range` to identify a key range is an exact range or not when we convert
// it to `_scan_keys`. If `exact_range` is true, we can just discard it from `_olap_filters`.
bool exact_range = true;
// If `_scan_keys` cannot be extended by a column's range, stop extending it with further columns.
bool should_break = false;
bool eos = false;
for (int column_index = 0;
column_index < column_names.size() && !_scan_keys.has_range_value() && !eos;
for (int column_index = 0; column_index < column_names.size() &&
!_scan_keys.has_range_value() && !eos && !should_break;
++column_index) {
auto iter = _colname_to_value_range.find(column_names[column_index]);
if (_colname_to_value_range.end() == iter) {
@ -467,8 +471,9 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() {
// but the original range may be converted to olap filters, if it's not a exact_range.
auto temp_range = range;
if (range.get_fixed_value_size() <= p._max_pushdown_conditions_per_column) {
RETURN_IF_ERROR(_scan_keys.extend_scan_key(
temp_range, p._max_scan_key_num, &exact_range, &eos));
RETURN_IF_ERROR(
_scan_keys.extend_scan_key(temp_range, p._max_scan_key_num,
&exact_range, &eos, &should_break));
if (exact_range) {
_colname_to_value_range.erase(iter->first);
}
@ -476,8 +481,9 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() {
// If the fixed-value count exceeds max_pushdown_conditions_per_column, use the whole value range instead.
// The range is not erased from _colname_to_value_range, because it cannot be an exact range.
temp_range.set_whole_value_range();
RETURN_IF_ERROR(_scan_keys.extend_scan_key(
temp_range, p._max_scan_key_num, &exact_range, &eos));
RETURN_IF_ERROR(
_scan_keys.extend_scan_key(temp_range, p._max_scan_key_num,
&exact_range, &eos, &should_break));
}
return Status::OK();
},

View File

@ -273,9 +273,13 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() {
// we use `exact_range` to identify a key range is an exact range or not when we convert
// it to `_scan_keys`. If `exact_range` is true, we can just discard it from `_olap_filters`.
bool exact_range = true;
// If `_scan_keys` cannot be extended by a column's range, stop extending it with further columns.
bool should_break = false;
bool eos = false;
for (int column_index = 0;
column_index < column_names.size() && !_scan_keys.has_range_value() && !eos;
for (int column_index = 0; column_index < column_names.size() &&
!_scan_keys.has_range_value() && !eos && !should_break;
++column_index) {
auto iter = _colname_to_value_range.find(column_names[column_index]);
if (_colname_to_value_range.end() == iter) {
@ -289,8 +293,9 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() {
// but the original range may be converted to olap filters, if it's not a exact_range.
auto temp_range = range;
if (range.get_fixed_value_size() <= _max_pushdown_conditions_per_column) {
RETURN_IF_ERROR(_scan_keys.extend_scan_key(
temp_range, _max_scan_key_num, &exact_range, &eos));
RETURN_IF_ERROR(
_scan_keys.extend_scan_key(temp_range, _max_scan_key_num,
&exact_range, &eos, &should_break));
if (exact_range) {
_colname_to_value_range.erase(iter->first);
}
@ -298,8 +303,9 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() {
// If the fixed-value count exceeds max_pushdown_conditions_per_column, use the whole value range instead.
// The range is not erased from _colname_to_value_range, because it cannot be an exact range.
temp_range.set_whole_value_range();
RETURN_IF_ERROR(_scan_keys.extend_scan_key(
temp_range, _max_scan_key_num, &exact_range, &eos));
RETURN_IF_ERROR(
_scan_keys.extend_scan_key(temp_range, _max_scan_key_num,
&exact_range, &eos, &should_break));
}
return Status::OK();
},