[fix](expr) Re-fix BE core dump when common expr filters a delete condition column (#29328)

The column of an additional delete condition is materialized at the end of the block
and is erased after _output_column_by_sel_idx, so it does not need to be filtered;
it is therefore erased from _columns_to_filter in the first next_batch.
Eg:
delete from table where a = 10;
select b from table;
Column a is only effective in the segment iterator; the block from the query engine only contains column b,
so there is no need to filter column a by expr.
This commit is contained in:
Xinyi Zou
2023-12-30 08:40:03 +08:00
committed by GitHub
parent 5c9e0d906a
commit 56058623f6
2 changed files with 21 additions and 32 deletions

View File

@ -1504,8 +1504,6 @@ Status SegmentIterator::_seek_columns(const std::vector<ColumnId>& column_ids, r
// todo(wb) need a UT here
Status SegmentIterator::_vec_init_lazy_materialization() {
_is_pred_column.resize(_schema->columns().size(), false);
std::vector<bool> is_pred_column_no_del_condition;
is_pred_column_no_del_condition.resize(_schema->columns().size(), false);
// including short/vec/delete pred
std::set<ColumnId> pred_column_ids;
@ -1547,7 +1545,6 @@ Status SegmentIterator::_vec_init_lazy_materialization() {
for (auto predicate : _col_predicates) {
auto cid = predicate->column_id();
_is_pred_column[cid] = true;
is_pred_column_no_del_condition[cid] = true;
pred_column_ids.insert(cid);
// check pred using short eval or vec eval
@ -1601,16 +1598,10 @@ Status SegmentIterator::_vec_init_lazy_materialization() {
if (!_common_expr_columns.empty()) {
_is_need_expr_eval = true;
for (auto cid : _schema->column_ids()) {
// pred column also needs to be filtered by expr, exclude delete condition column,
// Delete condition column not need to be filtered, query engine does not need it,
// after _output_column_by_sel_idx, delete condition materialize column will be erase
// at the end of the block.
// Eg:
// `delete from table where a = 10;`
// `select b from table;`
// a column only effective in segment iterator, the block from query engine only contain the b column,
// so no need to filter a column by expr.
if (_is_common_expr_column[cid] || is_pred_column_no_del_condition[cid]) {
// pred column also needs to be filtered by expr, exclude additional delete condition column.
// if delete condition column not in the block, no filter is needed
// and will be removed from _columns_to_filter in the first next_batch.
if (_is_common_expr_column[cid] || _is_pred_column[cid]) {
auto loc = _schema_block_id_map[cid];
_columns_to_filter.push_back(loc);
}
@ -2171,6 +2162,22 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
_current_return_columns[cid]->reserve(_opts.block_row_max);
}
}
// Additional deleted filter condition will be materialized column be at the end of the block,
// after _output_column_by_sel_idx will be erase, we not need to filter it,
// so erase it from _columns_to_filter in the first next_batch.
// Eg:
// `delete from table where a = 10;`
// `select b from table;`
// a column only effective in segment iterator, the block from query engine only contain the b column,
// so no need to filter a column by expr.
for (auto it = _columns_to_filter.begin(); it != _columns_to_filter.end();) {
if (*it >= block->columns()) {
it = _columns_to_filter.erase(it);
} else {
++it;
}
}
}
_init_current_block(block, _current_return_columns);
_converted_column_ids.assign(_schema->columns().size(), 0);
@ -2185,25 +2192,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
_replace_version_col(_current_batch_rows_read);
}
// If col >= block->columns(), it means col should not be filtered, there is a BUG.
// such as delete condition column was incorrectly put into columns_to_filter,
// which is usually at the end of the block. only check during the first next_batch.
if (_opts.stats->blocks_load == 0) {
for (const auto& col : _columns_to_filter) {
if (col >= block->columns()) {
std::ostringstream ss;
for (const auto& i : _columns_to_filter) {
ss << i << "-";
}
throw Exception(
ErrorCode::INTERNAL_ERROR,
"filter block column id(index) greater than block->columns(), "
"column id={}, all columns that need filter={}, block columns num={}",
col, ss.str().substr(0, ss.str().length() - 1), block->columns());
}
}
}
_opts.stats->blocks_load += 1;
_opts.stats->raw_rows_read += _current_batch_rows_read;

View File

@ -63,6 +63,7 @@ suite("regression_test_variant_delete_and_update", "variant_type"){
sql """update ${table_name} set v = '{"updated_value" : 1111}' where k = 7"""
qt_sql "select * from ${table_name} order by k"
sql """delete from ${table_name} where v = 'xxx' or vs = 'yyy'"""
sql """delete from ${table_name} where vs = 'xxx' or vs = 'yyy'"""
qt_sql "select * from ${table_name} order by k"