[fix](delete) fix delete from bug which can get wrong result (#17146)

理论上,如果是两次独立的删除,比如delete from table where a=1; delete from table where a=2;其实这个地方应该可以使用的,但是目前的代码,是把所有不同版本的delete predicates和不同列的delete predicates都放到一起了,失去了版本信息、失去了谓词间可能是and的关系,统一弱化成了delete predicates都是独立的,有一个delete predicates满足条件,就把page都去掉。
这个pr的修改方式,就是在当前代码的基础上,当只有一个delete predicate的时候才能保证后续淘汰page的正确性,所以这里一律加了 == 1的判断才传递delete predicates。
如果要把不同版本的delete predicates和不同列的delete predicates作为完整和严谨的逻辑去判断page,需要修改的设计就有点多了,目前的方案算是一种优先解决bug的思路,后续可以进一步把delete predicates这块加速zone判断进行page淘汰的逻辑完善,提高delete predicates使用的场景。
This commit is contained in:
xueweizhang
2023-02-28 09:20:10 +08:00
committed by GitHub
parent b51ce415e7
commit e0cd8599d2
8 changed files with 405 additions and 27 deletions

View File

@ -311,8 +311,8 @@ void DeleteHandler::finalize() {
void DeleteHandler::get_delete_conditions_after_version(
int64_t version, AndBlockColumnPredicate* and_block_column_predicate_ptr,
std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>* col_id_to_del_predicates)
const {
std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>*
del_predicates_for_zone_map) const {
for (auto& del_cond : _del_conds) {
if (del_cond.filter_version > version) {
// now, only query support delete column predicate operator
@ -322,33 +322,28 @@ void DeleteHandler::get_delete_conditions_after_version(
new SingleColumnBlockPredicate(del_cond.column_predicate_vec[0]);
and_block_column_predicate_ptr->add_column_predicate(
single_column_block_predicate);
if (col_id_to_del_predicates->count(
if (del_predicates_for_zone_map->count(
del_cond.column_predicate_vec[0]->column_id()) < 1) {
col_id_to_del_predicates->insert(
del_predicates_for_zone_map->insert(
{del_cond.column_predicate_vec[0]->column_id(),
std::vector<const ColumnPredicate*> {}});
}
(*col_id_to_del_predicates)[del_cond.column_predicate_vec[0]->column_id()]
(*del_predicates_for_zone_map)[del_cond.column_predicate_vec[0]->column_id()]
.push_back(del_cond.column_predicate_vec[0]);
} else {
auto or_column_predicate = new OrBlockColumnPredicate();
// build or_column_predicate
std::for_each(
del_cond.column_predicate_vec.cbegin(),
del_cond.column_predicate_vec.cend(),
[&or_column_predicate,
col_id_to_del_predicates](const ColumnPredicate* predicate) {
if (col_id_to_del_predicates->count(predicate->column_id()) < 1) {
col_id_to_del_predicates->insert(
{predicate->column_id(),
std::vector<const ColumnPredicate*> {}});
}
(*col_id_to_del_predicates)[predicate->column_id()].push_back(
predicate);
or_column_predicate->add_column_predicate(
new SingleColumnBlockPredicate(predicate));
});
// when delete from where a = 1 and b = 2, we can not use del_predicates_for_zone_map to filter zone page,
// so here do not put predicate to del_predicates_for_zone_map,
// refer #17145 for more details.
// // TODO: need refactor design and code to use more version delete and more column delete to filter zone page.
std::for_each(del_cond.column_predicate_vec.cbegin(),
del_cond.column_predicate_vec.cend(),
[&or_column_predicate](const ColumnPredicate* predicate) {
or_column_predicate->add_column_predicate(
new SingleColumnBlockPredicate(predicate));
});
and_block_column_predicate_ptr->add_column_predicate(or_column_predicate);
}
}

View File

@ -101,7 +101,7 @@ public:
void get_delete_conditions_after_version(
int64_t version, AndBlockColumnPredicate* and_block_column_predicate_ptr,
std::unordered_map<int32_t, std::vector<const ColumnPredicate*>>*
col_id_to_del_predicates) const;
del_predicates_for_zone_map) const;
private:
// Use regular expression to extract 'column_name', 'op' and 'operands'

View File

@ -101,7 +101,7 @@ public:
std::vector<ColumnPredicate*> column_predicates;
std::vector<ColumnPredicate*> column_predicates_except_leafnode_of_andnode;
std::unordered_map<int32_t, std::shared_ptr<AndBlockColumnPredicate>> col_id_to_predicates;
std::unordered_map<int32_t, std::vector<const ColumnPredicate*>> col_id_to_del_predicates;
std::unordered_map<int32_t, std::vector<const ColumnPredicate*>> del_predicates_for_zone_map;
TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE;
// REQUIRED (null is not allowed)

View File

@ -38,7 +38,7 @@ void BetaRowsetReader::reset_read_options() {
_read_options.delete_condition_predicates = std::make_shared<AndBlockColumnPredicate>();
_read_options.column_predicates.clear();
_read_options.col_id_to_predicates.clear();
_read_options.col_id_to_del_predicates.clear();
_read_options.del_predicates_for_zone_map.clear();
_read_options.key_ranges.clear();
}
@ -83,7 +83,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
if (read_context->delete_handler != nullptr) {
read_context->delete_handler->get_delete_conditions_after_version(
_rowset->end_version(), _read_options.delete_condition_predicates.get(),
&_read_options.col_id_to_del_predicates);
&_read_options.del_predicates_for_zone_map);
}
std::vector<uint32_t> read_columns;

View File

@ -129,7 +129,7 @@ Status Segment::new_iterator(const Schema& schema, const StorageReadOptions& rea
}
RETURN_IF_ERROR(load_index());
if (read_options.col_id_to_del_predicates.empty() &&
if (read_options.delete_condition_predicates->num_of_column_predicate() == 0 &&
read_options.push_down_agg_type_opt != TPushAggOp::NONE) {
iter->reset(vectorized::new_vstatistics_iterator(this->shared_from_this(), schema));
} else {

View File

@ -380,8 +380,8 @@ Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
RETURN_IF_ERROR(_column_iterators[_schema.unique_id(cid)]->get_row_ranges_by_zone_map(
_opts.col_id_to_predicates[cid].get(),
_opts.col_id_to_del_predicates.count(cid) > 0
? &(_opts.col_id_to_del_predicates[cid])
_opts.del_predicates_for_zone_map.count(cid) > 0
? &(_opts.del_predicates_for_zone_map[cid])
: nullptr,
&column_row_ranges));
// intersect different columns's row ranges to get final row ranges by zone map

View File

@ -57,3 +57,289 @@
-- !sql --
-- !sql --
-- !sql --
-- !sql --
1 11
1 22
1 33
1 44
1 55
1 66
1 77
1 88
1 99
1 100
1 101
1 102
1 111
1 122
1 133
1 144
1 155
1 166
1 177
1 188
1 199
1 200
1 201
1 202
-- !sql --
1 11
1 22
1 33
1 44
1 55
1 66
1 77
1 88
1 99
1 100
1 101
1 102
1 111
1 122
1 133
1 144
1 155
1 166
1 177
1 188
1 199
1 200
1 201
1 202
-- !sql --
1 11
1 22
1 33
1 44
1 55
1 66
1 77
1 88
1 99
1 100
1 101
1 102
1 111
1 122
1 133
1 144
1 155
1 166
1 177
1 188
1 199
1 200
1 201
1 202
-- !sql --
-- !sql --
1 22
1 33
1 44
1 55
1 66
1 77
1 88
1 99
1 100
1 101
1 102
1 111
1 122
1 133
1 144
1 155
1 166
1 177
1 188
1 199
1 200
1 201
1 202
-- !sql --
1 22
1 33
1 44
1 55
1 66
1 77
1 88
1 99
1 100
1 101
1 102
1 111
1 122
1 133
1 144
1 155
1 166
1 177
1 188
1 199
1 200
1 201
1 202
-- !sql --
1 33
1 44
1 55
1 66
1 77
1 88
1 99
1 100
1 101
1 102
1 111
1 122
1 133
1 144
1 155
1 166
1 177
1 188
1 199
1 200
1 201
1 202
-- !sql --
1 33
1 44
1 55
1 66
1 77
1 88
1 99
1 100
1 101
1 102
1 111
1 122
1 133
1 144
1 155
1 166
1 177
1 188
1 199
1 200
1 201
1 202
-- !sql --
1 44
1 55
1 66
1 77
1 88
1 99
1 100
1 101
1 102
1 111
1 122
1 133
1 144
1 155
1 166
1 177
1 188
1 199
1 200
1 201
1 202
-- !sql --
1 44
1 55
1 66
1 77
1 88
1 99
1 100
1 101
1 102
1 111
1 122
1 133
1 144
1 155
1 166
1 177
1 188
1 199
1 200
1 201
1 202
-- !sql --
1 55
1 66
1 77
1 88
1 99
1 100
1 101
1 102
1 111
1 122
1 133
1 144
1 155
1 166
1 177
1 188
1 199
1 200
1 201
1 202
-- !sql --
1 55
1 66
1 77
1 88
1 99
1 100
1 101
1 102
1 111
1 122
1 133
1 144
1 155
1 166
1 177
1 188
1 199
1 200
1 201
1 202
-- !sql --
1 201
1 202
-- !sql --
1 201
-- !sql --
1 11
1 22
-- !sql --
1 11

View File

@ -54,4 +54,101 @@ suite("test_zone_map_delete") {
qt_sql """select * from ${tableName} ORDER BY k1;"""
sql """ DROP TABLE IF EXISTS ${tableName} """
// =========================
sql """
CREATE TABLE IF NOT EXISTS ${tableName} (
`k1` bigint(20) NULL,
`k2` largeint(40) NULL,
`k3` largeint(40) NULL
) ENGINE=OLAP
AGGREGATE KEY(`k1`, `k2`, `k3`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`k1`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "true"
);
"""
sql """ truncate table ${tableName}; """
sql """ insert into ${tableName} values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);"""
sql """ delete from ${tableName} where k2=0;"""
sql """ delete from ${tableName} where k2=1;"""
qt_sql """ select k2,k3 from ${tableName} ORDER BY k3;"""
qt_sql """ select k2,k3 from ${tableName} where k2 = 1 ORDER BY k3;"""
sql """ truncate table ${tableName}; """
sql """ insert into ${tableName} values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);"""
sql """ delete from ${tableName} where k2=1 and k2=0;"""
qt_sql """ select k2,k3 from ${tableName} ORDER BY k3;"""
qt_sql """ select k2,k3 from ${tableName} where k2 = 1 ORDER BY k3;"""
sql """ truncate table ${tableName}; """
sql """ insert into ${tableName} values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);"""
sql """ delete from ${tableName} where k2 is null and k3=00;"""
qt_sql """ select k2,k3 from ${tableName} ORDER BY k3;"""
qt_sql """ select k2,k3 from ${tableName} where k2 is null ORDER BY k3;"""
sql """delete from ${tableName} where k2 is not null and k3=11;"""
qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
qt_sql """select k2,k3 from ${tableName} where k2 is not null ORDER BY k3;"""
sql """delete from ${tableName} where k2=1 and k3=22;"""
qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
qt_sql """select k2,k3 from ${tableName} where k2=1 ORDER BY k3;"""
sql """delete from ${tableName} where k2!=0 and k3=33;"""
qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
qt_sql """select k2,k3 from ${tableName} where k2!=0 ORDER BY k3;"""
sql """delete from ${tableName} where k2 not in (0) and k3=44;"""
qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
qt_sql """select k2,k3 from ${tableName} where k2 not in (0) ORDER BY k3;"""
sql """delete from ${tableName} where k2=1 and k3 >= 11 and k3 <=200;"""
qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
qt_sql """select k2,k3 from ${tableName} where k3 = 201 ORDER BY k3;"""
sql """truncate table ${tableName};"""
sql """insert into ${tableName} values(0,1,11),(0,1,22),(0,1,33),(0,1,44),(0,1,55),(0,1,66),(0,1,77),(0,1,88),(0,1,99),(0,1,100),(0,1,101),(0,1,102),(0,1,111),(0,1,122),(0,1,133),(0,1,144),(0,1,155),(0,1,166),(0,1,177),(0,1,188),(0,1,199),(0,1,200),(0,1,201),(0,1,202);"""
sql """delete from ${tableName} where k2=1 and k3 <=202 and k3 >= 33;"""
qt_sql """select k2,k3 from ${tableName} ORDER BY k3;"""
qt_sql """select k2,k3 from ${tableName} where k3 = 11 ORDER BY k3;"""
sql """ DROP TABLE IF EXISTS ${tableName} """
}