[refactor](delete predicate) Unify delete to segmentiterator (#11650)
* remove seek columns and unify delete columns in rowset reader Co-authored-by: yiguolei <yiguolei@gmail.com>
This commit is contained in:
@ -202,7 +202,7 @@ CollectIterator::Level0Iterator::Level0Iterator(RowsetReaderSharedPtr rs_reader,
|
||||
CollectIterator::Level0Iterator::~Level0Iterator() = default;
|
||||
|
||||
Status CollectIterator::Level0Iterator::init() {
|
||||
RETURN_NOT_OK_LOG(_row_cursor.init(_reader->_tablet_schema, _reader->_seek_columns),
|
||||
RETURN_NOT_OK_LOG(_row_cursor.init(_reader->_tablet_schema, _reader->_return_columns),
|
||||
"failed to init row cursor");
|
||||
return (this->*_refresh_current_row)();
|
||||
}
|
||||
|
||||
@ -212,7 +212,6 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params,
|
||||
_reader_context.return_columns = &_return_columns;
|
||||
_reader_context.read_orderby_key_columns =
|
||||
_orderby_key_columns.size() > 0 ? &_orderby_key_columns : nullptr;
|
||||
_reader_context.seek_columns = &_seek_columns;
|
||||
_reader_context.load_bf_columns = &_load_bf_columns;
|
||||
_reader_context.load_bf_all_columns = &_load_bf_all_columns;
|
||||
_reader_context.conditions = &_conditions;
|
||||
@ -270,15 +269,11 @@ Status TabletReader::_init_params(const ReaderParams& read_params) {
|
||||
LOG(WARNING) << "fail to init keys param. res=" << res;
|
||||
return res;
|
||||
}
|
||||
|
||||
res = _init_orderby_keys_param(read_params);
|
||||
if (!res.ok()) {
|
||||
LOG(WARNING) << "fail to init orderby keys param. res=" << res;
|
||||
return res;
|
||||
}
|
||||
|
||||
_init_seek_columns();
|
||||
|
||||
if (_tablet_schema->has_sequence_col()) {
|
||||
auto sequence_col_idx = _tablet_schema->sequence_col_idx();
|
||||
DCHECK_NE(sequence_col_idx, -1);
|
||||
@ -298,19 +293,6 @@ Status TabletReader::_init_return_columns(const ReaderParams& read_params) {
|
||||
if (read_params.reader_type == READER_QUERY) {
|
||||
_return_columns = read_params.return_columns;
|
||||
_tablet_columns_convert_to_null_set = read_params.tablet_columns_convert_to_null_set;
|
||||
|
||||
if (!_delete_handler.empty()) {
|
||||
// We need to fetch columns which there are deletion conditions on them.
|
||||
std::set<uint32_t> column_set(_return_columns.begin(), _return_columns.end());
|
||||
for (const auto& conds : _delete_handler.get_delete_conditions()) {
|
||||
for (const auto& cond_column : conds.del_cond->columns()) {
|
||||
if (column_set.find(cond_column.first) == column_set.end()) {
|
||||
column_set.insert(cond_column.first);
|
||||
_return_columns.push_back(cond_column.first);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto id : read_params.return_columns) {
|
||||
if (_tablet_schema->column(id).is_key()) {
|
||||
_key_cids.push_back(id);
|
||||
@ -360,26 +342,6 @@ Status TabletReader::_init_return_columns(const ReaderParams& read_params) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void TabletReader::_init_seek_columns() {
|
||||
std::unordered_set<uint32_t> column_set(_return_columns.begin(), _return_columns.end());
|
||||
for (auto& it : _conditions.columns()) {
|
||||
column_set.insert(it.first);
|
||||
}
|
||||
size_t max_key_column_count = 0;
|
||||
for (const auto& key : _keys_param.start_keys) {
|
||||
max_key_column_count = std::max(max_key_column_count, key.field_count());
|
||||
}
|
||||
for (const auto& key : _keys_param.end_keys) {
|
||||
max_key_column_count = std::max(max_key_column_count, key.field_count());
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < _tablet_schema->num_columns(); i++) {
|
||||
if (i < max_key_column_count || column_set.find(i) != column_set.end()) {
|
||||
_seek_columns.push_back(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Status TabletReader::_init_keys_param(const ReaderParams& read_params) {
|
||||
if (read_params.start_key.empty()) {
|
||||
return Status::OK();
|
||||
|
||||
@ -175,7 +175,6 @@ protected:
|
||||
Status _init_delete_condition(const ReaderParams& read_params);
|
||||
|
||||
Status _init_return_columns(const ReaderParams& read_params);
|
||||
void _init_seek_columns();
|
||||
|
||||
void _init_load_bf_columns(const ReaderParams& read_params);
|
||||
void _init_load_bf_columns(const ReaderParams& read_params, Conditions* conditions,
|
||||
@ -194,7 +193,6 @@ protected:
|
||||
// only use in outer join which change the column nullable which must keep same in
|
||||
// vec query engine
|
||||
std::unordered_set<uint32_t>* _tablet_columns_convert_to_null_set = nullptr;
|
||||
std::vector<uint32_t> _seek_columns;
|
||||
|
||||
TabletSharedPtr _tablet;
|
||||
RowsetReaderContext _reader_context;
|
||||
|
||||
@ -47,6 +47,13 @@ RowBlock::~RowBlock() {
|
||||
|
||||
void RowBlock::init(const RowBlockInfo& block_info) {
|
||||
_info = block_info;
|
||||
// Sometimes info.column_ids is not set, which means all columns in the schema
|
||||
// but some logic depends on column_ids, init here to avoid many if elses
|
||||
if (_info.column_ids.empty()) {
|
||||
for (uint32_t col_id = 0; col_id < _schema->num_columns(); ++col_id) {
|
||||
_info.column_ids.push_back(col_id);
|
||||
}
|
||||
}
|
||||
_null_supported = block_info.null_supported;
|
||||
_capacity = _info.row_num;
|
||||
_compute_layout();
|
||||
|
||||
@ -58,8 +58,10 @@ RowBlockV2::~RowBlockV2() {
|
||||
delete[] _selection_vector;
|
||||
}
|
||||
|
||||
// RowBlockV2 has more columns than RowBlockV1, so that should use rowblock v1's columnids.
|
||||
// It means will omit some columns.
|
||||
Status RowBlockV2::convert_to_row_block(RowCursor* helper, RowBlock* dst) {
|
||||
for (auto cid : _schema.column_ids()) {
|
||||
for (auto cid : dst->row_block_info().column_ids) {
|
||||
bool is_nullable = _schema.column(cid)->is_nullable();
|
||||
if (is_nullable) {
|
||||
for (uint16_t i = 0; i < _selected_size; ++i) {
|
||||
|
||||
@ -46,9 +46,6 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
|
||||
// only statistics of this RowsetReader is necessary.
|
||||
_stats = _context->stats;
|
||||
}
|
||||
// SegmentIterator will load seek columns on demand
|
||||
_schema = std::make_unique<Schema>(_context->tablet_schema->columns(),
|
||||
*(_context->return_columns));
|
||||
|
||||
// convert RowsetReaderContext to StorageReadOptions
|
||||
StorageReadOptions read_options;
|
||||
@ -62,11 +59,27 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
|
||||
read_context->is_upper_keys_included->at(i));
|
||||
}
|
||||
}
|
||||
// delete_hanlder is always set, but it maybe not init, so that it will return empty conditions
|
||||
// or predicates when it is not inited.
|
||||
if (read_context->delete_handler != nullptr) {
|
||||
read_context->delete_handler->get_delete_conditions_after_version(
|
||||
_rowset->end_version(), &read_options.delete_conditions,
|
||||
read_options.delete_condition_predicates.get());
|
||||
}
|
||||
std::vector<uint32_t> read_columns;
|
||||
std::set<uint32_t> read_columns_set;
|
||||
std::set<uint32_t> delete_columns_set;
|
||||
for (int i = 0; i < _context->return_columns->size(); ++i) {
|
||||
read_columns.push_back(_context->return_columns->at(i));
|
||||
read_columns_set.insert(_context->return_columns->at(i));
|
||||
}
|
||||
read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set);
|
||||
for (auto cid : delete_columns_set) {
|
||||
if (read_columns_set.find(cid) == read_columns_set.end()) {
|
||||
read_columns.push_back(cid);
|
||||
}
|
||||
}
|
||||
_input_schema = std::make_unique<Schema>(_context->tablet_schema->columns(), read_columns);
|
||||
if (read_context->predicates != nullptr) {
|
||||
read_options.column_predicates.insert(read_options.column_predicates.end(),
|
||||
read_context->predicates->begin(),
|
||||
@ -109,7 +122,7 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
|
||||
std::vector<std::unique_ptr<RowwiseIterator>> seg_iterators;
|
||||
for (auto& seg_ptr : _segment_cache_handle.get_segments()) {
|
||||
std::unique_ptr<RowwiseIterator> iter;
|
||||
auto s = seg_ptr->new_iterator(*_schema, read_options, &iter);
|
||||
auto s = seg_ptr->new_iterator(*_input_schema, read_options, &iter);
|
||||
if (!s.ok()) {
|
||||
LOG(WARNING) << "failed to create iterator[" << seg_ptr->id() << "]: " << s.to_string();
|
||||
return Status::OLAPInternalError(OLAP_ERR_ROWSET_READER_INIT);
|
||||
@ -156,7 +169,7 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
|
||||
_iterator.reset(final_iterator);
|
||||
|
||||
// init input block
|
||||
_input_block.reset(new RowBlockV2(*_schema, std::min(1024, read_context->batch_size)));
|
||||
_input_block.reset(new RowBlockV2(*_input_schema, std::min(1024, read_context->batch_size)));
|
||||
|
||||
if (!read_context->is_vec) {
|
||||
// init input/output block and row
|
||||
@ -165,12 +178,10 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
|
||||
RowBlockInfo output_block_info;
|
||||
output_block_info.row_num = std::min(1024, read_context->batch_size);
|
||||
output_block_info.null_supported = true;
|
||||
// the output block's schema should be seek_columns to conform to v1
|
||||
// TODO(hkp): this should be optimized to use return_columns
|
||||
output_block_info.column_ids = *(_context->seek_columns);
|
||||
output_block_info.column_ids = *(_context->return_columns);
|
||||
_output_block->init(output_block_info);
|
||||
_row.reset(new RowCursor());
|
||||
RETURN_NOT_OK(_row->init(read_context->tablet_schema, *(_context->seek_columns)));
|
||||
RETURN_NOT_OK(_row->init(read_context->tablet_schema, *(_context->return_columns)));
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
|
||||
@ -66,7 +66,7 @@ public:
|
||||
private:
|
||||
bool _should_push_down_value_predicates() const;
|
||||
|
||||
std::unique_ptr<Schema> _schema;
|
||||
std::unique_ptr<Schema> _input_schema;
|
||||
RowsetReaderContext* _context;
|
||||
BetaRowsetSharedPtr _rowset;
|
||||
|
||||
|
||||
@ -304,7 +304,7 @@ Status BetaRowsetWriter::_create_segment_writer(
|
||||
if (!st.ok()) {
|
||||
LOG(WARNING) << "failed to create writable file. path=" << path
|
||||
<< ", err: " << st.get_error_msg();
|
||||
return Status::OLAPInternalError(OLAP_ERR_INIT_FAILED);
|
||||
return st;
|
||||
}
|
||||
|
||||
DCHECK(file_writer != nullptr);
|
||||
@ -322,7 +322,7 @@ Status BetaRowsetWriter::_create_segment_writer(
|
||||
if (!s.ok()) {
|
||||
LOG(WARNING) << "failed to init segment writer: " << s.to_string();
|
||||
writer->reset(nullptr);
|
||||
return Status::OLAPInternalError(OLAP_ERR_INIT_FAILED);
|
||||
return s;
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -42,9 +42,6 @@ struct RowsetReaderContext {
|
||||
std::vector<uint32_t>* read_orderby_key_columns = nullptr;
|
||||
// projection columns: the set of columns rowset reader should return
|
||||
const std::vector<uint32_t>* return_columns = nullptr;
|
||||
// set of columns used to prune rows that doesn't satisfy key ranges and `conditions`.
|
||||
// currently it contains all columns from `return_columns`, `conditions`, `lower_bound_keys`, and `upper_bound_keys`
|
||||
const std::vector<uint32_t>* seek_columns = nullptr;
|
||||
// columns to load bloom filter index
|
||||
// including columns in "=" or "in" conditions
|
||||
const std::set<uint32_t>* load_bf_columns = nullptr;
|
||||
|
||||
@ -1778,8 +1778,6 @@ Status SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2&
|
||||
reader_context.need_ordered_result = true;
|
||||
reader_context.delete_handler = &delete_handler;
|
||||
reader_context.return_columns = &return_columns;
|
||||
// for schema change, seek_columns is the same to return_columns
|
||||
reader_context.seek_columns = &return_columns;
|
||||
reader_context.sequence_id_idx = reader_context.tablet_schema->sequence_col_idx();
|
||||
reader_context.is_unique = base_tablet->keys_type() == UNIQUE_KEYS;
|
||||
reader_context.batch_size = ALTER_TABLE_BATCH_SIZE;
|
||||
|
||||
@ -300,7 +300,6 @@ protected:
|
||||
reader_context.need_ordered_result = false;
|
||||
std::vector<uint32_t> return_columns = {0, 1};
|
||||
reader_context.return_columns = &return_columns;
|
||||
reader_context.seek_columns = &return_columns;
|
||||
reader_context.is_vec = true;
|
||||
RowsetReaderSharedPtr output_rs_reader;
|
||||
create_and_init_rowset_reader(out_rowset.get(), reader_context, &output_rs_reader);
|
||||
|
||||
@ -220,7 +220,6 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) {
|
||||
reader_context.need_ordered_result = true;
|
||||
std::vector<uint32_t> return_columns = {0, 1};
|
||||
reader_context.return_columns = &return_columns;
|
||||
reader_context.seek_columns = &return_columns;
|
||||
reader_context.stats = &_stats;
|
||||
|
||||
// without predicates
|
||||
@ -311,7 +310,6 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) {
|
||||
reader_context.need_ordered_result = false;
|
||||
std::vector<uint32_t> return_columns = {2};
|
||||
reader_context.return_columns = &return_columns;
|
||||
reader_context.seek_columns = &return_columns;
|
||||
reader_context.stats = &_stats;
|
||||
|
||||
// without predicate
|
||||
|
||||
@ -0,0 +1,79 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !sql --
|
||||
2 2 2
|
||||
3 3 3
|
||||
2 2 2
|
||||
3 3 3
|
||||
4 4 4
|
||||
|
||||
-- !sql --
|
||||
2 2 2
|
||||
3 3 3
|
||||
2 2 2
|
||||
3 3 3
|
||||
4 4 4
|
||||
|
||||
-- !sql --
|
||||
3 3 3
|
||||
3 3 3
|
||||
4 4 4
|
||||
|
||||
-- !sql --
|
||||
3 3 3
|
||||
3 3 3
|
||||
4 4 4
|
||||
|
||||
-- !sql --
|
||||
3 3 3
|
||||
3 3 3
|
||||
4 4 4
|
||||
6 6 6
|
||||
|
||||
-- !sql --
|
||||
3 3 3
|
||||
3 3 3
|
||||
4 4 4
|
||||
6 6 6
|
||||
|
||||
-- !sql --
|
||||
3 3 3
|
||||
3 3 3
|
||||
4 4 4
|
||||
6 6 6
|
||||
8 8 8
|
||||
|
||||
-- !sql --
|
||||
3 3 3
|
||||
3 3 3
|
||||
4 4 4
|
||||
6 6 6
|
||||
8 8 8
|
||||
|
||||
-- !sql --
|
||||
2 2 2
|
||||
3 3 3
|
||||
4 4 4
|
||||
|
||||
-- !sql --
|
||||
2 2 2
|
||||
3 3 3
|
||||
4 4 4
|
||||
|
||||
-- !sql --
|
||||
3 3 3
|
||||
4 4 4
|
||||
|
||||
-- !sql --
|
||||
3 3 3
|
||||
4 4 4
|
||||
|
||||
-- !sql --
|
||||
3 3 3
|
||||
4 4 4
|
||||
6 6 6
|
||||
|
||||
-- !sql --
|
||||
3 3 3
|
||||
4 4 4
|
||||
6 6 6
|
||||
|
||||
@ -0,0 +1,93 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
suite("test_segment_iterator_delete") {
|
||||
def tableName = "delete_regression_test_segment_iterator"
|
||||
|
||||
// test duplicate key
|
||||
sql """ DROP TABLE IF EXISTS ${tableName} """
|
||||
sql """ CREATE TABLE ${tableName} (c1 int NOT NULL, c2 int NOT NULL , c3 int not null ) ENGINE=OLAP DUPLICATE KEY(c1, c2) COMMENT "OLAP" DISTRIBUTED BY HASH(c3) BUCKETS 1
|
||||
PROPERTIES ( "replication_num" = "1" );"""
|
||||
|
||||
sql """INSERT INTO ${tableName} VALUES (1,1,1)"""
|
||||
sql """INSERT INTO ${tableName} VALUES (2,2,2)"""
|
||||
sql """INSERT INTO ${tableName} VALUES (3,3,3)"""
|
||||
sql """INSERT INTO ${tableName} VALUES (1,1,1)"""
|
||||
sql """INSERT INTO ${tableName} VALUES (2,2,2)"""
|
||||
sql """INSERT INTO ${tableName} VALUES (3,3,3)"""
|
||||
|
||||
// delete first key
|
||||
sql """delete from ${tableName} where c1 = 1;"""
|
||||
sql """INSERT INTO ${tableName} VALUES (4,4,4)"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};"""
|
||||
|
||||
// delete second key
|
||||
sql """delete from ${tableName} where c2 = 2;"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};"""
|
||||
|
||||
// delete by multi columns
|
||||
sql """INSERT INTO ${tableName} VALUES (5,5,5)"""
|
||||
sql """INSERT INTO ${tableName} VALUES (6,6,6)"""
|
||||
sql """delete from ${tableName} where c1 = 5 and c2=5;"""
|
||||
sql """delete from ${tableName} where c1 = 5 and c2=6;"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};"""
|
||||
|
||||
|
||||
// delete by value columns
|
||||
sql """INSERT INTO ${tableName} VALUES (7,7,7)"""
|
||||
sql """INSERT INTO ${tableName} VALUES (8,8,8)"""
|
||||
sql """delete from ${tableName} where c3 = 7;"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};"""
|
||||
|
||||
|
||||
sql """drop table ${tableName} force"""
|
||||
|
||||
// test unique key
|
||||
sql """ DROP TABLE IF EXISTS ${tableName} """
|
||||
sql """ CREATE TABLE ${tableName} (c1 int NOT NULL, c2 int NOT NULL , c3 int not null ) ENGINE=OLAP UNIQUE KEY(c1, c2) COMMENT "OLAP" DISTRIBUTED BY HASH(c1) BUCKETS 1
|
||||
PROPERTIES ( "replication_num" = "1" );"""
|
||||
|
||||
sql """INSERT INTO ${tableName} VALUES (1,1,1)"""
|
||||
sql """INSERT INTO ${tableName} VALUES (1,1,3)"""
|
||||
sql """INSERT INTO ${tableName} VALUES (2,2,2)"""
|
||||
sql """INSERT INTO ${tableName} VALUES (3,3,3)"""
|
||||
|
||||
// delete first key
|
||||
sql """delete from ${tableName} where c1 = 1;"""
|
||||
sql """INSERT INTO ${tableName} VALUES (4,4,4)"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};"""
|
||||
|
||||
// delete second key
|
||||
sql """delete from ${tableName} where c2 = 2;"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};"""
|
||||
|
||||
// delete by multi columns
|
||||
sql """INSERT INTO ${tableName} VALUES (5,5,5)"""
|
||||
sql """INSERT INTO ${tableName} VALUES (6,6,6)"""
|
||||
sql """delete from ${tableName} where c1 = 5 and c2=5;"""
|
||||
sql """delete from ${tableName} where c1 = 5 and c2=6;"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};"""
|
||||
qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};"""
|
||||
|
||||
sql """drop table ${tableName} force"""
|
||||
}
|
||||
Reference in New Issue
Block a user