diff --git a/be/src/olap/collect_iterator.cpp b/be/src/olap/collect_iterator.cpp index 138110ba06..4b088fbeb6 100644 --- a/be/src/olap/collect_iterator.cpp +++ b/be/src/olap/collect_iterator.cpp @@ -202,7 +202,7 @@ CollectIterator::Level0Iterator::Level0Iterator(RowsetReaderSharedPtr rs_reader, CollectIterator::Level0Iterator::~Level0Iterator() = default; Status CollectIterator::Level0Iterator::init() { - RETURN_NOT_OK_LOG(_row_cursor.init(_reader->_tablet_schema, _reader->_seek_columns), + RETURN_NOT_OK_LOG(_row_cursor.init(_reader->_tablet_schema, _reader->_return_columns), "failed to init row cursor"); return (this->*_refresh_current_row)(); } diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp index 694daacb99..955d6699a6 100644 --- a/be/src/olap/reader.cpp +++ b/be/src/olap/reader.cpp @@ -212,7 +212,6 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params, _reader_context.return_columns = &_return_columns; _reader_context.read_orderby_key_columns = _orderby_key_columns.size() > 0 ? &_orderby_key_columns : nullptr; - _reader_context.seek_columns = &_seek_columns; _reader_context.load_bf_columns = &_load_bf_columns; _reader_context.load_bf_all_columns = &_load_bf_all_columns; _reader_context.conditions = &_conditions; @@ -270,15 +269,11 @@ Status TabletReader::_init_params(const ReaderParams& read_params) { LOG(WARNING) << "fail to init keys param. res=" << res; return res; } - res = _init_orderby_keys_param(read_params); if (!res.ok()) { LOG(WARNING) << "fail to init orderby keys param. res=" << res; return res; } - - _init_seek_columns(); - if (_tablet_schema->has_sequence_col()) { auto sequence_col_idx = _tablet_schema->sequence_col_idx(); DCHECK_NE(sequence_col_idx, -1); @@ -298,19 +293,6 @@ Status TabletReader::_init_return_columns(const ReaderParams& read_params) { if (read_params.reader_type == READER_QUERY) { _return_columns = read_params.return_columns; _tablet_columns_convert_to_null_set = read_params.tablet_columns_convert_to_null_set; - - if (!_delete_handler.empty()) { - // We need to fetch columns which there are deletion conditions on them. - std::set column_set(_return_columns.begin(), _return_columns.end()); - for (const auto& conds : _delete_handler.get_delete_conditions()) { - for (const auto& cond_column : conds.del_cond->columns()) { - if (column_set.find(cond_column.first) == column_set.end()) { - column_set.insert(cond_column.first); - _return_columns.push_back(cond_column.first); - } - } - } - } for (auto id : read_params.return_columns) { if (_tablet_schema->column(id).is_key()) { _key_cids.push_back(id); @@ -360,26 +342,6 @@ Status TabletReader::_init_return_columns(const ReaderParams& read_params) { return Status::OK(); } -void TabletReader::_init_seek_columns() { - std::unordered_set column_set(_return_columns.begin(), _return_columns.end()); - for (auto& it : _conditions.columns()) { - column_set.insert(it.first); - } - size_t max_key_column_count = 0; - for (const auto& key : _keys_param.start_keys) { - max_key_column_count = std::max(max_key_column_count, key.field_count()); - } - for (const auto& key : _keys_param.end_keys) { - max_key_column_count = std::max(max_key_column_count, key.field_count()); - } - - for (size_t i = 0; i < _tablet_schema->num_columns(); i++) { - if (i < max_key_column_count || column_set.find(i) != column_set.end()) { - _seek_columns.push_back(i); - } - } -} - Status TabletReader::_init_keys_param(const ReaderParams& read_params) { if (read_params.start_key.empty()) { return Status::OK(); diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h index 5925cdf434..8fd39e7fef 100644 --- a/be/src/olap/reader.h +++ b/be/src/olap/reader.h @@ -175,7 +175,6 @@ protected: Status _init_delete_condition(const ReaderParams& read_params); Status _init_return_columns(const ReaderParams& read_params); - void _init_seek_columns(); void _init_load_bf_columns(const ReaderParams& read_params); void _init_load_bf_columns(const ReaderParams& read_params, Conditions* conditions, @@ -194,7 +193,6 @@ protected: // only use in outer join which change the column nullable which must keep same in // vec query engine std::unordered_set* _tablet_columns_convert_to_null_set = nullptr; - std::vector _seek_columns; TabletSharedPtr _tablet; RowsetReaderContext _reader_context; diff --git a/be/src/olap/row_block.cpp b/be/src/olap/row_block.cpp index 8d5e7677a2..2d66985933 100644 --- a/be/src/olap/row_block.cpp +++ b/be/src/olap/row_block.cpp @@ -47,6 +47,13 @@ RowBlock::~RowBlock() { void RowBlock::init(const RowBlockInfo& block_info) { _info = block_info; + // Sometimes info.column_ids is not set, which means all columns in the schema + // but some logic depends on column_ids, init here to avoid many if elses + if (_info.column_ids.empty()) { + for (uint32_t col_id = 0; col_id < _schema->num_columns(); ++col_id) { + _info.column_ids.push_back(col_id); + } + } _null_supported = block_info.null_supported; _capacity = _info.row_num; _compute_layout(); diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index f647018f58..f0facd7afb 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -58,8 +58,10 @@ RowBlockV2::~RowBlockV2() { delete[] _selection_vector; } +// RowBlockV2 has more columns than RowBlockV1, so that should use rowblock v1's columnids. +// It means will omit some columns. Status RowBlockV2::convert_to_row_block(RowCursor* helper, RowBlock* dst) { - for (auto cid : _schema.column_ids()) { + for (auto cid : dst->row_block_info().column_ids) { bool is_nullable = _schema.column(cid)->is_nullable(); if (is_nullable) { for (uint16_t i = 0; i < _selected_size; ++i) { diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index f7e93bfca9..3c848a43b6 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -46,9 +46,6 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) { // only statistics of this RowsetReader is necessary. _stats = _context->stats; } - // SegmentIterator will load seek columns on demand - _schema = std::make_unique(_context->tablet_schema->columns(), - *(_context->return_columns)); // convert RowsetReaderContext to StorageReadOptions StorageReadOptions read_options; @@ -62,11 +59,27 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) { read_context->is_upper_keys_included->at(i)); } } + // delete_hanlder is always set, but it maybe not init, so that it will return empty conditions + // or predicates when it is not inited. if (read_context->delete_handler != nullptr) { read_context->delete_handler->get_delete_conditions_after_version( _rowset->end_version(), &read_options.delete_conditions, read_options.delete_condition_predicates.get()); } + std::vector read_columns; + std::set read_columns_set; + std::set delete_columns_set; + for (int i = 0; i < _context->return_columns->size(); ++i) { + read_columns.push_back(_context->return_columns->at(i)); + read_columns_set.insert(_context->return_columns->at(i)); + } + read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set); + for (auto cid : delete_columns_set) { + if (read_columns_set.find(cid) == read_columns_set.end()) { + read_columns.push_back(cid); + } + } + _input_schema = std::make_unique(_context->tablet_schema->columns(), read_columns); if (read_context->predicates != nullptr) { read_options.column_predicates.insert(read_options.column_predicates.end(), read_context->predicates->begin(), @@ -109,7 +122,7 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) { std::vector> seg_iterators; for (auto& seg_ptr : _segment_cache_handle.get_segments()) { std::unique_ptr iter; - auto s = seg_ptr->new_iterator(*_schema, read_options, &iter); + auto s = seg_ptr->new_iterator(*_input_schema, read_options, &iter); if (!s.ok()) { LOG(WARNING) << "failed to create iterator[" << seg_ptr->id() << "]: " << s.to_string(); return Status::OLAPInternalError(OLAP_ERR_ROWSET_READER_INIT); @@ -156,7 +169,7 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) { _iterator.reset(final_iterator); // init input block - _input_block.reset(new RowBlockV2(*_schema, std::min(1024, read_context->batch_size))); + _input_block.reset(new RowBlockV2(*_input_schema, std::min(1024, read_context->batch_size))); if (!read_context->is_vec) { // init input/output block and row @@ -165,12 +178,10 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) { RowBlockInfo output_block_info; output_block_info.row_num = std::min(1024, read_context->batch_size); output_block_info.null_supported = true; - // the output block's schema should be seek_columns to conform to v1 - // TODO(hkp): this should be optimized to use return_columns - output_block_info.column_ids = *(_context->seek_columns); + output_block_info.column_ids = *(_context->return_columns); _output_block->init(output_block_info); _row.reset(new RowCursor()); - RETURN_NOT_OK(_row->init(read_context->tablet_schema, *(_context->seek_columns))); + RETURN_NOT_OK(_row->init(read_context->tablet_schema, *(_context->return_columns))); } return Status::OK(); diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h index bbf8d84962..9cf9ef599b 100644 --- a/be/src/olap/rowset/beta_rowset_reader.h +++ b/be/src/olap/rowset/beta_rowset_reader.h @@ -66,7 +66,7 @@ public: private: bool _should_push_down_value_predicates() const; - std::unique_ptr _schema; + std::unique_ptr _input_schema; RowsetReaderContext* _context; BetaRowsetSharedPtr _rowset; diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 851901adb0..cf916f0eb4 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -304,7 +304,7 @@ Status BetaRowsetWriter::_create_segment_writer( if (!st.ok()) { LOG(WARNING) << "failed to create writable file. path=" << path << ", err: " << st.get_error_msg(); - return Status::OLAPInternalError(OLAP_ERR_INIT_FAILED); + return st; } DCHECK(file_writer != nullptr); @@ -322,7 +322,7 @@ Status BetaRowsetWriter::_create_segment_writer( if (!s.ok()) { LOG(WARNING) << "failed to init segment writer: " << s.to_string(); writer->reset(nullptr); - return Status::OLAPInternalError(OLAP_ERR_INIT_FAILED); + return s; } return Status::OK(); } diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h index 0ee9026f6a..dcb56f8d6a 100644 --- a/be/src/olap/rowset/rowset_reader_context.h +++ b/be/src/olap/rowset/rowset_reader_context.h @@ -42,9 +42,6 @@ struct RowsetReaderContext { std::vector* read_orderby_key_columns = nullptr; // projection columns: the set of columns rowset reader should return const std::vector* return_columns = nullptr; - // set of columns used to prune rows that doesn't satisfy key ranges and `conditions`. - // currently it contains all columns from `return_columns`, `conditions`, `lower_bound_keys`, and `upper_bound_keys` - const std::vector* seek_columns = nullptr; // columns to load bloom filter index // including columns in "=" or "in" conditions const std::set* load_bf_columns = nullptr; diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index aa455699b1..291a8bc8fe 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -1778,8 +1778,6 @@ Status SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2& reader_context.need_ordered_result = true; reader_context.delete_handler = &delete_handler; reader_context.return_columns = &return_columns; - // for schema change, seek_columns is the same to return_columns - reader_context.seek_columns = &return_columns; reader_context.sequence_id_idx = reader_context.tablet_schema->sequence_col_idx(); reader_context.is_unique = base_tablet->keys_type() == UNIQUE_KEYS; reader_context.batch_size = ALTER_TABLE_BATCH_SIZE; diff --git a/be/test/olap/rowid_conversion_test.cpp b/be/test/olap/rowid_conversion_test.cpp index 1bae4f83e9..ac5dede402 100644 --- a/be/test/olap/rowid_conversion_test.cpp +++ b/be/test/olap/rowid_conversion_test.cpp @@ -300,7 +300,6 @@ protected: reader_context.need_ordered_result = false; std::vector return_columns = {0, 1}; reader_context.return_columns = &return_columns; - reader_context.seek_columns = &return_columns; reader_context.is_vec = true; RowsetReaderSharedPtr output_rs_reader; create_and_init_rowset_reader(out_rowset.get(), reader_context, &output_rs_reader); diff --git a/be/test/olap/rowset/beta_rowset_test.cpp b/be/test/olap/rowset/beta_rowset_test.cpp index bea3e763d9..5dd53cd490 100644 --- a/be/test/olap/rowset/beta_rowset_test.cpp +++ b/be/test/olap/rowset/beta_rowset_test.cpp @@ -220,7 +220,6 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) { reader_context.need_ordered_result = true; std::vector return_columns = {0, 1}; reader_context.return_columns = &return_columns; - reader_context.seek_columns = &return_columns; reader_context.stats = &_stats; // without predicates @@ -311,7 +310,6 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) { reader_context.need_ordered_result = false; std::vector return_columns = {2}; reader_context.return_columns = &return_columns; - reader_context.seek_columns = &return_columns; reader_context.stats = &_stats; // without predicate diff --git a/regression-test/data/delete_p0/test_segment_iterator_delete.out b/regression-test/data/delete_p0/test_segment_iterator_delete.out new file mode 100644 index 0000000000..41a422e4f4 --- /dev/null +++ b/regression-test/data/delete_p0/test_segment_iterator_delete.out @@ -0,0 +1,79 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +2 2 2 +3 3 3 +2 2 2 +3 3 3 +4 4 4 + +-- !sql -- +2 2 2 +3 3 3 +2 2 2 +3 3 3 +4 4 4 + +-- !sql -- +3 3 3 +3 3 3 +4 4 4 + +-- !sql -- +3 3 3 +3 3 3 +4 4 4 + +-- !sql -- +3 3 3 +3 3 3 +4 4 4 +6 6 6 + +-- !sql -- +3 3 3 +3 3 3 +4 4 4 +6 6 6 + +-- !sql -- +3 3 3 +3 3 3 +4 4 4 +6 6 6 +8 8 8 + +-- !sql -- +3 3 3 +3 3 3 +4 4 4 +6 6 6 +8 8 8 + +-- !sql -- +2 2 2 +3 3 3 +4 4 4 + +-- !sql -- +2 2 2 +3 3 3 +4 4 4 + +-- !sql -- +3 3 3 +4 4 4 + +-- !sql -- +3 3 3 +4 4 4 + +-- !sql -- +3 3 3 +4 4 4 +6 6 6 + +-- !sql -- +3 3 3 +4 4 4 +6 6 6 + diff --git a/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy b/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy new file mode 100644 index 0000000000..177299a447 --- /dev/null +++ b/regression-test/suites/delete_p0/test_segment_iterator_delete.groovy @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_segment_iterator_delete") { + def tableName = "delete_regression_test_segment_iterator" + + // test duplicate key + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE ${tableName} (c1 int NOT NULL, c2 int NOT NULL , c3 int not null ) ENGINE=OLAP DUPLICATE KEY(c1, c2) COMMENT "OLAP" DISTRIBUTED BY HASH(c3) BUCKETS 1 + PROPERTIES ( "replication_num" = "1" );""" + + sql """INSERT INTO ${tableName} VALUES (1,1,1)""" + sql """INSERT INTO ${tableName} VALUES (2,2,2)""" + sql """INSERT INTO ${tableName} VALUES (3,3,3)""" + sql """INSERT INTO ${tableName} VALUES (1,1,1)""" + sql """INSERT INTO ${tableName} VALUES (2,2,2)""" + sql """INSERT INTO ${tableName} VALUES (3,3,3)""" + + // delete first key + sql """delete from ${tableName} where c1 = 1;""" + sql """INSERT INTO ${tableName} VALUES (4,4,4)""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};""" + + // delete second key + sql """delete from ${tableName} where c2 = 2;""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};""" + + // delete by multi columns + sql """INSERT INTO ${tableName} VALUES (5,5,5)""" + sql """INSERT INTO ${tableName} VALUES (6,6,6)""" + sql """delete from ${tableName} where c1 = 5 and c2=5;""" + sql """delete from ${tableName} where c1 = 5 and c2=6;""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};""" + + + // delete by value columns + sql """INSERT INTO ${tableName} VALUES (7,7,7)""" + sql """INSERT INTO ${tableName} VALUES (8,8,8)""" + sql """delete from ${tableName} where c3 = 7;""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};""" + + + sql """drop table ${tableName} force""" + + // test unique key + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE ${tableName} (c1 int NOT NULL, c2 int NOT NULL , c3 int not null ) ENGINE=OLAP UNIQUE KEY(c1, c2) COMMENT "OLAP" DISTRIBUTED BY HASH(c1) BUCKETS 1 + PROPERTIES ( "replication_num" = "1" );""" + + sql """INSERT INTO ${tableName} VALUES (1,1,1)""" + sql """INSERT INTO ${tableName} VALUES (1,1,3)""" + sql """INSERT INTO ${tableName} VALUES (2,2,2)""" + sql """INSERT INTO ${tableName} VALUES (3,3,3)""" + + // delete first key + sql """delete from ${tableName} where c1 = 1;""" + sql """INSERT INTO ${tableName} VALUES (4,4,4)""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};""" + + // delete second key + sql """delete from ${tableName} where c2 = 2;""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};""" + + // delete by multi columns + sql """INSERT INTO ${tableName} VALUES (5,5,5)""" + sql """INSERT INTO ${tableName} VALUES (6,6,6)""" + sql """delete from ${tableName} where c1 = 5 and c2=5;""" + sql """delete from ${tableName} where c1 = 5 and c2=6;""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=false) */ * from ${tableName};""" + qt_sql """select /*+ SET_VAR(enable_vectorized_engine=true) */ * from ${tableName};""" + + sql """drop table ${tableName} force""" +}