[refactor](remove-non-vec) Remove some non-vectorized code in the segment iterator, and remove the reuse-schema optimization since it was introduced in the non-vectorized code (#15407)

Co-authored-by: yiguolei <yiguolei@gmail.com>
This commit is contained in:
yiguolei
2022-12-27 22:30:02 +08:00
committed by GitHub
parent 51b14c06d3
commit 700f963571
5 changed files with 19 additions and 70 deletions

View File

@ -72,40 +72,23 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
read_context->delete_handler->get_delete_conditions_after_version(
_rowset->end_version(), _read_options.delete_condition_predicates.get(),
&_read_options.col_id_to_del_predicates);
// if del cond is not empty, schema may be different in multiple rowset
_can_reuse_schema = _read_options.col_id_to_del_predicates.empty();
}
// In vertical compaction, every column group need new schema
if (read_context->is_vertical_compaction) {
_can_reuse_schema = false;
}
if (!_can_reuse_schema || _context->reuse_input_schema == nullptr) {
std::vector<uint32_t> read_columns;
std::set<uint32_t> read_columns_set;
std::set<uint32_t> delete_columns_set;
for (int i = 0; i < _context->return_columns->size(); ++i) {
read_columns.push_back(_context->return_columns->at(i));
read_columns_set.insert(_context->return_columns->at(i));
}
_read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set);
for (auto cid : delete_columns_set) {
if (read_columns_set.find(cid) == read_columns_set.end()) {
read_columns.push_back(cid);
}
}
VLOG_NOTICE << "read columns size: " << read_columns.size();
_input_schema = std::make_shared<Schema>(_context->tablet_schema->columns(), read_columns);
if (_can_reuse_schema) {
_context->reuse_input_schema = _input_schema;
}
}
// if can reuse schema, context must have reuse_input_schema
// if can't reuse schema, context mustn't have reuse_input_schema
DCHECK(_can_reuse_schema ^ (_context->reuse_input_schema == nullptr));
if (_context->reuse_input_schema != nullptr && _input_schema == nullptr) {
_input_schema = _context->reuse_input_schema;
std::vector<uint32_t> read_columns;
std::set<uint32_t> read_columns_set;
std::set<uint32_t> delete_columns_set;
for (int i = 0; i < _context->return_columns->size(); ++i) {
read_columns.push_back(_context->return_columns->at(i));
read_columns_set.insert(_context->return_columns->at(i));
}
_read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set);
for (auto cid : delete_columns_set) {
if (read_columns_set.find(cid) == read_columns_set.end()) {
read_columns.push_back(cid);
}
}
VLOG_NOTICE << "read columns size: " << read_columns.size();
_input_schema = std::make_shared<Schema>(_context->tablet_schema->columns(), read_columns);
if (read_context->predicates != nullptr) {
_read_options.column_predicates.insert(_read_options.column_predicates.end(),

View File

@ -89,7 +89,6 @@ private:
SegmentCacheHandle _segment_cache_handle;
StorageReadOptions _read_options;
bool _can_reuse_schema = true;
};
} // namespace doris

View File

@ -67,7 +67,6 @@ struct RowsetReaderContext {
bool record_rowids = false;
bool is_vertical_compaction = false;
bool is_key_column_group = false;
std::shared_ptr<Schema> reuse_input_schema;
};
} // namespace doris

View File

@ -171,7 +171,7 @@ Status SegmentIterator::init(const StorageReadOptions& opts) {
return Status::OK();
}
Status SegmentIterator::_init(bool is_vec) {
Status SegmentIterator::_init() {
SCOPED_RAW_TIMER(&_opts.stats->block_init_ns);
DorisMetrics::instance()->segment_read_total->increment(1);
// get file handle from file descriptor of segment
@ -185,12 +185,8 @@ Status SegmentIterator::_init(bool is_vec) {
RETURN_IF_ERROR(_get_row_ranges_by_keys());
}
RETURN_IF_ERROR(_get_row_ranges_by_column_conditions());
if (is_vec) {
_vec_init_lazy_materialization();
_vec_init_char_column_id();
} else {
_init_lazy_materialization();
}
_vec_init_lazy_materialization();
_vec_init_char_column_id();
// Remove rows that have been marked deleted
if (_opts.delete_bitmap.count(segment_id()) > 0 &&
_opts.delete_bitmap[segment_id()] != nullptr) {
@ -579,34 +575,6 @@ Status SegmentIterator::_seek_and_peek(rowid_t rowid) {
return Status::OK();
}
void SegmentIterator::_init_lazy_materialization() {
if (!_col_predicates.empty()) {
std::set<ColumnId> predicate_columns;
for (auto predicate : _col_predicates) {
predicate_columns.insert(predicate->column_id());
}
_opts.delete_condition_predicates->get_all_column_ids(predicate_columns);
// ARRAY column do not support lazy materialization read
for (auto cid : _schema.column_ids()) {
if (_schema.column(cid)->type() == OLAP_FIELD_TYPE_ARRAY) {
predicate_columns.insert(cid);
}
}
// when all return columns have predicates, disable lazy materialization to avoid its overhead
if (_schema.column_ids().size() > predicate_columns.size()) {
_lazy_materialization_read = true;
_predicate_columns.assign(predicate_columns.cbegin(), predicate_columns.cend());
for (auto cid : _schema.column_ids()) {
if (predicate_columns.find(cid) == predicate_columns.end()) {
_non_predicate_columns.push_back(cid);
}
}
}
}
}
Status SegmentIterator::_seek_columns(const std::vector<ColumnId>& column_ids, rowid_t pos) {
for (auto cid : column_ids) {
RETURN_IF_ERROR(_column_iterators[_schema.unique_id(cid)]->seek_to_ordinal(pos));
@ -1000,7 +968,7 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
SCOPED_RAW_TIMER(&_opts.stats->block_load_ns);
if (UNLIKELY(!_inited)) {
RETURN_IF_ERROR(_init(true));
RETURN_IF_ERROR(_init());
_inited = true;
if (_lazy_materialization_read || _opts.record_rowids) {
_block_rowids.resize(_opts.block_row_max);

View File

@ -93,7 +93,7 @@ private:
return true;
}
Status _init(bool is_vec = false);
Status _init();
Status _init_return_column_iterators();
Status _init_bitmap_index_iterators();