[refactor](remove-non-vec) Remove some non-vectorized code in the segment iterator, and remove the schema-reuse optimization since it was introduced for the non-vectorized code path (#15407)
Co-authored-by: yiguolei <yiguolei@gmail.com>
This commit is contained in:
@ -72,40 +72,23 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
|
||||
read_context->delete_handler->get_delete_conditions_after_version(
|
||||
_rowset->end_version(), _read_options.delete_condition_predicates.get(),
|
||||
&_read_options.col_id_to_del_predicates);
|
||||
// If the delete condition is not empty, the schema may differ across rowsets
|
||||
_can_reuse_schema = _read_options.col_id_to_del_predicates.empty();
|
||||
}
|
||||
// In vertical compaction, every column group needs a new schema
|
||||
if (read_context->is_vertical_compaction) {
|
||||
_can_reuse_schema = false;
|
||||
}
|
||||
if (!_can_reuse_schema || _context->reuse_input_schema == nullptr) {
|
||||
std::vector<uint32_t> read_columns;
|
||||
std::set<uint32_t> read_columns_set;
|
||||
std::set<uint32_t> delete_columns_set;
|
||||
for (int i = 0; i < _context->return_columns->size(); ++i) {
|
||||
read_columns.push_back(_context->return_columns->at(i));
|
||||
read_columns_set.insert(_context->return_columns->at(i));
|
||||
}
|
||||
_read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set);
|
||||
for (auto cid : delete_columns_set) {
|
||||
if (read_columns_set.find(cid) == read_columns_set.end()) {
|
||||
read_columns.push_back(cid);
|
||||
}
|
||||
}
|
||||
VLOG_NOTICE << "read columns size: " << read_columns.size();
|
||||
_input_schema = std::make_shared<Schema>(_context->tablet_schema->columns(), read_columns);
|
||||
if (_can_reuse_schema) {
|
||||
_context->reuse_input_schema = _input_schema;
|
||||
}
|
||||
}
|
||||
|
||||
// if can reuse schema, context must have reuse_input_schema
|
||||
// if can't reuse schema, context mustn't have reuse_input_schema
|
||||
DCHECK(_can_reuse_schema ^ (_context->reuse_input_schema == nullptr));
|
||||
if (_context->reuse_input_schema != nullptr && _input_schema == nullptr) {
|
||||
_input_schema = _context->reuse_input_schema;
|
||||
std::vector<uint32_t> read_columns;
|
||||
std::set<uint32_t> read_columns_set;
|
||||
std::set<uint32_t> delete_columns_set;
|
||||
for (int i = 0; i < _context->return_columns->size(); ++i) {
|
||||
read_columns.push_back(_context->return_columns->at(i));
|
||||
read_columns_set.insert(_context->return_columns->at(i));
|
||||
}
|
||||
_read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set);
|
||||
for (auto cid : delete_columns_set) {
|
||||
if (read_columns_set.find(cid) == read_columns_set.end()) {
|
||||
read_columns.push_back(cid);
|
||||
}
|
||||
}
|
||||
VLOG_NOTICE << "read columns size: " << read_columns.size();
|
||||
_input_schema = std::make_shared<Schema>(_context->tablet_schema->columns(), read_columns);
|
||||
|
||||
if (read_context->predicates != nullptr) {
|
||||
_read_options.column_predicates.insert(_read_options.column_predicates.end(),
|
||||
|
||||
@ -89,7 +89,6 @@ private:
|
||||
SegmentCacheHandle _segment_cache_handle;
|
||||
|
||||
StorageReadOptions _read_options;
|
||||
bool _can_reuse_schema = true;
|
||||
};
|
||||
|
||||
} // namespace doris
|
||||
|
||||
@ -67,7 +67,6 @@ struct RowsetReaderContext {
|
||||
bool record_rowids = false;
|
||||
bool is_vertical_compaction = false;
|
||||
bool is_key_column_group = false;
|
||||
std::shared_ptr<Schema> reuse_input_schema;
|
||||
};
|
||||
|
||||
} // namespace doris
|
||||
|
||||
@ -171,7 +171,7 @@ Status SegmentIterator::init(const StorageReadOptions& opts) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status SegmentIterator::_init(bool is_vec) {
|
||||
Status SegmentIterator::_init() {
|
||||
SCOPED_RAW_TIMER(&_opts.stats->block_init_ns);
|
||||
DorisMetrics::instance()->segment_read_total->increment(1);
|
||||
// get file handle from file descriptor of segment
|
||||
@ -185,12 +185,8 @@ Status SegmentIterator::_init(bool is_vec) {
|
||||
RETURN_IF_ERROR(_get_row_ranges_by_keys());
|
||||
}
|
||||
RETURN_IF_ERROR(_get_row_ranges_by_column_conditions());
|
||||
if (is_vec) {
|
||||
_vec_init_lazy_materialization();
|
||||
_vec_init_char_column_id();
|
||||
} else {
|
||||
_init_lazy_materialization();
|
||||
}
|
||||
_vec_init_lazy_materialization();
|
||||
_vec_init_char_column_id();
|
||||
// Remove rows that have been marked deleted
|
||||
if (_opts.delete_bitmap.count(segment_id()) > 0 &&
|
||||
_opts.delete_bitmap[segment_id()] != nullptr) {
|
||||
@ -579,34 +575,6 @@ Status SegmentIterator::_seek_and_peek(rowid_t rowid) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void SegmentIterator::_init_lazy_materialization() {
|
||||
if (!_col_predicates.empty()) {
|
||||
std::set<ColumnId> predicate_columns;
|
||||
for (auto predicate : _col_predicates) {
|
||||
predicate_columns.insert(predicate->column_id());
|
||||
}
|
||||
_opts.delete_condition_predicates->get_all_column_ids(predicate_columns);
|
||||
|
||||
// ARRAY column do not support lazy materialization read
|
||||
for (auto cid : _schema.column_ids()) {
|
||||
if (_schema.column(cid)->type() == OLAP_FIELD_TYPE_ARRAY) {
|
||||
predicate_columns.insert(cid);
|
||||
}
|
||||
}
|
||||
|
||||
// when all return columns have predicates, disable lazy materialization to avoid its overhead
|
||||
if (_schema.column_ids().size() > predicate_columns.size()) {
|
||||
_lazy_materialization_read = true;
|
||||
_predicate_columns.assign(predicate_columns.cbegin(), predicate_columns.cend());
|
||||
for (auto cid : _schema.column_ids()) {
|
||||
if (predicate_columns.find(cid) == predicate_columns.end()) {
|
||||
_non_predicate_columns.push_back(cid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Status SegmentIterator::_seek_columns(const std::vector<ColumnId>& column_ids, rowid_t pos) {
|
||||
for (auto cid : column_ids) {
|
||||
RETURN_IF_ERROR(_column_iterators[_schema.unique_id(cid)]->seek_to_ordinal(pos));
|
||||
@ -1000,7 +968,7 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
|
||||
|
||||
SCOPED_RAW_TIMER(&_opts.stats->block_load_ns);
|
||||
if (UNLIKELY(!_inited)) {
|
||||
RETURN_IF_ERROR(_init(true));
|
||||
RETURN_IF_ERROR(_init());
|
||||
_inited = true;
|
||||
if (_lazy_materialization_read || _opts.record_rowids) {
|
||||
_block_rowids.resize(_opts.block_row_max);
|
||||
|
||||
@ -93,7 +93,7 @@ private:
|
||||
return true;
|
||||
}
|
||||
|
||||
Status _init(bool is_vec = false);
|
||||
Status _init();
|
||||
|
||||
Status _init_return_column_iterators();
|
||||
Status _init_bitmap_index_iterators();
|
||||
|
||||
Reference in New Issue
Block a user