[opt](inverted index) Optimize column initialization for topn reads by reserving only the read limit (#38870)
Cherry-picked from https://github.com/apache/doris/pull/37722
This commit is contained in:
@ -1879,7 +1879,8 @@ Status SegmentIterator::_read_columns(const std::vector<ColumnId>& column_ids,
|
||||
}
|
||||
|
||||
Status SegmentIterator::_init_current_block(
|
||||
vectorized::Block* block, std::vector<vectorized::MutableColumnPtr>& current_columns) {
|
||||
vectorized::Block* block, std::vector<vectorized::MutableColumnPtr>& current_columns,
|
||||
uint32_t nrows_read_limit) {
|
||||
block->clear_column_data(_schema->num_column_ids());
|
||||
|
||||
for (size_t i = 0; i < _schema->num_column_ids(); i++) {
|
||||
@ -1899,7 +1900,7 @@ Status SegmentIterator::_init_current_block(
|
||||
column_desc->path() == nullptr ? "" : column_desc->path()->get_path());
|
||||
// TODO reuse
|
||||
current_columns[cid] = file_column_type->create_column();
|
||||
current_columns[cid]->reserve(_opts.block_row_max);
|
||||
current_columns[cid]->reserve(nrows_read_limit);
|
||||
} else {
|
||||
// the column in block must clear() here to insert new data
|
||||
if (_is_pred_column[cid] ||
|
||||
@ -1918,7 +1919,7 @@ Status SegmentIterator::_init_current_block(
|
||||
} else if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_DATETIME) {
|
||||
current_columns[cid]->set_datetime_type();
|
||||
}
|
||||
current_columns[cid]->reserve(_opts.block_row_max);
|
||||
current_columns[cid]->reserve(nrows_read_limit);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2332,14 +2333,16 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
|
||||
}
|
||||
}
|
||||
}
|
||||
RETURN_IF_ERROR(_init_current_block(block, _current_return_columns));
|
||||
_converted_column_ids.assign(_schema->columns().size(), 0);
|
||||
|
||||
_current_batch_rows_read = 0;
|
||||
uint32_t nrows_read_limit = _opts.block_row_max;
|
||||
if (_can_opt_topn_reads()) {
|
||||
nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit), nrows_read_limit);
|
||||
}
|
||||
|
||||
RETURN_IF_ERROR(_init_current_block(block, _current_return_columns, nrows_read_limit));
|
||||
_converted_column_ids.assign(_schema->columns().size(), 0);
|
||||
|
||||
_current_batch_rows_read = 0;
|
||||
RETURN_IF_ERROR(_read_columns_by_index(
|
||||
nrows_read_limit, _current_batch_rows_read,
|
||||
_lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval));
|
||||
|
||||
@ -222,7 +222,8 @@ private:
|
||||
bool set_block_rowid);
|
||||
void _replace_version_col(size_t num_rows);
|
||||
Status _init_current_block(vectorized::Block* block,
|
||||
std::vector<vectorized::MutableColumnPtr>& non_pred_vector);
|
||||
std::vector<vectorized::MutableColumnPtr>& non_pred_vector,
|
||||
uint32_t nrows_read_limit);
|
||||
uint16_t _evaluate_vectorization_predicate(uint16_t* sel_rowid_idx, uint16_t selected_size);
|
||||
uint16_t _evaluate_short_circuit_predicate(uint16_t* sel_rowid_idx, uint16_t selected_size);
|
||||
void _output_non_pred_columns(vectorized::Block* block);
|
||||
|
||||
Reference in New Issue
Block a user