[fix](read) remove the logic that estimates the number of rows to read in the segment iterator, to avoid wrong results on unique key tables. (#29109)
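For context, the removed heuristic worked as follows: while `_wait_times_estimate_row_size` was positive, each batch was capped at 100 rows; once a non-empty batch had been read, `_update_max_row()` measured the average row size and shrank `_opts.block_row_max` so that a batch stays within the `config::doris_scan_block_max_mb` budget. Per the title, this capping could produce wrong results for unique key reads, so the commit drops the heuristic entirely and the iterator simply reads up to `_opts.block_row_max` rows per batch. The sketch below is a minimal, self-contained illustration of the removed arithmetic only; `RowSizeEstimator`, `next_read_limit`, `update_max_row`, and treating the config value as a plain byte count are assumptions for illustration, not Doris code.

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

// Simplified sketch of the batch-size estimation heuristic removed by this commit.
// Assumption: the byte budget plays the role of config::doris_scan_block_max_mb and
// is treated here as a plain byte count.
struct RowSizeEstimator {
    int wait_times = 10;            // mirrors _wait_times_estimate_row_size(10)
    bool estimating = true;         // mirrors _estimate_row_size(true)
    uint32_t block_row_max = 4096;  // mirrors _opts.block_row_max
    uint64_t scan_block_max_bytes = 64ull * 1024 * 1024;

    // While still estimating, cap the next read at 100 rows (as in the removed
    // branch on _wait_times_estimate_row_size in _next_batch_internal).
    uint32_t next_read_limit() {
        uint32_t limit = block_row_max;
        if (wait_times > 0) {
            limit = std::min<uint32_t>(limit, 100);
            --wait_times;
        }
        return limit;
    }

    // After the first non-empty batch, shrink block_row_max so that one batch of
    // average-sized rows stays within the byte budget (as in the removed
    // _update_max_row).
    void update_max_row(uint64_t batch_bytes, uint64_t batch_rows) {
        if (batch_rows == 0) {
            return;
        }
        estimating = false;
        uint64_t avg_row_size = batch_bytes / batch_rows;
        if (avg_row_size > 0) {
            uint64_t rows = scan_block_max_bytes / avg_row_size;
            block_row_max = static_cast<uint32_t>(std::min<uint64_t>(rows, block_row_max));
        }
    }
};

int main() {
    RowSizeEstimator est;
    std::cout << "first read limit: " << est.next_read_limit() << "\n"; // 100
    // Suppose the first 100-row batch occupied 4 MB, i.e. ~41 KB per row:
    est.update_max_row(4ull * 1024 * 1024, 100);
    std::cout << "new block_row_max: " << est.block_row_max << "\n";    // 1600
    return 0;
}
```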
@@ -263,8 +263,6 @@ SegmentIterator::SegmentIterator(std::shared_ptr<Segment> segment, SchemaSPtr sc
           _lazy_materialization_read(false),
           _lazy_inited(false),
           _inited(false),
-          _estimate_row_size(true),
-          _wait_times_estimate_row_size(10),
           _pool(new ObjectPool) {}
 
 Status SegmentIterator::init(const StorageReadOptions& opts) {
@@ -2167,13 +2165,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
 
     _current_batch_rows_read = 0;
     uint32_t nrows_read_limit = _opts.block_row_max;
-    if (_wait_times_estimate_row_size > 0) {
-        // first time, read 100 rows to estimate average row size, to avoid oom caused by a single batch being too large.
-        // If no valid data is read for the first time, block_row_max is read each time thereafter.
-        // Avoid low performance when valid data cannot be read all the time
-        nrows_read_limit = std::min(nrows_read_limit, (uint32_t)100);
-        _wait_times_estimate_row_size--;
-    }
     RETURN_IF_ERROR(_read_columns_by_index(
             nrows_read_limit, _current_batch_rows_read,
             _lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval));
@@ -2320,9 +2311,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
             // shrink char_type suffix zero data
             block->shrink_char_type_column_suffix_zero(_char_type_idx);
 
-            if (UNLIKELY(_estimate_row_size) && block->rows() > 0) {
-                _update_max_row(block);
-            }
             return Status::OK();
         }
         // step4: read non_predicate column
@@ -2358,10 +2346,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
     }
 #endif
 
-    if (UNLIKELY(_estimate_row_size) && block->rows() > 0) {
-        _update_max_row(block);
-    }
-
     // reverse block row order
     if (_opts.read_orderby_key_reverse) {
         size_t num_rows = block->rows();
@@ -2515,15 +2499,6 @@ void SegmentIterator::_convert_dict_code_for_predicate_if_necessary_impl(
     }
 }
 
-void SegmentIterator::_update_max_row(const vectorized::Block* block) {
-    _estimate_row_size = false;
-    auto avg_row_size = block->bytes() / block->rows();
-    if (avg_row_size > 0) {
-        int block_row_max = config::doris_scan_block_max_mb / avg_row_size;
-        _opts.block_row_max = std::min(block_row_max, _opts.block_row_max);
-    }
-}
-
 Status SegmentIterator::current_block_row_locations(std::vector<RowLocation>* block_row_locations) {
     DCHECK(_opts.record_rowids);
     DCHECK_GE(_block_rowids.size(), _current_batch_rows_read);

@@ -286,8 +286,6 @@ private:
 
     void _convert_dict_code_for_predicate_if_necessary_impl(ColumnPredicate* predicate);
 
-    void _update_max_row(const vectorized::Block* block);
-
     bool _check_apply_by_bitmap_index(ColumnPredicate* pred);
    bool _check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound = false);
 
@@ -442,9 +440,6 @@ private:
     // the actual init process is delayed to the first call to next_batch()
     bool _lazy_inited;
     bool _inited;
-    bool _estimate_row_size;
-    // Read up to 100 rows at a time while waiting for the estimated row size.
-    int _wait_times_estimate_row_size;
 
     StorageReadOptions _opts;
     // make a copy of `_opts.column_predicates` in order to make local changes