[opt](MergedIO) optimize merge small IO, prevent amplified read (#20305)
Optimize the strategy for merging small IO to prevent severe read amplification, and turn off merged IO when the file cache is enabled. Adjustable parameters:
```
// the max amplified read ratio when merging small IO
max_amplified_read_ratio=0.8
// the min segment size
file_cache_min_file_segment_size = 1048576
```
This commit is contained in:
@ -856,6 +856,7 @@ DEFINE_mInt32(parquet_header_max_size_mb, "1");
|
||||
DEFINE_mInt32(parquet_rowgroup_max_buffer_mb, "128");
|
||||
// Max buffer size for parquet chunk column
|
||||
DEFINE_mInt32(parquet_column_max_buffer_mb, "8");
|
||||
// Merge small IO: the max amplified read ratio, i.e. the largest gap-to-content
// ratio tolerated when merging adjacent ranges into a single read.
DEFINE_mDouble(max_amplified_read_ratio, "0.8");
|
||||
|
||||
// OrcReader
|
||||
DEFINE_mInt32(orc_natural_read_size_mb, "8");
|
||||
|
||||
@ -884,6 +884,8 @@ DECLARE_mInt32(parquet_header_max_size_mb);
|
||||
DECLARE_mInt32(parquet_rowgroup_max_buffer_mb);
|
||||
// Max buffer size for parquet chunk column
|
||||
DECLARE_mInt32(parquet_column_max_buffer_mb);
|
||||
// The max amplified read ratio allowed when merging small IO
|
||||
DECLARE_mDouble(max_amplified_read_ratio);
|
||||
|
||||
// OrcReader
|
||||
DECLARE_mInt32(orc_natural_read_size_mb);
|
||||
|
||||
@ -135,6 +135,16 @@ Status MergeRangeFileReader::read_at_impl(size_t offset, Slice result, size_t* b
|
||||
}
|
||||
if (gap < merge_end - merge_start && content_size < _remaining &&
|
||||
!_range_cached_data[merge_index + 1].has_read) {
|
||||
size_t next_content =
|
||||
std::min(_random_access_ranges[merge_index + 1].end_offset, merge_end) -
|
||||
_random_access_ranges[merge_index + 1].start_offset;
|
||||
next_content = std::min(next_content, _remaining - content_size);
|
||||
double amplified_ratio = config::max_amplified_read_ratio;
|
||||
if ((content_size + hollow_size) > MIN_READ_SIZE &&
|
||||
(hollow_size + gap) > (next_content + content_size) * amplified_ratio) {
|
||||
// too large gap
|
||||
break;
|
||||
}
|
||||
hollow_size += gap;
|
||||
merge_start = _random_access_ranges[merge_index + 1].start_offset;
|
||||
} else {
|
||||
|
||||
@ -123,6 +123,7 @@ public:
|
||||
static constexpr size_t BOX_SIZE = 1 * 1024 * 1024; // 1MB
|
||||
static constexpr size_t SMALL_IO = 2 * 1024 * 1024; // 2MB
|
||||
static constexpr size_t NUM_BOX = TOTAL_BUFFER_SIZE / BOX_SIZE; // 128
|
||||
static constexpr size_t MIN_READ_SIZE = 4096; // 4KB
|
||||
|
||||
MergeRangeFileReader(RuntimeProfile* profile, io::FileReaderSPtr reader,
|
||||
const std::vector<PrefetchRange>& random_access_ranges)
|
||||
|
||||
@ -569,15 +569,21 @@ Status ParquetReader::_next_row_group_reader() {
|
||||
|
||||
RowGroupReader::PositionDeleteContext position_delete_ctx =
|
||||
_get_position_delete_ctx(row_group, row_group_index);
|
||||
size_t avg_io_size = 0;
|
||||
const std::vector<io::PrefetchRange> io_ranges =
|
||||
_generate_random_access_ranges(row_group_index, &avg_io_size);
|
||||
// The underlying page reader will prefetch data in column.
|
||||
// Using both MergeRangeFileReader and BufferedStreamReader simultaneously would waste a lot of memory.
|
||||
io::FileReaderSPtr group_file_reader =
|
||||
avg_io_size < io::MergeRangeFileReader::SMALL_IO
|
||||
? std::make_shared<io::MergeRangeFileReader>(_profile, _file_reader, io_ranges)
|
||||
: _file_reader;
|
||||
io::FileReaderSPtr group_file_reader;
|
||||
if (typeid_cast<io::InMemoryFileReader*>(_file_reader.get())) {
|
||||
// InMemoryFileReader has the ability to merge small IO
|
||||
group_file_reader = _file_reader;
|
||||
} else {
|
||||
size_t avg_io_size = 0;
|
||||
const std::vector<io::PrefetchRange> io_ranges =
|
||||
_generate_random_access_ranges(row_group_index, &avg_io_size);
|
||||
// The underlying page reader will prefetch data in column.
|
||||
// Using both MergeRangeFileReader and BufferedStreamReader simultaneously would waste a lot of memory.
|
||||
group_file_reader = avg_io_size < io::MergeRangeFileReader::SMALL_IO
|
||||
? std::make_shared<io::MergeRangeFileReader>(
|
||||
_profile, _file_reader, io_ranges)
|
||||
: _file_reader;
|
||||
}
|
||||
_current_group_reader.reset(new RowGroupReader(
|
||||
group_file_reader, _read_columns, row_group_index.row_group_id, row_group, _ctz,
|
||||
_io_ctx, position_delete_ctx, _lazy_read_ctx, _state));
|
||||
|
||||
Reference in New Issue
Block a user