[Opt](orc)Optimize the merge io when orc reader read multiple tiny stripes. (#42004) (#44239)

bp #42004

Co-authored-by: kaka11chen <kaka11.chen@gmail.com>
This commit is contained in:
daidai
2024-11-22 11:01:41 +08:00
committed by GitHub
parent 75f8323f45
commit 702abbff0f
15 changed files with 3043 additions and 17 deletions

View File

@ -874,5 +874,107 @@ Status DelegateReader::create_file_reader(RuntimeProfile* profile,
}
return Status();
}
// Finds the first range, at or after the current probe position, whose end lies beyond
// `desired_offset`, and copies it into `result_range`.
//
// The probe position (`index`) is kept between calls and only moves forward, so ranges that
// were skipped by an earlier call are never examined again.
//
// Returns InvalidArgument when the offset falls before the start of the candidate range or
// when no remaining range ends after the offset.
Status LinearProbeRangeFinder::get_range_for(int64_t desired_offset,
                                             io::PrefetchRange& result_range) {
    for (; index < _ranges.size(); ++index) {
        const io::PrefetchRange& candidate = _ranges[index];
        if (candidate.end_offset <= desired_offset) {
            // This range ends at or before the offset; advance the probe.
            continue;
        }
        if (candidate.start_offset > desired_offset) [[unlikely]] {
            // The offset sits in a gap before the candidate range.
            return Status::InvalidArgument("Invalid desiredOffset");
        }
        result_range = candidate;
        return Status::OK();
    }
    return Status::InvalidArgument("Invalid desiredOffset");
}
// Wraps `inner_reader` and allocates a cache buffer large enough for the biggest range that
// `range_finder` can hand out (never smaller than 4096 bytes). When a profile is supplied, the
// "RangeCacheFileReader" counter group is registered on it.
RangeCacheFileReader::RangeCacheFileReader(RuntimeProfile* profile, io::FileReaderSPtr inner_reader,
                                           std::shared_ptr<RangeFinder> range_finder)
        : _profile(profile),
          _inner_reader(std::move(inner_reader)),
          _range_finder(std::move(range_finder)) {
    _size = _inner_reader->size();

    const uint64_t largest_range = _range_finder->get_max_range_size();
    _cache = OwnedSlice(std::max<uint64_t>(4096, largest_range));

    if (_profile == nullptr) {
        // No profile to report to: leave every counter pointer null.
        return;
    }

    const char* counter_group = "RangeCacheFileReader";
    ADD_TIMER_WITH_LEVEL(_profile, counter_group, 1);
    _request_io = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "RequestIO", TUnit::UNIT, counter_group, 1);
    _request_bytes =
            ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "RequestBytes", TUnit::BYTES, counter_group, 1);
    _request_time = ADD_CHILD_TIMER_WITH_LEVEL(_profile, "RequestTime", counter_group, 1);
    _read_to_cache_time = ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ReadToCacheTime", counter_group, 1);
    _cache_refresh_count =
            ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "CacheRefreshCount", TUnit::UNIT, counter_group, 1);
    _read_to_cache_bytes =
            ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "ReadToCacheBytes", TUnit::BYTES, counter_group, 1);
}
// Serves a read of `result.size` bytes at `offset` from the cached range.
//
// The range finder picks the range that contains `offset`. If that range is not the one
// currently held in `_cache`, the whole range is fetched from the inner reader first. The
// requested bytes are then copied out of the cache.
//
// @param offset      absolute file offset of the first requested byte
// @param result      destination buffer; its size is the number of bytes requested
// @param bytes_read  set to the request size on success
// @param io_ctx      forwarded unchanged to the inner reader
// @return InternalError when the offset is not covered by any range, when the request runs
//         past the end of the selected range, or when the inner reader returns fewer bytes
//         than the range holds; otherwise the inner reader's error or OK.
Status RangeCacheFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_read,
                                          const IOContext* io_ctx) {
    const auto request_size = result.size;
    _cache_statistics.request_io++;
    _cache_statistics.request_bytes += request_size;
    SCOPED_RAW_TIMER(&_cache_statistics.request_time);

    PrefetchRange range;
    if (!_range_finder->get_range_for(offset, range)) [[unlikely]] {
        // Deliberately no fallback to the inner reader: a read outside the planned ranges
        // points at a planning error, and serving it anyway would hide that error.
        return Status::InternalError("RangeCacheFileReader read not in Ranges. Offset = {}",
                                     offset);
    }

    // The cache only ever holds [range.start_offset, range.end_offset). A request that runs
    // past the end would copy bytes that were never read for this range.
    if (offset + request_size > range.end_offset) [[unlikely]] {
        return Status::InternalError(
                "RangeCacheFileReader read exceeds range. Offset = {}, size = {}, range = [{}, {})",
                offset, request_size, range.start_offset, range.end_offset);
    }

    if (_current_start_offset != range.start_offset) { // need read new range to cache.
        const auto range_size = range.end_offset - range.start_offset;
        _cache_statistics.cache_refresh_count++;
        _cache_statistics.read_to_cache_bytes += range_size;
        SCOPED_RAW_TIMER(&_cache_statistics.read_to_cache_time);

        // The buffer is about to be overwritten. Forget the old range first so that a failed
        // or short refill can never be mistaken for valid cached data later on.
        _current_start_offset = -1;

        size_t cached_bytes = 0;
        Slice cache_slice = {_cache.data(), range_size};
        RETURN_IF_ERROR(
                _inner_reader->read_at(range.start_offset, cache_slice, &cached_bytes, io_ctx));
        if (cached_bytes != range_size) [[unlikely]] {
            return Status::InternalError(
                    "RangeCacheFileReader use inner reader read bytes {} not eq expect size {}",
                    cached_bytes, range_size);
        }
        _current_start_offset = range.start_offset;
    }

    const int64_t buffer_offset = offset - _current_start_offset;
    memcpy(result.data, _cache.data() + buffer_offset, request_size);
    *bytes_read = request_size;
    return Status::OK();
}
void RangeCacheFileReader::_collect_profile_before_close() {
if (_profile != nullptr) {
COUNTER_UPDATE(_request_io, _cache_statistics.request_io);
COUNTER_UPDATE(_request_bytes, _cache_statistics.request_bytes);
COUNTER_UPDATE(_request_time, _cache_statistics.request_time);
COUNTER_UPDATE(_read_to_cache_time, _cache_statistics.read_to_cache_time);
COUNTER_UPDATE(_cache_refresh_count, _cache_statistics.cache_refresh_count);
COUNTER_UPDATE(_read_to_cache_bytes, _cache_statistics.read_to_cache_bytes);
if (_inner_reader != nullptr) {
_inner_reader->collect_profile_before_close();
}
}
}
} // namespace io
} // namespace doris

View File

@ -53,6 +53,149 @@ struct PrefetchRange {
: start_offset(start_offset), end_offset(end_offset) {}
PrefetchRange() : start_offset(0), end_offset(0) {}
// Two ranges are equal when both their start and their end offsets match.
bool operator==(const PrefetchRange& other) const {
    if (start_offset != other.start_offset) {
        return false;
    }
    return end_offset == other.end_offset;
}
// Negation of operator==.
bool operator!=(const PrefetchRange& other) const {
    return !operator==(other);
}
// Returns the smallest range that covers both this range and `other`, whatever their
// relative order: it starts at the lower of the two starts and ends at the higher of the
// two ends.
PrefetchRange span(const PrefetchRange& other) const {
    return {std::min(start_offset, other.start_offset), std::max(end_offset, other.end_offset)};
}
// Joins this range with a following one: the result starts where this range starts and ends
// where `other` ends. No ordering check is performed here.
PrefetchRange seq_span(const PrefetchRange& other) const {
    return PrefetchRange(start_offset, other.end_offset);
}
//Ranges needs to be sorted.
static std::vector<PrefetchRange> merge_adjacent_seq_ranges(
const std::vector<PrefetchRange>& seq_ranges, int64_t max_merge_distance_bytes,
int64_t once_max_read_bytes) {
if (seq_ranges.empty()) {
return {};
}
// Merge overlapping ranges
std::vector<PrefetchRange> result;
PrefetchRange last = seq_ranges.front();
for (size_t i = 1; i < seq_ranges.size(); ++i) {
PrefetchRange current = seq_ranges[i];
PrefetchRange merged = last.seq_span(current);
if (merged.end_offset <= once_max_read_bytes + merged.start_offset &&
last.end_offset + max_merge_distance_bytes >= current.start_offset) {
last = merged;
} else {
result.push_back(last);
last = current;
}
}
result.push_back(last);
return result;
}
};
// Strategy interface used by RangeCacheFileReader: maps a file offset to the range that
// should be loaded into the cache to serve a read at that offset.
class RangeFinder {
public:
virtual ~RangeFinder() = default;
// Stores in `result_range` the range chosen for `desired_offset`; the returned Status tells
// whether a range was found.
virtual Status get_range_for(int64_t desired_offset, io::PrefetchRange& result_range) = 0;
// Size in bytes of the largest range this finder can return. RangeCacheFileReader uses it to
// size its cache buffer.
virtual size_t get_max_range_size() const = 0;
};
class LinearProbeRangeFinder : public RangeFinder {
public:
LinearProbeRangeFinder(std::vector<io::PrefetchRange>&& ranges) : _ranges(std::move(ranges)) {}
Status get_range_for(int64_t desired_offset, io::PrefetchRange& result_range) override;
size_t get_max_range_size() const override {
size_t max_range_size = 0;
for (const auto& range : _ranges) {
max_range_size = std::max(max_range_size, range.end_offset - range.start_offset);
}
return max_range_size;
}
~LinearProbeRangeFinder() override = default;
private:
std::vector<io::PrefetchRange> _ranges;
size_t index {0};
};
/**
 * A FileReader wrapper that keeps exactly one range of the file in memory at a time and serves
 * reads out of it. Which range belongs to a given offset is decided by a pluggable RangeFinder.
 *
 * Motivation: ORC files may contain many tiny stripes. Reading them one by one means many
 * requests to HDFS; merging neighbouring stripes into one larger read costs some read
 * amplification but was faster in the author's scenario. Because ORC data is read from front
 * to back, LinearProbeRangeFinder is provided as the matching finder.
 *
 * Profile counters published under `RangeCacheFileReader`:
 * 1. `CacheRefreshCount`: how many IOs are merged
 * 2. `ReadToCacheBytes`: how much data is actually read after merging
 * 3. `ReadToCacheTime`: how long it takes to read data after merging
 * 4. `RequestBytes`: how many bytes does the apache-orc library actually need to read the orc file
 * 5. `RequestIO`: how many times the apache-orc library calls this read interface
 * 6. `RequestTime`: how long it takes the apache-orc library to call this read interface
 *
 * Note that this class wraps the reader that really performs the IO (for example the hdfs
 * reader). `CacheRefreshCount` is therefore not necessarily the number of IOs issued to hdfs:
 * the wrapped reader may need several requests to deliver one range.
 */
class RangeCacheFileReader : public io::FileReader {
    // Raw totals accumulated while reading; copied into the profile counters on close.
    struct RangeCacheReaderStatistics {
        int64_t request_io = 0;
        int64_t request_bytes = 0;
        int64_t request_time = 0;
        int64_t read_to_cache_time = 0;
        int64_t cache_refresh_count = 0;
        int64_t read_to_cache_bytes = 0;
    };

public:
    RangeCacheFileReader(RuntimeProfile* profile, io::FileReaderSPtr inner_reader,
                         std::shared_ptr<RangeFinder> range_finder);

    ~RangeCacheFileReader() override = default;

    // Marks this wrapper as closed. The inner reader is not closed here.
    Status close() override {
        _closed = true;
        return Status::OK();
    }

    const io::Path& path() const override { return _inner_reader->path(); }

    size_t size() const override { return _size; }

    bool closed() const override { return _closed; }

    std::shared_ptr<io::FileSystem> fs() const override { return _inner_reader->fs(); }

protected:
    Status read_at_impl(size_t offset, Slice result, size_t* bytes_read,
                        const IOContext* io_ctx) override;

    void _collect_profile_before_close() override;

private:
    RuntimeProfile* _profile = nullptr;
    io::FileReaderSPtr _inner_reader;
    std::shared_ptr<RangeFinder> _range_finder;

    // Buffer holding the bytes of the currently cached range.
    OwnedSlice _cache;
    // Start offset of the range currently held in `_cache`; -1 while nothing is cached.
    int64_t _current_start_offset = -1;

    size_t _size;
    bool _closed = false;

    RuntimeProfile::Counter* _request_io = nullptr;
    RuntimeProfile::Counter* _request_bytes = nullptr;
    RuntimeProfile::Counter* _request_time = nullptr;
    RuntimeProfile::Counter* _read_to_cache_time = nullptr;
    RuntimeProfile::Counter* _cache_refresh_count = nullptr;
    RuntimeProfile::Counter* _read_to_cache_bytes = nullptr;
    RangeCacheReaderStatistics _cache_statistics;
};
/**

View File

@ -857,28 +857,79 @@ Status OrcReader::set_fill_columns(
if (_colname_to_value_range == nullptr || !_init_search_argument(_colname_to_value_range)) {
_lazy_read_ctx.can_lazy_read = false;
}
if (!_lazy_read_ctx.can_lazy_read) {
for (auto& kv : _lazy_read_ctx.predicate_partition_columns) {
_lazy_read_ctx.partition_columns.emplace(kv.first, kv.second);
}
for (auto& kv : _lazy_read_ctx.predicate_missing_columns) {
_lazy_read_ctx.missing_columns.emplace(kv.first, kv.second);
}
}
_fill_all_columns = true;
// create orc row reader
try {
_row_reader_options.range(_range_start_offset, _range_size);
_row_reader_options.setTimezoneName(_ctz == "CST" ? "Asia/Shanghai" : _ctz);
_row_reader_options.include(_read_cols);
_row_reader_options.setEnableLazyDecoding(true);
uint64_t number_of_stripes = _reader->getNumberOfStripes();
auto all_stripes_needed = _reader->getNeedReadStripes(_row_reader_options);
int64_t range_end_offset = _range_start_offset + _range_size;
// Setting "orc_tiny_stripe_threshold_bytes" = 0 disables the tiny-stripe merged IO optimization.
int64_t orc_tiny_stripe_threshold_bytes = 8L * 1024L * 1024L;
int64_t orc_once_max_read_bytes = 8L * 1024L * 1024L;
int64_t orc_max_merge_distance_bytes = 1L * 1024L * 1024L;
if (_state != nullptr) {
orc_tiny_stripe_threshold_bytes =
_state->query_options().orc_tiny_stripe_threshold_bytes;
orc_once_max_read_bytes = _state->query_options().orc_once_max_read_bytes;
orc_max_merge_distance_bytes = _state->query_options().orc_max_merge_distance_bytes;
}
bool all_tiny_stripes = true;
std::vector<io::PrefetchRange> tiny_stripe_ranges;
for (uint64_t i = 0; i < number_of_stripes; i++) {
std::unique_ptr<orc::StripeInformation> strip_info = _reader->getStripe(i);
uint64_t strip_start_offset = strip_info->getOffset();
uint64_t strip_end_offset = strip_start_offset + strip_info->getLength();
if (strip_start_offset >= range_end_offset || strip_end_offset < _range_start_offset ||
!all_stripes_needed[i]) {
continue;
}
if (strip_info->getLength() > orc_tiny_stripe_threshold_bytes) {
all_tiny_stripes = false;
break;
}
tiny_stripe_ranges.emplace_back(strip_start_offset, strip_end_offset);
}
if (all_tiny_stripes && number_of_stripes > 0) {
std::vector<io::PrefetchRange> prefetch_merge_ranges =
io::PrefetchRange::merge_adjacent_seq_ranges(tiny_stripe_ranges,
orc_max_merge_distance_bytes,
orc_once_max_read_bytes);
auto range_finder =
std::make_shared<io::LinearProbeRangeFinder>(std::move(prefetch_merge_ranges));
auto* orc_input_stream_ptr = static_cast<ORCFileInputStream*>(_reader->getStream());
orc_input_stream_ptr->set_all_tiny_stripes();
auto& orc_file_reader = orc_input_stream_ptr->get_file_reader();
auto orc_inner_reader = orc_input_stream_ptr->get_inner_reader();
orc_file_reader = std::make_shared<io::RangeCacheFileReader>(_profile, orc_inner_reader,
range_finder);
}
if (!_lazy_read_ctx.can_lazy_read) {
for (auto& kv : _lazy_read_ctx.predicate_partition_columns) {
_lazy_read_ctx.partition_columns.emplace(kv.first, kv.second);
}
for (auto& kv : _lazy_read_ctx.predicate_missing_columns) {
_lazy_read_ctx.missing_columns.emplace(kv.first, kv.second);
}
}
_fill_all_columns = true;
// create orc row reader
if (_lazy_read_ctx.can_lazy_read) {
_row_reader_options.filter(_lazy_read_ctx.predicate_orc_columns);
_orc_filter = std::unique_ptr<ORCFilterImpl>(new ORCFilterImpl(this));
}
_row_reader_options.setEnableLazyDecoding(true);
if (!_lazy_read_ctx.conjuncts.empty()) {
_string_dict_filter = std::make_unique<StringDictFilterImpl>(this);
}
@ -2416,6 +2467,9 @@ MutableColumnPtr OrcReader::_convert_dict_column_to_string_column(
void ORCFileInputStream::beforeReadStripe(
std::unique_ptr<orc::StripeInformation> current_strip_information,
std::vector<bool> selected_columns) {
if (_is_all_tiny_stripes) {
return;
}
if (_file_reader != nullptr) {
_file_reader->collect_profile_before_close();
}

View File

@ -34,6 +34,7 @@
#include "common/status.h"
#include "exec/olap_common.h"
#include "io/file_factory.h"
#include "io/fs/buffered_reader.h"
#include "io/fs/file_reader.h"
#include "io/fs/file_reader_writer_fwd.h"
#include "olap/olap_common.h"
@ -642,7 +643,11 @@ public:
_io_ctx(io_ctx),
_profile(profile) {}
~ORCFileInputStream() override = default;
// Asks the current file reader, if there is one, to collect its profile counters when the
// stream is destroyed.
~ORCFileInputStream() override {
if (_file_reader != nullptr) {
_file_reader->collect_profile_before_close();
}
}
uint64_t getLength() const override { return _file_reader->size(); }
@ -655,6 +660,12 @@ public:
void beforeReadStripe(std::unique_ptr<orc::StripeInformation> current_strip_information,
std::vector<bool> selected_columns) override;
// Flags this stream as reading only tiny stripes.
void set_all_tiny_stripes() { _is_all_tiny_stripes = true; }
// Mutable reference to the reader used for reads, so a caller can replace it in place.
io::FileReaderSPtr& get_file_reader() { return _file_reader; }
// Mutable reference to the inner reader held by this stream.
io::FileReaderSPtr& get_inner_reader() { return _inner_reader; }
protected:
void _collect_profile_at_runtime() override {};
void _collect_profile_before_close() override;
@ -663,10 +674,10 @@ private:
const std::string& _file_name;
io::FileReaderSPtr _inner_reader;
io::FileReaderSPtr _file_reader;
bool _is_all_tiny_stripes = false;
// Owned by OrcReader
OrcReader::Statistics* _statistics = nullptr;
const io::IOContext* _io_ctx = nullptr;
RuntimeProfile* _profile = nullptr;
};
} // namespace doris::vectorized

View File

@ -118,6 +118,36 @@ private:
io::Path _path = "/tmp/mock";
};
// Test double: forwards every call to a wrapped reader and remembers the byte range
// [offset, offset + size) of the most recent read so tests can inspect it.
class TestingRangeCacheFileReader : public io::FileReader {
public:
    TestingRangeCacheFileReader(std::shared_ptr<io::FileReader> delegate) : _delegate(delegate) {}

    ~TestingRangeCacheFileReader() override = default;

    Status close() override { return _delegate->close(); }

    const io::Path& path() const override { return _delegate->path(); }

    size_t size() const override { return _delegate->size(); }

    bool closed() const override { return _delegate->closed(); }

    std::shared_ptr<io::FileSystem> fs() const override { return _delegate->fs(); }

    // Range of the latest read. Only meaningful after at least one read has happened.
    const io::PrefetchRange& last_read_range() const { return *_last_read_range; }

protected:
    Status read_at_impl(size_t offset, Slice result, size_t* bytes_read,
                        const io::IOContext* io_ctx) override {
        const size_t read_end = offset + result.size;
        _last_read_range = std::make_unique<io::PrefetchRange>(offset, read_end);
        return _delegate->read_at_impl(offset, result, bytes_read, io_ctx);
    }

private:
    std::shared_ptr<io::FileReader> _delegate;
    std::unique_ptr<io::PrefetchRange> _last_read_range;
};
TEST_F(BufferedReaderTest, normal_use) {
// buffered_reader_test_file 950 bytes
io::FileReaderSPtr local_reader;
@ -402,4 +432,84 @@ TEST_F(BufferedReaderTest, test_merged_io) {
}
}
// Verifies which byte range RangeCacheFileReader requests from the wrapped reader: one read
// per merged range, whichever byte inside the range is asked for first.
TEST_F(BufferedReaderTest, test_range_cache_file_reader) {
    io::FileReaderSPtr offset_reader = std::make_shared<MockOffsetFileReader>(128 * 1024 * 1024);
    auto testing_reader = std::make_shared<TestingRangeCacheFileReader>(offset_reader);

    constexpr int64_t kMiB = 1024L * 1024L;
    const int64_t merge_distance = 1L * kMiB;
    const int64_t max_read_size = 8L * kMiB;

    // Merges `stripes`, builds a fresh RangeCacheFileReader on top of `testing_reader`, issues
    // two one-byte reads and checks the range that reached the wrapped reader after each one.
    const auto run_case = [&](const std::vector<io::PrefetchRange>& stripes, size_t first_offset,
                              const io::PrefetchRange& first_expected, size_t second_offset,
                              const io::PrefetchRange& second_expected) {
        auto finder = std::make_shared<io::LinearProbeRangeFinder>(
                io::PrefetchRange::merge_adjacent_seq_ranges(stripes, merge_distance,
                                                             max_read_size));
        io::RangeCacheFileReader reader(nullptr, testing_reader, finder);

        char data[1];
        Slice result(data, 1);
        size_t bytes_read = 0;

        EXPECT_TRUE(reader.read_at(first_offset, result, &bytes_read, nullptr).ok());
        EXPECT_EQ(first_expected, testing_reader->last_read_range());

        EXPECT_TRUE(reader.read_at(second_offset, result, &bytes_read, nullptr).ok());
        EXPECT_EQ(second_expected, testing_reader->last_read_range());

        EXPECT_TRUE(reader.close().ok());
    };

    // Two small stripes merge into [3, 63); the 8 MiB stripe stays on its own because merging
    // it would exceed the per-read limit. First read hits the start of the merged range.
    run_case({io::PrefetchRange(3, 33), io::PrefetchRange(33, 63),
              io::PrefetchRange(63, 8 * kMiB + 63)},
             3, io::PrefetchRange(3, 63), 63, io::PrefetchRange(63, 8 * kMiB + 63));

    // Same stripes, but the first read hits the last byte of the merged range.
    run_case({io::PrefetchRange(3, 33), io::PrefetchRange(33, 63),
              io::PrefetchRange(63, 8 * kMiB + 63)},
             62, io::PrefetchRange(3, 63), 63, io::PrefetchRange(63, 8 * kMiB + 63));

    // An empty stripe followed by a 5 MiB and a 3 MiB stripe: the first two merge into
    // [3, 5 MiB + 4); adding the third would exceed the per-read limit.
    run_case({io::PrefetchRange(3, 3), io::PrefetchRange(4, 5 * kMiB + 4),
              io::PrefetchRange(5 * kMiB + 4, 8 * kMiB + 4)},
             3, io::PrefetchRange(3, 5 * kMiB + 4), 5 * kMiB + 4,
             io::PrefetchRange(5 * kMiB + 4, 8 * kMiB + 4));
}
} // end namespace doris

View File

@ -0,0 +1,11 @@
-- Registers the preinstalled ORC data set used by the tiny-stripe regression test
-- as a Hive table in the `default` database.
use `default`;
CREATE TABLE `orc_tiny_stripes`(
col1 bigint,
col2 string,
col3 bigint
)
STORED AS orc
LOCATION '/user/doris/preinstalled_data/orc/orc_tiny_stripes';
-- Refresh the metastore view of the table location after creating the table.
msck repair table orc_tiny_stripes;

View File

@ -447,6 +447,12 @@ public class SessionVariable implements Serializable, Writable {
public static final String ENABLE_ORC_LAZY_MAT = "enable_orc_lazy_materialization";
// Session variable names for the ORC tiny-stripe merged-read optimization.
public static final String ORC_TINY_STRIPE_THRESHOLD_BYTES = "orc_tiny_stripe_threshold_bytes";
public static final String ORC_ONCE_MAX_READ_BYTES = "orc_once_max_read_bytes";
public static final String ORC_MAX_MERGE_DISTANCE_BYTES = "orc_max_merge_distance_bytes";
public static final String ENABLE_PARQUET_FILTER_BY_MIN_MAX = "enable_parquet_filter_by_min_max";
public static final String ENABLE_ORC_FILTER_BY_MIN_MAX = "enable_orc_filter_by_min_max";
@ -1648,6 +1654,46 @@ public class SessionVariable implements Serializable, Writable {
public boolean enableOrcLazyMat = true;
// Stripe size threshold, in bytes, for the tiny-stripe read optimization; 0 turns it off.
// The description names the variable exactly as users must type it.
@VariableMgr.VarAttr(
        name = ORC_TINY_STRIPE_THRESHOLD_BYTES,
        description = {"在orc文件中如果一个stripe的字节大小小于`orc_tiny_stripe_threshold_bytes`,"
                + "我们认为该stripe为 tiny stripe。对于多个连续的tiny stripe我们会进行读取优化,即一次性读多个tiny stripe."
                + "如果你不想使用该优化,可以将该值设置为0。默认为 8M。",
                "In an orc file, if the byte size of a stripe is less than `orc_tiny_stripe_threshold_bytes`, "
                + "we consider the stripe to be a tiny stripe. For multiple consecutive tiny stripes, "
                + "we will perform read optimization, that is, read multiple tiny stripes at a time. "
                + "If you do not want to use this optimization, you can set this value to 0. "
                + "The default is 8M."},
        needForward = true,
        setter = "setOrcTinyStripeThresholdBytes")
public long orcTinyStripeThresholdBytes = 8L * 1024L * 1024L;
// Upper bound, in bytes, for one merged IO request when tiny stripes are read together.
// The description names the related variable exactly as users must type it.
@VariableMgr.VarAttr(
        name = ORC_ONCE_MAX_READ_BYTES,
        description = {"在使用tiny stripe读取优化的时候,会对多个tiny stripe合并成一次IO,"
                + "该参数用来控制每次IO请求的最大字节大小。你不应该将值设置的小于`orc_tiny_stripe_threshold_bytes`。默认为 8M。",
                "When using tiny stripe read optimization, multiple tiny stripes will be merged into one IO. "
                + "This parameter is used to control the maximum byte size of each IO request. "
                + "You should not set the value less than `orc_tiny_stripe_threshold_bytes`. "
                + "The default is 8M."},
        needForward = true,
        setter = "setOrcOnceMaxReadBytes")
public long orcOnceMaxReadBytes = 8L * 1024L * 1024L;
// Largest gap, in bytes, between two tiny stripes that may still be merged into one IO.
// Defaults to 1 MiB; validated and assigned through setOrcMaxMergeDistanceBytes.
@VariableMgr.VarAttr(
name = ORC_MAX_MERGE_DISTANCE_BYTES,
description = {"在使用tiny stripe读取优化的时候,由于tiny stripe并不一定连续。"
+ "当两个tiny stripe之间距离大于该参数时,我们不会将其合并成一次IO。默认为 1M。",
"When using tiny stripe read optimization, since tiny stripes are not necessarily continuous,"
+ "when the distance between two tiny stripes is greater than this parameter,"
+ "we will not merge them into one IO. The default value is 1M."},
needForward = true,
setter = "setOrcMaxMergeDistanceBytes")
public long orcMaxMergeDistanceBytes = 1024L * 1024L;
@VariableMgr.VarAttr(
name = ENABLE_PARQUET_FILTER_BY_MIN_MAX,
description = {"控制 parquet reader 是否启用 min-max 值过滤。默认为 true。",
@ -2662,6 +2708,32 @@ public class SessionVariable implements Serializable, Writable {
this.parallelExecInstanceNum = val;
}
/**
 * Parses {@code value} and stores it as the tiny-stripe threshold.
 *
 * @param value textual value; must parse as a long that is not negative
 * @throws Exception if the value cannot be parsed or is below 0
 */
public void setOrcTinyStripeThresholdBytes(String value) throws Exception {
    this.orcTinyStripeThresholdBytes = checkFieldLongValue(ORC_TINY_STRIPE_THRESHOLD_BYTES, 0, value);
}
/**
 * Parses {@code value} and stores it as the maximum size of one merged read.
 *
 * @param value textual value; must parse as a long that is not negative
 * @throws Exception if the value cannot be parsed or is below 0
 */
public void setOrcOnceMaxReadBytes(String value) throws Exception {
    this.orcOnceMaxReadBytes = checkFieldLongValue(ORC_ONCE_MAX_READ_BYTES, 0, value);
}
/**
 * Parses {@code value} and stores it as the maximum merge distance.
 *
 * @param value textual value; must parse as a long that is not negative
 * @throws Exception if the value cannot be parsed or is below 0
 */
public void setOrcMaxMergeDistanceBytes(String value) throws Exception {
    this.orcMaxMergeDistanceBytes = checkFieldLongValue(ORC_MAX_MERGE_DISTANCE_BYTES, 0, value);
}
/**
 * Parses {@code value} as a long and checks it against a lower bound.
 *
 * @param variableName name of the session variable, used in the error message
 * @param minValue smallest accepted value (inclusive)
 * @param value textual value to parse
 * @return the parsed value
 * @throws Exception if the parsed value is smaller than {@code minValue}; a value that is not
 *         a valid long makes {@link Long#parseLong(String)} throw NumberFormatException
 */
private long checkFieldLongValue(String variableName, long minValue, String value) throws Exception {
    final long parsed = Long.parseLong(value);
    if (parsed >= minValue) {
        return parsed;
    }
    throw new Exception(
            variableName + " value should greater than or equal " + minValue
                    + ", you set value is: " + value);
}
private int checkFieldValue(String variableName, int minValue, String value) throws Exception {
int val = Integer.valueOf(value);
if (val < minValue) {
@ -3684,6 +3756,11 @@ public class SessionVariable implements Serializable, Writable {
tResult.setAdaptivePipelineTaskSerialReadOnLimit(adaptivePipelineTaskSerialReadOnLimit);
tResult.setInListValueCountThreshold(inListValueCountThreshold);
tResult.setEnableAutoCreateWhenOverwrite(enableAutoCreateWhenOverwrite);
tResult.setOrcTinyStripeThresholdBytes(orcTinyStripeThresholdBytes);
tResult.setOrcMaxMergeDistanceBytes(orcMaxMergeDistanceBytes);
tResult.setOrcOnceMaxReadBytes(orcOnceMaxReadBytes);
return tResult;
}

View File

@ -334,6 +334,10 @@ struct TQueryOptions {
134: optional i32 partition_topn_pre_partition_rows = 1000;
137: optional bool enable_auto_create_when_overwrite = false;
// ORC tiny-stripe merged-read tuning, all in bytes. Defaults: 8 MiB, 8 MiB and 1 MiB.
138: optional i64 orc_tiny_stripe_threshold_bytes = 8388608;
139: optional i64 orc_once_max_read_bytes = 8388608;
140: optional i64 orc_max_merge_distance_bytes = 1048576;
// For cloud, to control if the content would be written into file cache
1000: optional bool disable_file_cache = false
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,203 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Regression suite for the ORC tiny-stripe merged-read optimization.
// It runs the same set of queries against the Hive table `orc_tiny_stripes` for every
// combination of lazy materialization on/off and the tiny-stripe session settings listed below.
// The block comments after the queries record the rows each query is expected to return.
suite("test_orc_tiny_stripes", "p0,external,hive,external_docker,external_docker_hive") {
// Skip the whole suite unless Hive tests are enabled in the regression configuration.
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
logger.info("diable Hive test.")
return;
}
for (String hivePrefix : ["hive2"]) {
try {
String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
String catalog_name = "${hivePrefix}_test_orc_tiny_stripes"
String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
// Recreate the HMS catalog so the suite always starts from a clean state.
sql """drop catalog if exists ${catalog_name}"""
sql """create catalog if not exists ${catalog_name} properties (
"type"="hms",
'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
);"""
sql """use `${catalog_name}`.`default`"""
// Each entry is [orc_tiny_stripe_threshold_bytes, orc_once_max_read_bytes, orc_max_merge_distance_bytes].
def orc_configs = [
[0,0,0],
[0,10230,1024],
[1,1,1],
[201,130,0],
[1024,1024,0],
[1024,1024,1024],
[4096,1024,0],
[1024,4096,0],
[1,10240,10000000],
[1000000,888888888,0],
[1000000000000,1000000000000,100000000000]
]
// Run every configuration once with lazy materialization enabled and once with it disabled.
def li = [ "set enable_orc_lazy_materialization=true;","set enable_orc_lazy_materialization=false;"]
li.each { it1 ->
sql it1
orc_configs.each { it2 ->
def value1 = it2[0].toString()
def value2 = it2[1].toString()
def value3 = it2[2].toString()
sql "set orc_tiny_stripe_threshold_bytes = " + value1 + ";"
sql "set orc_once_max_read_bytes = " + value2 + ";"
sql "set orc_max_merge_distance_bytes = " + value3 + ";"
qt_test_1 """ select count(*) from orc_tiny_stripes; """ //372
/*
*/
qt_test_2 """ select * from orc_tiny_stripes where col1 = 1 order by col1,col2,col3; """
/*
1 str_1 10000000001
1 str_1 10000000001
*/
qt_test_3 """ select * from orc_tiny_stripes where col1%100 = 0 order by col1,col2,col3 ; """
/*
0 str_0 10000000000
0 str_0 10000000000
100 9DPJaFc00euBteqiW1f1 10000000027
100 str_100 10000000100
2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034
4800 TaWGgh4iZ 10000000115
5700 SwOaGJj9fVbk5j0Np 10000000050
*/
qt_test_4 """ select * from orc_tiny_stripes where col2 = "str_4" order by col1,col2,col3; """
/*
4 str_4 10000000004
4 str_4 10000000004
*/
qt_test_5 """ select count(*) from orc_tiny_stripes where col3 > 10000000005; """ //348
qt_test_6 """ select * from orc_tiny_stripes where col3 in ( 10000000005,10000000053,10000000146) order by col1,col2,col3 ; """
/*
5 str_5 10000000005
5 str_5 10000000005
53 str_53 10000000053
146 str_146 10000000146
3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053
4129 qwPIwtkTZb 10000000005
4942 vAdLpLUN3VkGNmTjvuPv 10000000053
5349 koTeYPr2Qaqqnlk07X 10000000146
5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005
7573 e3lIPwNnbG6DPmog 10000000005
8614 TtyopDvRptLB5 10000000005
*/
// NOTE(review): qt_test_7 runs exactly the same query as qt_test_6 — confirm whether a different predicate was intended.
qt_test_7 """ select * from orc_tiny_stripes where col3 in ( 10000000005,10000000053,10000000146) order by col1,col2,col3 ; """
/*
5 str_5 10000000005
5 str_5 10000000005
53 str_53 10000000053
146 str_146 10000000146
3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053
4129 qwPIwtkTZb 10000000005
4942 vAdLpLUN3VkGNmTjvuPv 10000000053
5349 koTeYPr2Qaqqnlk07X 10000000146
5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005
7573 e3lIPwNnbG6DPmog 10000000005
8614 TtyopDvRptLB5 10000000005
*/
qt_test_8 """ select col3 from orc_tiny_stripes where col3 in ( 10000000005,10000000053,10000000146) order by col3 ; """
/*
10000000005
10000000005
10000000005
10000000005
10000000005
10000000005
10000000053
10000000053
10000000053
10000000146
10000000146
*/
qt_test_9 """ select col1 from orc_tiny_stripes where col1 in (10,1000) order by col1 ; """ // 10
qt_test_10 """ select col2 from orc_tiny_stripes where length(col2) > 29 order by col2 ; """
/*
1cx1jZ6QGRWAkskiOgURj6dscYxDOl
Asn3tnIg1xYm8Lbgey8baqw3EmooFm
MSBtFURjtMu3LyDTLYx9FBM23UQdZ1
e8e7xgwaSI2JKI65FEThzSQBVmKeAZ
w3xAirHLO1tvjon2jgr7y9tBtrGfMS
zABBLCkowUIqfONQOAjir8YPkFqfDW
*/
qt_test_11 """ select * from orc_tiny_stripes where col1 < 10 order by col1,col2,col3; """
/*
0 str_0 10000000000
0 str_0 10000000000
1 str_1 10000000001
1 str_1 10000000001
2 str_2 10000000002
2 str_2 10000000002
3 str_3 10000000003
3 str_3 10000000003
4 str_4 10000000004
4 str_4 10000000004
5 str_5 10000000005
5 str_5 10000000005
6 str_6 10000000006
7 str_7 10000000007
8 str_8 10000000008
9 str_9 10000000009
*/
qt_test_12 """ select col1 from orc_tiny_stripes where col1 in(0,6 ) order by col1; """
/*
0
0
6
*/
qt_test_13 """ select col1 from orc_tiny_stripes where col1 in(20,60 ) order by col1; """
/*
20
60
*/
qt_test_14 """ select col1 from orc_tiny_stripes where col1 in(40,0 ) order by col1; """
/*
0
0
40
*/
}
}
// NOTE(review): the catalog is dropped here, inside the try body, so it is left behind when a query above fails; the finally block below is empty.
sql """drop catalog if exists ${catalog_name}"""
} finally {
}
}
}