bp #42004 Co-authored-by: kaka11chen <kaka11.chen@gmail.com>
This commit is contained in:
Submodule be/src/apache-orc updated: 903ea6ccdc...db01184f76
@ -874,5 +874,107 @@ Status DelegateReader::create_file_reader(RuntimeProfile* profile,
|
||||
}
|
||||
return Status();
|
||||
}
|
||||
|
||||
// Locates the range that contains `desired_offset`, probing forward from the cursor
// (`index`) left by the previous call. The cursor only ever advances, so offsets are
// expected to be requested in non-decreasing order.
//
// On success `result_range` receives a copy of the matching range and OK is returned.
// InvalidArgument is returned when the offset falls before the current candidate range
// (i.e. in a gap or behind the cursor) or when every range has been exhausted.
Status LinearProbeRangeFinder::get_range_for(int64_t desired_offset,
                                             io::PrefetchRange& result_range) {
    for (; index < _ranges.size(); ++index) {
        const io::PrefetchRange& candidate = _ranges[index];
        if (candidate.end_offset <= desired_offset) {
            // This range ends at or before the requested offset: move the cursor past it.
            continue;
        }
        if (candidate.start_offset <= desired_offset) [[likely]] {
            result_range = candidate;
            return Status::OK();
        }
        // The first range ending after the offset also starts after it, so no range
        // covers the offset.
        return Status::InvalidArgument("Invalid desiredOffset");
    }
    return Status::InvalidArgument("Invalid desiredOffset");
}
|
||||
|
||||
// Wraps `inner_reader` with a single-range cache whose ranges are chosen by `range_finder`.
//
// The cache buffer is allocated once, sized to the largest range the finder reports
// (but never smaller than 4096 bytes). When `profile` is non-null, the
// "RangeCacheFileReader" timer and its child counters are registered on it.
RangeCacheFileReader::RangeCacheFileReader(RuntimeProfile* profile, io::FileReaderSPtr inner_reader,
                                           std::shared_ptr<RangeFinder> range_finder)
        : _profile(profile),
          _inner_reader(std::move(inner_reader)),
          _range_finder(std::move(range_finder)) {
    _size = _inner_reader->size();

    const uint64_t min_cache_bytes = 4096;
    const uint64_t largest_range_bytes =
            static_cast<uint64_t>(_range_finder->get_max_range_size());
    _cache = OwnedSlice(std::max(min_cache_bytes, largest_range_bytes));

    if (_profile == nullptr) {
        return;
    }

    // All counters below hang under this parent entry in the runtime profile.
    const char* parent_name = "RangeCacheFileReader";
    ADD_TIMER_WITH_LEVEL(_profile, parent_name, 1);
    _request_io = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "RequestIO", TUnit::UNIT, parent_name, 1);
    _request_bytes =
            ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "RequestBytes", TUnit::BYTES, parent_name, 1);
    _request_time = ADD_CHILD_TIMER_WITH_LEVEL(_profile, "RequestTime", parent_name, 1);
    _read_to_cache_time = ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ReadToCacheTime", parent_name, 1);
    _cache_refresh_count = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "CacheRefreshCount", TUnit::UNIT,
                                                        parent_name, 1);
    _read_to_cache_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "ReadToCacheBytes", TUnit::BYTES,
                                                        parent_name, 1);
}
|
||||
|
||||
// Serves a read of `result.size` bytes at `offset` out of the cached range.
//
// The range finder is asked which range contains `offset`; if that range is not the one
// currently cached, the whole range is fetched from the inner reader into `_cache` first.
// The requested bytes are then copied from the cache into `result.data` and `*bytes_read`
// is set to the request size.
//
// Returns InternalError when:
//   * no range covers `offset` (deliberately not falling back to the inner reader, since
//     that would hide a range-planning error),
//   * the request does not fit entirely inside the matched range, or
//   * the inner reader returns fewer bytes than the range size.
// Errors from the inner reader are propagated unchanged.
Status RangeCacheFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_read,
                                          const IOContext* io_ctx) {
    const size_t request_size = result.size;

    _cache_statistics.request_io++;
    _cache_statistics.request_bytes += request_size;
    SCOPED_RAW_TIMER(&_cache_statistics.request_time);

    PrefetchRange range;
    if (!_range_finder->get_range_for(offset, range).ok()) [[unlikely]] {
        // Returning an error (rather than reading through the inner reader) keeps
        // mistakes in the planned ranges visible.
        return Status::InternalError("RangeCacheFileReader read not in Ranges. Offset = {}",
                                     offset);
    }

    // Only the bytes of [start_offset, end_offset) are valid in the cache. A request that
    // leaks outside of it would copy stale bytes or read past the cache buffer.
    if (offset < range.start_offset || offset >= range.end_offset ||
        request_size > range.end_offset - offset) [[unlikely]] {
        return Status::InternalError(
                "RangeCacheFileReader read exceeds range. Offset = {}, size = {}, range = [{}, "
                "{})",
                offset, request_size, range.start_offset, range.end_offset);
    }

    if (_current_start_offset != range.start_offset) { // need read new range to cache.
        const auto range_size = range.end_offset - range.start_offset;

        _cache_statistics.cache_refresh_count++;
        _cache_statistics.read_to_cache_bytes += range_size;
        SCOPED_RAW_TIMER(&_cache_statistics.read_to_cache_time);

        // The buffer is about to be overwritten: forget the old range first so that a
        // failed or short read can never be mistaken for valid cached data later.
        _current_start_offset = -1;

        Slice cache_slice = {_cache.data(), range_size};
        RETURN_IF_ERROR(
                _inner_reader->read_at(range.start_offset, cache_slice, bytes_read, io_ctx));

        if (*bytes_read != range_size) [[unlikely]] {
            return Status::InternalError(
                    "RangeCacheFileReader use inner reader read bytes {} not eq expect size {}",
                    *bytes_read, range_size);
        }

        _current_start_offset = range.start_offset;
    }

    // Position of the requested bytes inside the cached range.
    const size_t buffer_offset = offset - range.start_offset;
    memcpy(result.data, _cache.data() + buffer_offset, request_size);
    *bytes_read = request_size;

    return Status::OK();
}
|
||||
|
||||
void RangeCacheFileReader::_collect_profile_before_close() {
|
||||
if (_profile != nullptr) {
|
||||
COUNTER_UPDATE(_request_io, _cache_statistics.request_io);
|
||||
COUNTER_UPDATE(_request_bytes, _cache_statistics.request_bytes);
|
||||
COUNTER_UPDATE(_request_time, _cache_statistics.request_time);
|
||||
COUNTER_UPDATE(_read_to_cache_time, _cache_statistics.read_to_cache_time);
|
||||
COUNTER_UPDATE(_cache_refresh_count, _cache_statistics.cache_refresh_count);
|
||||
COUNTER_UPDATE(_read_to_cache_bytes, _cache_statistics.read_to_cache_bytes);
|
||||
if (_inner_reader != nullptr) {
|
||||
_inner_reader->collect_profile_before_close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace io
|
||||
} // namespace doris
|
||||
|
||||
@ -53,6 +53,149 @@ struct PrefetchRange {
|
||||
: start_offset(start_offset), end_offset(end_offset) {}
|
||||
|
||||
PrefetchRange() : start_offset(0), end_offset(0) {}
|
||||
|
||||
// Two ranges are equal when both their start and end offsets match.
bool operator==(const PrefetchRange& other) const {
    if (start_offset != other.start_offset) {
        return false;
    }
    return end_offset == other.end_offset;
}
|
||||
|
||||
// Negation of operator==.
bool operator!=(const PrefetchRange& other) const {
    return !operator==(other);
}
|
||||
|
||||
// Returns the smallest range that covers both this range and `other`, regardless of
// their relative order: it starts at the smaller of the two start offsets and ends at
// the larger of the two end offsets.
// NOTE(review): assumes start_offset <= end_offset holds for both ranges.
PrefetchRange span(const PrefetchRange& other) const {
    return {std::min(start_offset, other.start_offset), std::max(end_offset, other.end_offset)};
}
|
||||
// Joins this range with a range that follows it: the result starts where this range
// starts and ends where `other` ends. No ordering check is performed.
PrefetchRange seq_span(const PrefetchRange& other) const {
    return PrefetchRange(start_offset, other.end_offset);
}
|
||||
|
||||
//Ranges needs to be sorted.
|
||||
static std::vector<PrefetchRange> merge_adjacent_seq_ranges(
|
||||
const std::vector<PrefetchRange>& seq_ranges, int64_t max_merge_distance_bytes,
|
||||
int64_t once_max_read_bytes) {
|
||||
if (seq_ranges.empty()) {
|
||||
return {};
|
||||
}
|
||||
// Merge overlapping ranges
|
||||
std::vector<PrefetchRange> result;
|
||||
PrefetchRange last = seq_ranges.front();
|
||||
for (size_t i = 1; i < seq_ranges.size(); ++i) {
|
||||
PrefetchRange current = seq_ranges[i];
|
||||
PrefetchRange merged = last.seq_span(current);
|
||||
if (merged.end_offset <= once_max_read_bytes + merged.start_offset &&
|
||||
last.end_offset + max_merge_distance_bytes >= current.start_offset) {
|
||||
last = merged;
|
||||
} else {
|
||||
result.push_back(last);
|
||||
last = current;
|
||||
}
|
||||
}
|
||||
result.push_back(last);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
class RangeFinder {
|
||||
public:
|
||||
virtual ~RangeFinder() = default;
|
||||
virtual Status get_range_for(int64_t desired_offset, io::PrefetchRange& result_range) = 0;
|
||||
virtual size_t get_max_range_size() const = 0;
|
||||
};
|
||||
|
||||
class LinearProbeRangeFinder : public RangeFinder {
|
||||
public:
|
||||
LinearProbeRangeFinder(std::vector<io::PrefetchRange>&& ranges) : _ranges(std::move(ranges)) {}
|
||||
|
||||
Status get_range_for(int64_t desired_offset, io::PrefetchRange& result_range) override;
|
||||
|
||||
size_t get_max_range_size() const override {
|
||||
size_t max_range_size = 0;
|
||||
for (const auto& range : _ranges) {
|
||||
max_range_size = std::max(max_range_size, range.end_offset - range.start_offset);
|
||||
}
|
||||
return max_range_size;
|
||||
}
|
||||
|
||||
~LinearProbeRangeFinder() override = default;
|
||||
|
||||
private:
|
||||
std::vector<io::PrefetchRange> _ranges;
|
||||
size_t index {0};
|
||||
};
|
||||
|
||||
/**
|
||||
* The reader provides a solution to read one range at a time. You can customize RangeFinder to meet your scenario.
|
||||
* For me, since there will be tiny stripes when reading orc files, in order to reduce the requests to hdfs,
|
||||
* I first merge the access to the orc files to be read (of course there is a problem of read amplification,
|
||||
* but in my scenario, compared with reading hdfs multiple times, it is faster to read more data on hdfs at one time),
|
||||
* and then because the actual reading of orc files is in order from front to back, I provide LinearProbeRangeFinder.
|
||||
*/
|
||||
class RangeCacheFileReader : public io::FileReader {
|
||||
struct RangeCacheReaderStatistics {
|
||||
int64_t request_io = 0;
|
||||
int64_t request_bytes = 0;
|
||||
int64_t request_time = 0;
|
||||
int64_t read_to_cache_time = 0;
|
||||
int64_t cache_refresh_count = 0;
|
||||
int64_t read_to_cache_bytes = 0;
|
||||
};
|
||||
|
||||
public:
|
||||
RangeCacheFileReader(RuntimeProfile* profile, io::FileReaderSPtr inner_reader,
|
||||
std::shared_ptr<RangeFinder> range_finder);
|
||||
|
||||
~RangeCacheFileReader() override = default;
|
||||
|
||||
Status close() override {
|
||||
if (!_closed) {
|
||||
_closed = true;
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
const io::Path& path() const override { return _inner_reader->path(); }
|
||||
|
||||
size_t size() const override { return _size; }
|
||||
|
||||
bool closed() const override { return _closed; }
|
||||
|
||||
std::shared_ptr<io::FileSystem> fs() const override { return _inner_reader->fs(); }
|
||||
|
||||
protected:
|
||||
Status read_at_impl(size_t offset, Slice result, size_t* bytes_read,
|
||||
const IOContext* io_ctx) override;
|
||||
|
||||
void _collect_profile_before_close() override;
|
||||
|
||||
private:
|
||||
RuntimeProfile* _profile = nullptr;
|
||||
io::FileReaderSPtr _inner_reader;
|
||||
std::shared_ptr<RangeFinder> _range_finder;
|
||||
|
||||
OwnedSlice _cache;
|
||||
int64_t _current_start_offset = -1;
|
||||
|
||||
size_t _size;
|
||||
bool _closed = false;
|
||||
|
||||
RuntimeProfile::Counter* _request_io = nullptr;
|
||||
RuntimeProfile::Counter* _request_bytes = nullptr;
|
||||
RuntimeProfile::Counter* _request_time = nullptr;
|
||||
RuntimeProfile::Counter* _read_to_cache_time = nullptr;
|
||||
RuntimeProfile::Counter* _cache_refresh_count = nullptr;
|
||||
RuntimeProfile::Counter* _read_to_cache_bytes = nullptr;
|
||||
RangeCacheReaderStatistics _cache_statistics;
|
||||
/**
|
||||
* `RangeCacheFileReader`:
|
||||
* 1. `CacheRefreshCount`: how many IOs are merged
|
||||
* 2. `ReadToCacheBytes`: how much data is actually read after merging
|
||||
* 3. `ReadToCacheTime`: how long it takes to read data after merging
|
||||
* 4. `RequestBytes`: how many bytes does the apache-orc library actually need to read the orc file
|
||||
* 5. `RequestIO`: how many times the apache-orc library calls this read interface
|
||||
* 6. `RequestTime`: how long it takes the apache-orc library to call this read interface
|
||||
*
|
||||
* It should be noted that `RangeCacheFileReader` is a wrapper of the reader that actually reads data,such as
|
||||
* the hdfs reader, so strictly speaking, `CacheRefreshCount` is not equal to how many IOs are initiated to hdfs,
|
||||
* because each time the hdfs reader is requested, the hdfs reader may not be able to read all the data at once.
|
||||
*/
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@ -857,28 +857,79 @@ Status OrcReader::set_fill_columns(
|
||||
if (_colname_to_value_range == nullptr || !_init_search_argument(_colname_to_value_range)) {
|
||||
_lazy_read_ctx.can_lazy_read = false;
|
||||
}
|
||||
|
||||
if (!_lazy_read_ctx.can_lazy_read) {
|
||||
for (auto& kv : _lazy_read_ctx.predicate_partition_columns) {
|
||||
_lazy_read_ctx.partition_columns.emplace(kv.first, kv.second);
|
||||
}
|
||||
for (auto& kv : _lazy_read_ctx.predicate_missing_columns) {
|
||||
_lazy_read_ctx.missing_columns.emplace(kv.first, kv.second);
|
||||
}
|
||||
}
|
||||
|
||||
_fill_all_columns = true;
|
||||
|
||||
// create orc row reader
|
||||
try {
|
||||
_row_reader_options.range(_range_start_offset, _range_size);
|
||||
_row_reader_options.setTimezoneName(_ctz == "CST" ? "Asia/Shanghai" : _ctz);
|
||||
_row_reader_options.include(_read_cols);
|
||||
_row_reader_options.setEnableLazyDecoding(true);
|
||||
|
||||
uint64_t number_of_stripes = _reader->getNumberOfStripes();
|
||||
auto all_stripes_needed = _reader->getNeedReadStripes(_row_reader_options);
|
||||
|
||||
int64_t range_end_offset = _range_start_offset + _range_size;
|
||||
|
||||
// If you set "orc_tiny_stripe_threshold_bytes" = 0, the use tiny stripes merge io optimization will not be used.
|
||||
int64_t orc_tiny_stripe_threshold_bytes = 8L * 1024L * 1024L;
|
||||
int64_t orc_once_max_read_bytes = 8L * 1024L * 1024L;
|
||||
int64_t orc_max_merge_distance_bytes = 1L * 1024L * 1024L;
|
||||
|
||||
if (_state != nullptr) {
|
||||
orc_tiny_stripe_threshold_bytes =
|
||||
_state->query_options().orc_tiny_stripe_threshold_bytes;
|
||||
orc_once_max_read_bytes = _state->query_options().orc_once_max_read_bytes;
|
||||
orc_max_merge_distance_bytes = _state->query_options().orc_max_merge_distance_bytes;
|
||||
}
|
||||
|
||||
bool all_tiny_stripes = true;
|
||||
std::vector<io::PrefetchRange> tiny_stripe_ranges;
|
||||
|
||||
for (uint64_t i = 0; i < number_of_stripes; i++) {
|
||||
std::unique_ptr<orc::StripeInformation> strip_info = _reader->getStripe(i);
|
||||
uint64_t strip_start_offset = strip_info->getOffset();
|
||||
uint64_t strip_end_offset = strip_start_offset + strip_info->getLength();
|
||||
|
||||
if (strip_start_offset >= range_end_offset || strip_end_offset < _range_start_offset ||
|
||||
!all_stripes_needed[i]) {
|
||||
continue;
|
||||
}
|
||||
if (strip_info->getLength() > orc_tiny_stripe_threshold_bytes) {
|
||||
all_tiny_stripes = false;
|
||||
break;
|
||||
}
|
||||
|
||||
tiny_stripe_ranges.emplace_back(strip_start_offset, strip_end_offset);
|
||||
}
|
||||
if (all_tiny_stripes && number_of_stripes > 0) {
|
||||
std::vector<io::PrefetchRange> prefetch_merge_ranges =
|
||||
io::PrefetchRange::merge_adjacent_seq_ranges(tiny_stripe_ranges,
|
||||
orc_max_merge_distance_bytes,
|
||||
orc_once_max_read_bytes);
|
||||
auto range_finder =
|
||||
std::make_shared<io::LinearProbeRangeFinder>(std::move(prefetch_merge_ranges));
|
||||
|
||||
auto* orc_input_stream_ptr = static_cast<ORCFileInputStream*>(_reader->getStream());
|
||||
orc_input_stream_ptr->set_all_tiny_stripes();
|
||||
auto& orc_file_reader = orc_input_stream_ptr->get_file_reader();
|
||||
auto orc_inner_reader = orc_input_stream_ptr->get_inner_reader();
|
||||
orc_file_reader = std::make_shared<io::RangeCacheFileReader>(_profile, orc_inner_reader,
|
||||
range_finder);
|
||||
}
|
||||
|
||||
if (!_lazy_read_ctx.can_lazy_read) {
|
||||
for (auto& kv : _lazy_read_ctx.predicate_partition_columns) {
|
||||
_lazy_read_ctx.partition_columns.emplace(kv.first, kv.second);
|
||||
}
|
||||
for (auto& kv : _lazy_read_ctx.predicate_missing_columns) {
|
||||
_lazy_read_ctx.missing_columns.emplace(kv.first, kv.second);
|
||||
}
|
||||
}
|
||||
|
||||
_fill_all_columns = true;
|
||||
// create orc row reader
|
||||
if (_lazy_read_ctx.can_lazy_read) {
|
||||
_row_reader_options.filter(_lazy_read_ctx.predicate_orc_columns);
|
||||
_orc_filter = std::unique_ptr<ORCFilterImpl>(new ORCFilterImpl(this));
|
||||
}
|
||||
_row_reader_options.setEnableLazyDecoding(true);
|
||||
if (!_lazy_read_ctx.conjuncts.empty()) {
|
||||
_string_dict_filter = std::make_unique<StringDictFilterImpl>(this);
|
||||
}
|
||||
@ -2416,6 +2467,9 @@ MutableColumnPtr OrcReader::_convert_dict_column_to_string_column(
|
||||
void ORCFileInputStream::beforeReadStripe(
|
||||
std::unique_ptr<orc::StripeInformation> current_strip_information,
|
||||
std::vector<bool> selected_columns) {
|
||||
if (_is_all_tiny_stripes) {
|
||||
return;
|
||||
}
|
||||
if (_file_reader != nullptr) {
|
||||
_file_reader->collect_profile_before_close();
|
||||
}
|
||||
|
||||
@ -34,6 +34,7 @@
|
||||
#include "common/status.h"
|
||||
#include "exec/olap_common.h"
|
||||
#include "io/file_factory.h"
|
||||
#include "io/fs/buffered_reader.h"
|
||||
#include "io/fs/file_reader.h"
|
||||
#include "io/fs/file_reader_writer_fwd.h"
|
||||
#include "olap/olap_common.h"
|
||||
@ -642,7 +643,11 @@ public:
|
||||
_io_ctx(io_ctx),
|
||||
_profile(profile) {}
|
||||
|
||||
~ORCFileInputStream() override = default;
|
||||
~ORCFileInputStream() override {
|
||||
if (_file_reader != nullptr) {
|
||||
_file_reader->collect_profile_before_close();
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t getLength() const override { return _file_reader->size(); }
|
||||
|
||||
@ -655,6 +660,12 @@ public:
|
||||
void beforeReadStripe(std::unique_ptr<orc::StripeInformation> current_strip_information,
|
||||
std::vector<bool> selected_columns) override;
|
||||
|
||||
void set_all_tiny_stripes() { _is_all_tiny_stripes = true; }
|
||||
|
||||
io::FileReaderSPtr& get_file_reader() { return _file_reader; }
|
||||
|
||||
io::FileReaderSPtr& get_inner_reader() { return _inner_reader; }
|
||||
|
||||
protected:
|
||||
void _collect_profile_at_runtime() override {};
|
||||
void _collect_profile_before_close() override;
|
||||
@ -663,10 +674,10 @@ private:
|
||||
const std::string& _file_name;
|
||||
io::FileReaderSPtr _inner_reader;
|
||||
io::FileReaderSPtr _file_reader;
|
||||
bool _is_all_tiny_stripes = false;
|
||||
// Owned by OrcReader
|
||||
OrcReader::Statistics* _statistics = nullptr;
|
||||
const io::IOContext* _io_ctx = nullptr;
|
||||
RuntimeProfile* _profile = nullptr;
|
||||
};
|
||||
|
||||
} // namespace doris::vectorized
|
||||
|
||||
@ -118,6 +118,36 @@ private:
|
||||
io::Path _path = "/tmp/mock";
|
||||
};
|
||||
|
||||
class TestingRangeCacheFileReader : public io::FileReader {
|
||||
public:
|
||||
TestingRangeCacheFileReader(std::shared_ptr<io::FileReader> delegate) : _delegate(delegate) {};
|
||||
|
||||
~TestingRangeCacheFileReader() override = default;
|
||||
|
||||
Status close() override { return _delegate->close(); }
|
||||
|
||||
const io::Path& path() const override { return _delegate->path(); }
|
||||
|
||||
size_t size() const override { return _delegate->size(); }
|
||||
|
||||
bool closed() const override { return _delegate->closed(); }
|
||||
|
||||
const io::PrefetchRange& last_read_range() const { return *_last_read_range; }
|
||||
|
||||
std::shared_ptr<io::FileSystem> fs() const override { return _delegate->fs(); }
|
||||
|
||||
protected:
|
||||
Status read_at_impl(size_t offset, Slice result, size_t* bytes_read,
|
||||
const io::IOContext* io_ctx) override {
|
||||
_last_read_range = std::make_unique<io::PrefetchRange>(offset, offset + result.size);
|
||||
return _delegate->read_at_impl(offset, result, bytes_read, io_ctx);
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<io::FileReader> _delegate;
|
||||
std::unique_ptr<io::PrefetchRange> _last_read_range;
|
||||
};
|
||||
|
||||
TEST_F(BufferedReaderTest, normal_use) {
|
||||
// buffered_reader_test_file 950 bytes
|
||||
io::FileReaderSPtr local_reader;
|
||||
@ -402,4 +432,84 @@ TEST_F(BufferedReaderTest, test_merged_io) {
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies which spans RangeCacheFileReader requests from its inner reader after the
// tiny-stripe ranges have been merged with a 1 MiB merge distance and an 8 MiB read cap.
TEST_F(BufferedReaderTest, test_range_cache_file_reader) {
    io::FileReaderSPtr offset_reader = std::make_shared<MockOffsetFileReader>(128 * 1024 * 1024);
    auto testing_reader = std::make_shared<TestingRangeCacheFileReader>(offset_reader);

    const int64_t orc_max_merge_distance = 1L * 1024L * 1024L;
    const int64_t orc_once_max_read_size = 8L * 1024L * 1024L;
    const int64_t mib = 1048576L;

    // Merges `stripes`, builds a fresh RangeCacheFileReader on top of them, performs two
    // 1-byte reads and checks the span the inner reader was asked for after each of them.
    auto check_two_reads = [&](const std::vector<io::PrefetchRange>& stripes, size_t first_offset,
                               const io::PrefetchRange& first_expected, size_t second_offset,
                               const io::PrefetchRange& second_expected) {
        std::vector<io::PrefetchRange> prefetch_merge_ranges =
                io::PrefetchRange::merge_adjacent_seq_ranges(stripes, orc_max_merge_distance,
                                                             orc_once_max_read_size);
        auto range_finder =
                std::make_shared<io::LinearProbeRangeFinder>(std::move(prefetch_merge_ranges));
        io::RangeCacheFileReader range_cache_file_reader(nullptr, testing_reader, range_finder);

        char data[1];
        Slice result(data, 1);
        size_t bytes_read = 0;

        EXPECT_TRUE(
                range_cache_file_reader.read_at(first_offset, result, &bytes_read, nullptr).ok());
        EXPECT_EQ(first_expected, testing_reader->last_read_range());

        EXPECT_TRUE(
                range_cache_file_reader.read_at(second_offset, result, &bytes_read, nullptr).ok());
        EXPECT_EQ(second_expected, testing_reader->last_read_range());

        EXPECT_TRUE(range_cache_file_reader.close().ok());
    };

    // Two small stripes merge into [3, 63); the 8 MiB stripe stays on its own.
    // Reading at the very start of the merged range loads all of it.
    check_two_reads({io::PrefetchRange(3, 33), io::PrefetchRange(33, 63),
                     io::PrefetchRange(63, 8 * mib + 63)},
                    3, io::PrefetchRange(3, 63), 63, io::PrefetchRange(63, 8 * mib + 63));

    // Same layout, but the first read hits the last byte of the merged range.
    check_two_reads({io::PrefetchRange(3, 33), io::PrefetchRange(33, 63),
                     io::PrefetchRange(63, 8 * mib + 63)},
                    62, io::PrefetchRange(3, 63), 63, io::PrefetchRange(63, 8 * mib + 63));

    // An empty range followed by a 5 MiB and a 3 MiB stripe: the first two merge, the
    // third would push the merged range over the read cap and therefore stays separate.
    check_two_reads({io::PrefetchRange(3, 3), io::PrefetchRange(4, 5 * mib + 4),
                     io::PrefetchRange(5 * mib + 4, 3 * mib + 5 * mib + 4)},
                    3, io::PrefetchRange(3, 1 + 5 * mib + 3), 4 + 5 * mib,
                    io::PrefetchRange(4 + 5 * mib, 3 * mib + 4 + 5 * mib));
}
|
||||
|
||||
} // end namespace doris
|
||||
|
||||
@ -0,0 +1,11 @@
|
||||
use `default`;
|
||||
|
||||
CREATE TABLE `orc_tiny_stripes`(
|
||||
col1 bigint,
|
||||
col2 string,
|
||||
col3 bigint
|
||||
)
|
||||
STORED AS orc
|
||||
LOCATION '/user/doris/preinstalled_data/orc/orc_tiny_stripes';
|
||||
|
||||
msck repair table orc_tiny_stripes;
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -447,6 +447,12 @@ public class SessionVariable implements Serializable, Writable {
|
||||
|
||||
public static final String ENABLE_ORC_LAZY_MAT = "enable_orc_lazy_materialization";
|
||||
|
||||
public static final String ORC_TINY_STRIPE_THRESHOLD_BYTES = "orc_tiny_stripe_threshold_bytes";
|
||||
|
||||
public static final String ORC_ONCE_MAX_READ_BYTES = "orc_once_max_read_bytes";
|
||||
|
||||
public static final String ORC_MAX_MERGE_DISTANCE_BYTES = "orc_max_merge_distance_bytes";
|
||||
|
||||
public static final String ENABLE_PARQUET_FILTER_BY_MIN_MAX = "enable_parquet_filter_by_min_max";
|
||||
|
||||
public static final String ENABLE_ORC_FILTER_BY_MIN_MAX = "enable_orc_filter_by_min_max";
|
||||
@ -1648,6 +1654,46 @@ public class SessionVariable implements Serializable, Writable {
|
||||
public boolean enableOrcLazyMat = true;
|
||||
|
||||
|
||||
@VariableMgr.VarAttr(
|
||||
name = ORC_TINY_STRIPE_THRESHOLD_BYTES,
|
||||
description = {"在orc文件中如果一个stripe的字节大小小于`orc_tiny_stripe_threshold`,"
|
||||
+ "我们认为该stripe为 tiny stripe。对于多个连续的tiny stripe我们会进行读取优化,即一次性读多个tiny stripe."
|
||||
+ "如果你不想使用该优化,可以将该值设置为0。默认为 8M。",
|
||||
"In an orc file, if the byte size of a stripe is less than `orc_tiny_stripe_threshold`,"
|
||||
+ "we consider the stripe to be a tiny stripe. For multiple consecutive tiny stripes,"
|
||||
+ "we will perform read optimization, that is, read multiple tiny stripes at a time."
|
||||
+ "If you do not want to use this optimization, you can set this value to 0."
|
||||
+ "The default is 8M."},
|
||||
needForward = true,
|
||||
setter = "setOrcTinyStripeThresholdBytes")
|
||||
public long orcTinyStripeThresholdBytes = 8L * 1024L * 1024L;
|
||||
|
||||
|
||||
@VariableMgr.VarAttr(
|
||||
name = ORC_ONCE_MAX_READ_BYTES,
|
||||
description = {"在使用tiny stripe读取优化的时候,会对多个tiny stripe合并成一次IO,"
|
||||
+ "该参数用来控制每次IO请求的最大字节大小。你不应该将值设置的小于`orc_tiny_stripe_threshold`。默认为 8M。",
|
||||
"When using tiny stripe read optimization, multiple tiny stripes will be merged into one IO."
|
||||
+ "This parameter is used to control the maximum byte size of each IO request."
|
||||
+ "You should not set the value less than `orc_tiny_stripe_threshold`."
|
||||
+ "The default is 8M."},
|
||||
needForward = true,
|
||||
setter = "setOrcOnceMaxReadBytes")
|
||||
public long orcOnceMaxReadBytes = 8L * 1024L * 1024L;
|
||||
|
||||
|
||||
@VariableMgr.VarAttr(
|
||||
name = ORC_MAX_MERGE_DISTANCE_BYTES,
|
||||
description = {"在使用tiny stripe读取优化的时候,由于tiny stripe并不一定连续。"
|
||||
+ "当两个tiny stripe之间距离大于该参数时,我们不会将其合并成一次IO。默认为 1M。",
|
||||
"When using tiny stripe read optimization, since tiny stripes are not necessarily continuous,"
|
||||
+ "when the distance between two tiny stripes is greater than this parameter,"
|
||||
+ "we will not merge them into one IO. The default value is 1M."},
|
||||
needForward = true,
|
||||
setter = "setOrcMaxMergeDistanceBytes")
|
||||
public long orcMaxMergeDistanceBytes = 1024L * 1024L;
|
||||
|
||||
|
||||
@VariableMgr.VarAttr(
|
||||
name = ENABLE_PARQUET_FILTER_BY_MIN_MAX,
|
||||
description = {"控制 parquet reader 是否启用 min-max 值过滤。默认为 true。",
|
||||
@ -2662,6 +2708,32 @@ public class SessionVariable implements Serializable, Writable {
|
||||
this.parallelExecInstanceNum = val;
|
||||
}
|
||||
|
||||
public void setOrcTinyStripeThresholdBytes(String value) throws Exception {
|
||||
long val = checkFieldLongValue(ORC_TINY_STRIPE_THRESHOLD_BYTES, 0, value);
|
||||
this.orcTinyStripeThresholdBytes = val;
|
||||
}
|
||||
|
||||
public void setOrcOnceMaxReadBytes(String value) throws Exception {
|
||||
long val = checkFieldLongValue(ORC_ONCE_MAX_READ_BYTES, 0, value);
|
||||
this.orcOnceMaxReadBytes = val;
|
||||
}
|
||||
|
||||
public void setOrcMaxMergeDistanceBytes(String value) throws Exception {
|
||||
long val = checkFieldLongValue(ORC_MAX_MERGE_DISTANCE_BYTES, 0, value);
|
||||
this.orcMaxMergeDistanceBytes = val;
|
||||
}
|
||||
|
||||
private long checkFieldLongValue(String variableName, long minValue, String value) throws Exception {
|
||||
long val = Long.parseLong(value);
|
||||
if (val < minValue) {
|
||||
throw new Exception(
|
||||
variableName + " value should greater than or equal " + String.valueOf(minValue)
|
||||
+ ", you set value is: " + value);
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
|
||||
private int checkFieldValue(String variableName, int minValue, String value) throws Exception {
|
||||
int val = Integer.valueOf(value);
|
||||
if (val < minValue) {
|
||||
@ -3684,6 +3756,11 @@ public class SessionVariable implements Serializable, Writable {
|
||||
tResult.setAdaptivePipelineTaskSerialReadOnLimit(adaptivePipelineTaskSerialReadOnLimit);
|
||||
tResult.setInListValueCountThreshold(inListValueCountThreshold);
|
||||
tResult.setEnableAutoCreateWhenOverwrite(enableAutoCreateWhenOverwrite);
|
||||
|
||||
tResult.setOrcTinyStripeThresholdBytes(orcTinyStripeThresholdBytes);
|
||||
tResult.setOrcMaxMergeDistanceBytes(orcMaxMergeDistanceBytes);
|
||||
tResult.setOrcOnceMaxReadBytes(orcOnceMaxReadBytes);
|
||||
|
||||
return tResult;
|
||||
}
|
||||
|
||||
|
||||
@ -334,6 +334,10 @@ struct TQueryOptions {
|
||||
134: optional i32 partition_topn_pre_partition_rows = 1000;
|
||||
|
||||
137: optional bool enable_auto_create_when_overwrite = false;
|
||||
|
||||
138: optional i64 orc_tiny_stripe_threshold_bytes = 8388608;
|
||||
139: optional i64 orc_once_max_read_bytes = 8388608;
|
||||
140: optional i64 orc_max_merge_distance_bytes = 1048576;
|
||||
// For cloud, to control if the content would be written into file cache
|
||||
1000: optional bool disable_file_cache = false
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,203 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
suite("test_orc_tiny_stripes", "p0,external,hive,external_docker,external_docker_hive") {
|
||||
|
||||
String enabled = context.config.otherConfigs.get("enableHiveTest")
|
||||
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
|
||||
logger.info("diable Hive test.")
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
for (String hivePrefix : ["hive2"]) {
|
||||
try {
|
||||
String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
|
||||
String catalog_name = "${hivePrefix}_test_orc_tiny_stripes"
|
||||
String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
|
||||
|
||||
sql """drop catalog if exists ${catalog_name}"""
|
||||
sql """create catalog if not exists ${catalog_name} properties (
|
||||
"type"="hms",
|
||||
'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}'
|
||||
);"""
|
||||
sql """use `${catalog_name}`.`default`"""
|
||||
|
||||
|
||||
|
||||
|
||||
def orc_configs = [
|
||||
[0,0,0],
|
||||
[0,10230,1024],
|
||||
[1,1,1],
|
||||
[201,130,0],
|
||||
[1024,1024,0],
|
||||
[1024,1024,1024],
|
||||
[4096,1024,0],
|
||||
[1024,4096,0],
|
||||
[1,10240,10000000],
|
||||
[1000000,888888888,0],
|
||||
[1000000000000,1000000000000,100000000000]
|
||||
]
|
||||
def li = [ "set enable_orc_lazy_materialization=true;","set enable_orc_lazy_materialization=false;"]
|
||||
|
||||
|
||||
li.each { it1 ->
|
||||
sql it1
|
||||
|
||||
orc_configs.each { it2 ->
|
||||
def value1 = it2[0].toString()
|
||||
def value2 = it2[1].toString()
|
||||
def value3 = it2[2].toString()
|
||||
|
||||
sql "set orc_tiny_stripe_threshold_bytes = " + value1 + ";"
|
||||
sql "set orc_once_max_read_bytes = " + value2 + ";"
|
||||
sql "set orc_max_merge_distance_bytes = " + value3 + ";"
|
||||
|
||||
|
||||
qt_test_1 """ select count(*) from orc_tiny_stripes; """ //372
|
||||
|
||||
/*
|
||||
*/
|
||||
|
||||
qt_test_2 """ select * from orc_tiny_stripes where col1 = 1 order by col1,col2,col3; """
|
||||
/*
|
||||
1 str_1 10000000001
|
||||
1 str_1 10000000001
|
||||
*/
|
||||
qt_test_3 """ select * from orc_tiny_stripes where col1%100 = 0 order by col1,col2,col3 ; """
|
||||
/*
|
||||
0 str_0 10000000000
|
||||
0 str_0 10000000000
|
||||
100 9DPJaFc00euBteqiW1f1 10000000027
|
||||
100 str_100 10000000100
|
||||
2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034
|
||||
4800 TaWGgh4iZ 10000000115
|
||||
5700 SwOaGJj9fVbk5j0Np 10000000050
|
||||
*/
|
||||
|
||||
qt_test_4 """ select * from orc_tiny_stripes where col2 = "str_4" order by col1,col2,col3; """
|
||||
/*
|
||||
4 str_4 10000000004
|
||||
4 str_4 10000000004
|
||||
*/
|
||||
qt_test_5 """ select count(*) from orc_tiny_stripes where col3 > 10000000005; """ //348
|
||||
qt_test_6 """ select * from orc_tiny_stripes where col3 in ( 10000000005,10000000053,10000000146) order by col1,col2,col3 ; """
|
||||
/*
|
||||
5 str_5 10000000005
|
||||
5 str_5 10000000005
|
||||
53 str_53 10000000053
|
||||
146 str_146 10000000146
|
||||
3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053
|
||||
4129 qwPIwtkTZb 10000000005
|
||||
4942 vAdLpLUN3VkGNmTjvuPv 10000000053
|
||||
5349 koTeYPr2Qaqqnlk07X 10000000146
|
||||
5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005
|
||||
7573 e3lIPwNnbG6DPmog 10000000005
|
||||
8614 TtyopDvRptLB5 10000000005
|
||||
*/
|
||||
|
||||
qt_test_7 """ select * from orc_tiny_stripes where col3 in ( 10000000005,10000000053,10000000146) order by col1,col2,col3 ; """
|
||||
/*
|
||||
5 str_5 10000000005
|
||||
5 str_5 10000000005
|
||||
53 str_53 10000000053
|
||||
146 str_146 10000000146
|
||||
3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053
|
||||
4129 qwPIwtkTZb 10000000005
|
||||
4942 vAdLpLUN3VkGNmTjvuPv 10000000053
|
||||
5349 koTeYPr2Qaqqnlk07X 10000000146
|
||||
5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005
|
||||
7573 e3lIPwNnbG6DPmog 10000000005
|
||||
8614 TtyopDvRptLB5 10000000005
|
||||
*/
|
||||
|
||||
qt_test_8 """ select col3 from orc_tiny_stripes where col3 in ( 10000000005,10000000053,10000000146) order by col3 ; """
|
||||
/*
|
||||
10000000005
|
||||
10000000005
|
||||
10000000005
|
||||
10000000005
|
||||
10000000005
|
||||
10000000005
|
||||
10000000053
|
||||
10000000053
|
||||
10000000053
|
||||
10000000146
|
||||
10000000146
|
||||
*/
|
||||
|
||||
qt_test_9 """ select col1 from orc_tiny_stripes where col1 in (10,1000) order by col1 ; """ // 10
|
||||
qt_test_10 """ select col2 from orc_tiny_stripes where length(col2) > 29 order by col2 ; """
|
||||
/*
|
||||
1cx1jZ6QGRWAkskiOgURj6dscYxDOl
|
||||
Asn3tnIg1xYm8Lbgey8baqw3EmooFm
|
||||
MSBtFURjtMu3LyDTLYx9FBM23UQdZ1
|
||||
e8e7xgwaSI2JKI65FEThzSQBVmKeAZ
|
||||
w3xAirHLO1tvjon2jgr7y9tBtrGfMS
|
||||
zABBLCkowUIqfONQOAjir8YPkFqfDW
|
||||
*/
|
||||
qt_test_11 """ select * from orc_tiny_stripes where col1 < 10 order by col1,col2,col3; """
|
||||
/*
|
||||
0 str_0 10000000000
|
||||
0 str_0 10000000000
|
||||
1 str_1 10000000001
|
||||
1 str_1 10000000001
|
||||
2 str_2 10000000002
|
||||
2 str_2 10000000002
|
||||
3 str_3 10000000003
|
||||
3 str_3 10000000003
|
||||
4 str_4 10000000004
|
||||
4 str_4 10000000004
|
||||
5 str_5 10000000005
|
||||
5 str_5 10000000005
|
||||
6 str_6 10000000006
|
||||
7 str_7 10000000007
|
||||
8 str_8 10000000008
|
||||
9 str_9 10000000009
|
||||
*/
|
||||
|
||||
qt_test_12 """ select col1 from orc_tiny_stripes where col1 in(0,6 ) order by col1; """
|
||||
/*
|
||||
0
|
||||
0
|
||||
6
|
||||
*/
|
||||
|
||||
qt_test_13 """ select col1 from orc_tiny_stripes where col1 in(20,60 ) order by col1; """
|
||||
/*
|
||||
20
|
||||
60
|
||||
*/
|
||||
|
||||
qt_test_14 """ select col1 from orc_tiny_stripes where col1 in(40,0 ) order by col1; """
|
||||
/*
|
||||
0
|
||||
0
|
||||
40
|
||||
*/
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
sql """drop catalog if exists ${catalog_name}"""
|
||||
} finally {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user