Support multiple key ranges in RowwiseIterator and StorageReadOptions (#1704)
Support multiple key ranges in RowwiseIterator and StorageReadOptions; remove unused fields and member functions in RowBlock and ColumnData; read num_rows_per_block from the short key index footer.
This commit is contained in:
@ -28,27 +28,44 @@ class RowBlockV2;
|
||||
class Schema;
|
||||
class Conditions;
|
||||
|
||||
struct StorageReadOptions {
|
||||
// lower_bound defines the smallest key at which iterator will
|
||||
// return data.
|
||||
// If lower_bound is null, won't return
|
||||
std::shared_ptr<RowCursor> lower_bound;
|
||||
class StorageReadOptions {
|
||||
public:
|
||||
struct KeyRange {
|
||||
KeyRange()
|
||||
: lower_key(nullptr),
|
||||
include_lower(false),
|
||||
upper_key(nullptr),
|
||||
include_upper(false) {
|
||||
}
|
||||
|
||||
// If include_lower_bound is true, data equal with lower_bound will
|
||||
// be read
|
||||
bool include_lower_bound = false;
|
||||
KeyRange(const RowCursor* lower_key_,
|
||||
bool include_lower_,
|
||||
const RowCursor* upper_key_,
|
||||
bool include_upper_)
|
||||
: lower_key(lower_key_),
|
||||
include_lower(include_lower_),
|
||||
upper_key(upper_key_),
|
||||
include_upper(include_upper_) {
|
||||
}
|
||||
|
||||
// upper_bound defines the extend upto which the iterator can return
|
||||
// data.
|
||||
std::shared_ptr<RowCursor> upper_bound;
|
||||
// the lower bound of the range, nullptr if not existed
|
||||
const RowCursor* lower_key;
|
||||
// whether `lower_key` is included in the range
|
||||
bool include_lower;
|
||||
// the upper bound of the range, nullptr if not existed
|
||||
const RowCursor* upper_key;
|
||||
// whether `upper_key` is included in the range
|
||||
bool include_upper;
|
||||
};
|
||||
|
||||
// If include_upper_bound is true, data equal with upper_bound will
|
||||
// be read
|
||||
bool include_upper_bound = false;
|
||||
// reader's key ranges, empty if not existed.
|
||||
// used by short key index to filter row blocks
|
||||
std::vector<KeyRange> key_ranges;
|
||||
|
||||
// reader's column predicates
|
||||
// used by zone map/bloom filter/secondary index to prune data
|
||||
std::shared_ptr<Conditions> conditions;
|
||||
// reader's column predicates, nullptr if not existed.
|
||||
// used by column index to filter pages and rows
|
||||
// TODO use vector<ColumnPredicate*> instead
|
||||
const Conditions* conditions = nullptr;
|
||||
};
|
||||
|
||||
// Used to read data in RowBlockV2 one by one
|
||||
|
||||
@ -49,7 +49,6 @@ RowBlock::~RowBlock() {
|
||||
}
|
||||
|
||||
OLAPStatus RowBlock::init(const RowBlockInfo& block_info) {
|
||||
_field_count = _schema->num_columns();
|
||||
_info = block_info;
|
||||
_null_supported = block_info.null_supported;
|
||||
_capacity = _info.row_num;
|
||||
|
||||
@ -162,9 +162,6 @@ private:
|
||||
|
||||
bool _null_supported;
|
||||
|
||||
size_t _field_count = 0;
|
||||
bool _need_checksum = true;
|
||||
|
||||
// Data in memory is constructed from row cursors; these row cursors' size is equal
|
||||
char* _mem_buf = nullptr;
|
||||
// equal with _mem_row_bytes * _info.row_num
|
||||
|
||||
@ -482,21 +482,6 @@ OLAPStatus ColumnData::get_first_row_block(RowBlock** row_block) {
|
||||
return OLAP_SUCCESS;
|
||||
}
|
||||
|
||||
OLAPStatus ColumnData::get_next_row_block(RowBlock** row_block) {
|
||||
_is_normal_read = true;
|
||||
OLAPStatus res = _get_block(false);
|
||||
if (res != OLAP_SUCCESS) {
|
||||
if (res != OLAP_ERR_DATA_EOF) {
|
||||
OLAP_LOG_WARNING("fail to load data to row block. [res=%d]", res);
|
||||
}
|
||||
*row_block = nullptr;
|
||||
return res;
|
||||
}
|
||||
|
||||
*row_block = _read_block.get();
|
||||
return OLAP_SUCCESS;
|
||||
}
|
||||
|
||||
bool ColumnData::rowset_pruning_filter() {
|
||||
if (empty() || zero_num_rows()) {
|
||||
return true;
|
||||
@ -516,7 +501,7 @@ int ColumnData::delete_pruning_filter() {
|
||||
return DEL_NOT_SATISFIED;
|
||||
}
|
||||
|
||||
if (false == _segment_group->has_zone_maps()) {
|
||||
if (!_segment_group->has_zone_maps()) {
|
||||
/*
|
||||
* if segment_group has no column statistics, we cannot judge whether the data can be filtered or not
|
||||
*/
|
||||
@ -549,9 +534,9 @@ int ColumnData::delete_pruning_filter() {
|
||||
}
|
||||
}
|
||||
|
||||
if (true == del_stastified) {
|
||||
if (del_stastified) {
|
||||
ret = DEL_SATISFIED;
|
||||
} else if (true == del_partial_stastified) {
|
||||
} else if (del_partial_stastified) {
|
||||
ret = DEL_PARTIAL_SATISFIED;
|
||||
} else {
|
||||
ret = DEL_NOT_SATISFIED;
|
||||
|
||||
@ -76,7 +76,6 @@ public:
|
||||
RuntimeState* runtime_state);
|
||||
|
||||
OLAPStatus get_first_row_block(RowBlock** row_block);
|
||||
OLAPStatus get_next_row_block(RowBlock** row_block);
|
||||
|
||||
// Only used to binary search in full-key find row
|
||||
const RowCursor* seek_and_get_current_row(const RowBlockPosition& position);
|
||||
|
||||
@ -34,12 +34,10 @@ using strings::Substitute;
|
||||
|
||||
Segment::Segment(
|
||||
std::string fname, uint32_t segment_id,
|
||||
const std::shared_ptr<TabletSchema>& tablet_schema,
|
||||
size_t num_rows_per_block)
|
||||
const TabletSchema* tablet_schema)
|
||||
: _fname(std::move(fname)),
|
||||
_segment_id(segment_id),
|
||||
_tablet_schema(tablet_schema),
|
||||
_num_rows_per_block(num_rows_per_block) {
|
||||
_tablet_schema(tablet_schema) {
|
||||
}
|
||||
|
||||
Segment::~Segment() {
|
||||
@ -71,9 +69,10 @@ Status Segment::open() {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status Segment::new_iterator(const Schema& schema, std::unique_ptr<SegmentIterator>* output) {
|
||||
output->reset(new SegmentIterator(this->shared_from_this(), schema));
|
||||
return Status::OK();
|
||||
std::unique_ptr<SegmentIterator> Segment::new_iterator(const Schema& schema, const StorageReadOptions& read_options) {
|
||||
auto it = std::unique_ptr<SegmentIterator>(new SegmentIterator(this->shared_from_this(), schema));
|
||||
it->init(read_options);
|
||||
return it;
|
||||
}
|
||||
|
||||
// Read data at offset of input file, check if the file content match the magic
|
||||
|
||||
@ -33,16 +33,18 @@ namespace doris {
|
||||
|
||||
class RandomAccessFile;
|
||||
class SegmentGroup;
|
||||
class FieldInfo;
|
||||
class TabletSchema;
|
||||
class ShortKeyIndexDecoder;
|
||||
class Schema;
|
||||
class StorageReadOptions;
|
||||
|
||||
namespace segment_v2 {
|
||||
|
||||
class ColumnReader;
|
||||
class ColumnIterator;
|
||||
class Segment;
|
||||
class SegmentIterator;
|
||||
using SegmentSharedPtr = std::shared_ptr<Segment>;
|
||||
|
||||
// A Segment is used to represent a segment in memory format. When segment is
|
||||
// generated, it won't be modified, so this struct aimed to help read operation.
|
||||
@ -55,13 +57,12 @@ class SegmentIterator;
|
||||
class Segment : public std::enable_shared_from_this<Segment> {
|
||||
public:
|
||||
Segment(std::string fname, uint32_t segment_id,
|
||||
const std::shared_ptr<TabletSchema>& tablet_schema,
|
||||
size_t num_rows_per_block);
|
||||
const TabletSchema* tablet_schema);
|
||||
~Segment();
|
||||
|
||||
Status open();
|
||||
|
||||
Status new_iterator(const Schema& schema, std::unique_ptr<SegmentIterator>* iter);
|
||||
std::unique_ptr<SegmentIterator> new_iterator(const Schema& schema, const StorageReadOptions& read_options);
|
||||
|
||||
uint64_t id() const { return _segment_id; }
|
||||
|
||||
@ -71,7 +72,7 @@ private:
|
||||
friend class SegmentIterator;
|
||||
|
||||
Status new_column_iterator(uint32_t cid, ColumnIterator** iter);
|
||||
uint32_t num_rows_per_block() const { return _num_rows_per_block; }
|
||||
uint32_t num_rows_per_block() const { return _sk_index_decoder->num_rows_per_block(); }
|
||||
size_t num_short_keys() const { return _tablet_schema->num_short_key_columns(); }
|
||||
|
||||
Status _check_magic(uint64_t offset);
|
||||
@ -97,8 +98,7 @@ private:
|
||||
private:
|
||||
std::string _fname;
|
||||
uint32_t _segment_id;
|
||||
std::shared_ptr<TabletSchema> _tablet_schema;
|
||||
uint32_t _num_rows_per_block;
|
||||
const TabletSchema* _tablet_schema;
|
||||
|
||||
SegmentFooterPB _footer;
|
||||
std::unique_ptr<RandomAccessFile> _input_file;
|
||||
|
||||
@ -34,11 +34,13 @@ namespace segment_v2 {
|
||||
|
||||
SegmentIterator::SegmentIterator(std::shared_ptr<Segment> segment,
|
||||
const Schema& schema)
|
||||
: _segment(std::move(segment)),
|
||||
_schema(schema),
|
||||
_cur_range_id(0),
|
||||
_column_iterators(_schema.num_columns(), nullptr),
|
||||
_cur_rowid(0) {
|
||||
: _segment(std::move(segment)),
|
||||
_schema(schema),
|
||||
_column_iterators(_schema.num_columns(), nullptr),
|
||||
_row_ranges(RowRanges::create_single(_segment->num_rows())),
|
||||
_cur_rowid(0),
|
||||
_cur_range_id(0),
|
||||
_inited(false) {
|
||||
}
|
||||
|
||||
SegmentIterator::~SegmentIterator() {
|
||||
@ -47,72 +49,66 @@ SegmentIterator::~SegmentIterator() {
|
||||
}
|
||||
}
|
||||
|
||||
Status SegmentIterator::init(const StorageReadOptions& opts) {
|
||||
Status SegmentIterator::_init() {
|
||||
DorisMetrics::segment_read_total.increment(1);
|
||||
_opts = opts;
|
||||
RETURN_IF_ERROR(_init_short_key_range());
|
||||
RETURN_IF_ERROR(_init_row_ranges());
|
||||
RETURN_IF_ERROR(_get_row_ranges_by_keys());
|
||||
RETURN_IF_ERROR(_get_row_ranges_by_column_conditions());
|
||||
if (!_row_ranges.is_empty()) {
|
||||
_cur_range_id = 0;
|
||||
_cur_rowid = _row_ranges.get_range_from(_cur_range_id);
|
||||
}
|
||||
RETURN_IF_ERROR(_init_column_iterators());
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// This function will use input key bounds to get a row range.
|
||||
Status SegmentIterator::_init_short_key_range() {
|
||||
Status SegmentIterator::_get_row_ranges_by_keys() {
|
||||
DorisMetrics::segment_row_total.increment(num_rows());
|
||||
_lower_rowid = 0;
|
||||
_upper_rowid = num_rows();
|
||||
// initial short key row ranges: [0, num_rows())
|
||||
_row_ranges = RowRanges::create_single(_lower_rowid, _upper_rowid);
|
||||
|
||||
// fast path for empty segment
|
||||
if (_upper_rowid == 0) {
|
||||
// fast path for empty segment or empty key ranges
|
||||
if (_row_ranges.is_empty() || _opts.key_ranges.empty()) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
if (_opts.lower_bound == nullptr && _opts.upper_bound == nullptr) {
|
||||
return Status::OK();
|
||||
RowRanges result_ranges;
|
||||
for (auto& key_range : _opts.key_ranges) {
|
||||
rowid_t lower_rowid = 0;
|
||||
rowid_t upper_rowid = num_rows();
|
||||
RETURN_IF_ERROR(_prepare_seek(key_range));
|
||||
if (key_range.upper_key != nullptr) {
|
||||
// If the client wants to read upper_bound, include_upper is true, so we
|
||||
// should get the first ordinal at which the key is larger than upper_bound.
|
||||
// Hence we call _lookup_ordinal with the negation of include_upper.
|
||||
RETURN_IF_ERROR(_lookup_ordinal(
|
||||
*key_range.upper_key, !key_range.include_upper, num_rows(), &upper_rowid));
|
||||
}
|
||||
if (upper_rowid > 0 && key_range.lower_key != nullptr) {
|
||||
RETURN_IF_ERROR(
|
||||
_lookup_ordinal(*key_range.lower_key, key_range.include_lower, upper_rowid, &lower_rowid));
|
||||
}
|
||||
auto row_range = RowRanges::create_single(lower_rowid, upper_rowid);
|
||||
RowRanges::ranges_union(result_ranges, row_range, &result_ranges);
|
||||
}
|
||||
|
||||
RETURN_IF_ERROR(_prepare_seek());
|
||||
|
||||
// init row range with short key range
|
||||
if (_opts.upper_bound != nullptr) {
|
||||
// If client want to read upper_bound, the include_upper_bound is true. So we
|
||||
// should get the first ordinal at which key is larger than upper_bound.
|
||||
// So we call _lookup_ordinal with include_upper_bound's negate
|
||||
RETURN_IF_ERROR(_lookup_ordinal(
|
||||
*_opts.upper_bound, !_opts.include_upper_bound, num_rows(), &_upper_rowid));
|
||||
}
|
||||
if (_upper_rowid > 0 && _opts.lower_bound != nullptr) {
|
||||
RETURN_IF_ERROR(_lookup_ordinal(
|
||||
*_opts.lower_bound, _opts.include_lower_bound, _upper_rowid, &_lower_rowid));
|
||||
}
|
||||
// seeked short key row ranges: [_lower_rowid, _upper_rowid)
|
||||
_row_ranges = RowRanges::create_single(_lower_rowid, _upper_rowid);
|
||||
DorisMetrics::segment_rows_by_short_key.increment(_upper_rowid - _lower_rowid);
|
||||
// pre-condition: _row_ranges == [0, num_rows)
|
||||
_row_ranges = std::move(result_ranges);
|
||||
DorisMetrics::segment_rows_by_short_key.increment(_row_ranges.count());
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Set up environment for the following seek.
|
||||
Status SegmentIterator::_prepare_seek() {
|
||||
Status SegmentIterator::_prepare_seek(const StorageReadOptions::KeyRange& key_range) {
|
||||
std::vector<const Field*> key_fields;
|
||||
std::set<uint32_t> column_set;
|
||||
if (_opts.lower_bound != nullptr) {
|
||||
for (auto cid : _opts.lower_bound->schema()->column_ids()) {
|
||||
if (key_range.lower_key != nullptr) {
|
||||
for (auto cid : key_range.lower_key->schema()->column_ids()) {
|
||||
column_set.emplace(cid);
|
||||
key_fields.emplace_back(_opts.lower_bound->schema()->column(cid));
|
||||
key_fields.emplace_back(key_range.lower_key->schema()->column(cid));
|
||||
}
|
||||
}
|
||||
if (_opts.upper_bound != nullptr) {
|
||||
for (auto cid : _opts.upper_bound->schema()->column_ids()) {
|
||||
if (key_range.upper_key != nullptr) {
|
||||
for (auto cid : key_range.upper_key->schema()->column_ids()) {
|
||||
if (column_set.count(cid) == 0) {
|
||||
key_fields.emplace_back(_opts.upper_bound->schema()->column(cid));
|
||||
key_fields.emplace_back(key_range.upper_key->schema()->column(cid));
|
||||
column_set.emplace(cid);
|
||||
}
|
||||
}
|
||||
@ -123,15 +119,15 @@ Status SegmentIterator::_prepare_seek() {
|
||||
// create used column iterator
|
||||
for (auto cid : _seek_schema->column_ids()) {
|
||||
if (_column_iterators[cid] == nullptr) {
|
||||
RETURN_IF_ERROR(_create_column_iterator(cid, &_column_iterators[cid]));
|
||||
RETURN_IF_ERROR(_segment->new_column_iterator(cid, &_column_iterators[cid]));
|
||||
}
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status SegmentIterator::_init_row_ranges() {
|
||||
if (_lower_rowid == _upper_rowid) {
|
||||
Status SegmentIterator::_get_row_ranges_by_column_conditions() {
|
||||
if (_row_ranges.is_empty()) {
|
||||
// no data just return;
|
||||
return Status::OK();
|
||||
}
|
||||
@ -174,7 +170,7 @@ Status SegmentIterator::_init_column_iterators() {
|
||||
}
|
||||
for (auto cid : _schema.column_ids()) {
|
||||
if (_column_iterators[cid] == nullptr) {
|
||||
RETURN_IF_ERROR(_create_column_iterator(cid, &_column_iterators[cid]));
|
||||
RETURN_IF_ERROR(_segment->new_column_iterator(cid, &_column_iterators[cid]));
|
||||
}
|
||||
|
||||
_column_iterators[cid]->seek_to_ordinal(_cur_rowid);
|
||||
@ -182,10 +178,6 @@ Status SegmentIterator::_init_column_iterators() {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status SegmentIterator::_create_column_iterator(uint32_t cid, ColumnIterator** iter) {
|
||||
return _segment->new_column_iterator(cid, iter);
|
||||
}
|
||||
|
||||
// Schema of lhs and rhs are different.
|
||||
// callers should assure that rhs' schema has all columns in lhs schema
|
||||
template<typename LhsRowType, typename RhsRowType>
|
||||
@ -297,6 +289,11 @@ Status SegmentIterator::_next_batch(RowBlockV2* block, size_t* rows_read) {
|
||||
}
|
||||
|
||||
Status SegmentIterator::next_batch(RowBlockV2* block) {
|
||||
if (UNLIKELY(!_inited)) {
|
||||
RETURN_IF_ERROR(_init());
|
||||
_inited = true;
|
||||
}
|
||||
|
||||
if (_row_ranges.is_empty() || _cur_rowid >= _row_ranges.to()) {
|
||||
block->resize(0);
|
||||
return Status::EndOfFile("no more data in segment");
|
||||
@ -304,7 +301,7 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
|
||||
size_t rows_to_read = block->capacity();
|
||||
while (rows_to_read > 0) {
|
||||
if (_cur_rowid >= _row_ranges.get_range_to(_cur_range_id)) {
|
||||
// current row range is read over,
|
||||
// current row range is read over, trying to read from next range
|
||||
if (_cur_range_id >= _row_ranges.range_size() - 1) {
|
||||
// there is no more row range
|
||||
break;
|
||||
|
||||
@ -45,20 +45,29 @@ class SegmentIterator : public RowwiseIterator {
|
||||
public:
|
||||
SegmentIterator(std::shared_ptr<Segment> segment, const Schema& _schema);
|
||||
~SegmentIterator() override;
|
||||
Status init(const StorageReadOptions& opts) override;
|
||||
Status init(const StorageReadOptions& opts) override {
|
||||
_opts = opts;
|
||||
return Status::OK();
|
||||
}
|
||||
Status next_batch(RowBlockV2* row_block) override;
|
||||
const Schema& schema() const override { return _schema; }
|
||||
private:
|
||||
Status _init_short_key_range();
|
||||
Status _prepare_seek();
|
||||
Status _init_row_ranges();
|
||||
Status _get_row_ranges_from_zone_map(RowRanges* zone_map_row_ranges);
|
||||
Status _init_column_iterators();
|
||||
Status _create_column_iterator(uint32_t cid, ColumnIterator** iter);
|
||||
Status _init();
|
||||
|
||||
// calculate row ranges that fall into requested key ranges using short key index
|
||||
Status _get_row_ranges_by_keys();
|
||||
Status _prepare_seek(const StorageReadOptions::KeyRange& key_range);
|
||||
Status _lookup_ordinal(const RowCursor& key, bool is_include,
|
||||
rowid_t upper_bound, rowid_t* rowid);
|
||||
Status _seek_and_peek(rowid_t rowid);
|
||||
|
||||
// calculate row ranges that satisfy requested column conditions using various column index
|
||||
Status _get_row_ranges_by_column_conditions();
|
||||
// TODO move column index related logic to ColumnReader
|
||||
Status _get_row_ranges_from_zone_map(RowRanges* zone_map_row_ranges);
|
||||
|
||||
Status _init_column_iterators();
|
||||
|
||||
Status _next_batch(RowBlockV2* block, size_t* rows_read);
|
||||
|
||||
uint32_t segment_id() const { return _segment->id(); }
|
||||
@ -68,28 +77,26 @@ private:
|
||||
std::shared_ptr<Segment> _segment;
|
||||
// TODO(zc): rethink if we need copy it
|
||||
Schema _schema;
|
||||
// _column_iterators.size() == _schema.num_columns()
|
||||
// _column_iterators[cid] == nullptr if cid is not in _schema
|
||||
std::vector<ColumnIterator*> _column_iterators;
|
||||
// after init(), `_row_ranges` contains all rowid to scan
|
||||
RowRanges _row_ranges;
|
||||
// the next rowid to read
|
||||
rowid_t _cur_rowid;
|
||||
// index of the row range where `_cur_rowid` belongs to
|
||||
size_t _cur_range_id;
|
||||
// the actual init process is delayed to the first call to next_batch()
|
||||
bool _inited;
|
||||
|
||||
StorageReadOptions _opts;
|
||||
|
||||
// row ranges to scan
|
||||
size_t _cur_range_id;
|
||||
RowRanges _row_ranges;
|
||||
|
||||
// Only used when init is called, help to finish seek_and_peek.
|
||||
// Data will be saved in this batch
|
||||
// row schema of the key to seek
|
||||
// only used in `_get_row_ranges_by_keys`
|
||||
std::unique_ptr<Schema> _seek_schema;
|
||||
|
||||
// used to read data from columns when do bianry search to find
|
||||
// oridnal for input bounds
|
||||
// used to binary search the rowid for a given key
|
||||
// only used in `_get_row_ranges_by_keys`
|
||||
std::unique_ptr<RowBlockV2> _seek_block;
|
||||
// helper to save row to compare with input bounds
|
||||
std::unique_ptr<RowCursor> _key_cursor;
|
||||
|
||||
std::vector<ColumnIterator*> _column_iterators;
|
||||
|
||||
rowid_t _lower_rowid;
|
||||
rowid_t _upper_rowid;
|
||||
rowid_t _cur_rowid;
|
||||
|
||||
Arena _arena;
|
||||
};
|
||||
|
||||
@ -99,7 +99,7 @@ public:
|
||||
~Schema();
|
||||
|
||||
const std::vector<Field*>& columns() const { return _cols; }
|
||||
const Field* column(int idx) const { return _cols[idx]; }
|
||||
const Field* column(ColumnId cid) const { return _cols[cid]; }
|
||||
|
||||
size_t num_key_columns() const {
|
||||
return _num_key_columns;
|
||||
@ -133,8 +133,11 @@ public:
|
||||
size_t num_column_ids() const { return _col_ids.size(); }
|
||||
const std::vector<ColumnId>& column_ids() const { return _col_ids; }
|
||||
private:
|
||||
std::vector<Field*> _cols;
|
||||
// all valid ColumnIds in this schema
|
||||
std::vector<ColumnId> _col_ids;
|
||||
// _cols[cid] is only valid when cid is contained in `_col_ids`
|
||||
std::vector<Field*> _cols;
|
||||
// _col_offsets[cid] is only valid when cid is contained in `_col_ids`
|
||||
std::vector<size_t> _col_offsets;
|
||||
size_t _num_key_columns;
|
||||
size_t _schema_size;
|
||||
|
||||
@ -236,6 +236,8 @@ public:
|
||||
|
||||
uint32_t num_items() const { return _footer.num_items(); }
|
||||
|
||||
uint32_t num_rows_per_block() const { return _footer.num_rows_per_block(); }
|
||||
|
||||
Slice key(ssize_t ordinal) const {
|
||||
DCHECK(ordinal >= 0 && ordinal < num_items());
|
||||
return {_key_data.data + _offsets[ordinal], _offsets[ordinal + 1] - _offsets[ordinal]};
|
||||
|
||||
@ -107,9 +107,15 @@ public:
|
||||
static IntCounter meta_read_request_total;
|
||||
static IntCounter meta_read_request_duration_us;
|
||||
|
||||
// Counters for segment_v2
|
||||
// -----------------------
|
||||
// total number of segments read
|
||||
static IntCounter segment_read_total;
|
||||
// total number of rows in queried segments (before index pruning)
|
||||
static IntCounter segment_row_total;
|
||||
// total number of rows selected by short key index
|
||||
static IntCounter segment_rows_by_short_key;
|
||||
// total number of rows selected by zone map index
|
||||
static IntCounter segment_rows_read_by_zone_map;
|
||||
|
||||
static IntCounter txn_begin_request_total;
|
||||
|
||||
@ -89,7 +89,7 @@ TEST_F(SegmentReaderWriterTest, normal) {
|
||||
ASSERT_TRUE(st.ok());
|
||||
// reader
|
||||
{
|
||||
std::shared_ptr<Segment> segment(new Segment(fname, 0, tablet_schema, num_rows_per_block));
|
||||
std::shared_ptr<Segment> segment(new Segment(fname, 0, tablet_schema.get()));
|
||||
st = segment->open();
|
||||
LOG(INFO) << "segment open, msg=" << st.to_string();
|
||||
ASSERT_TRUE(st.ok());
|
||||
@ -97,13 +97,8 @@ TEST_F(SegmentReaderWriterTest, normal) {
|
||||
Schema schema(*tablet_schema);
|
||||
// scan all rows
|
||||
{
|
||||
std::unique_ptr<SegmentIterator> iter;
|
||||
st = segment->new_iterator(schema, &iter);
|
||||
ASSERT_TRUE(st.ok());
|
||||
|
||||
StorageReadOptions read_opts;
|
||||
st = iter->init(read_opts);
|
||||
ASSERT_TRUE(st.ok());
|
||||
std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
|
||||
|
||||
Arena arena;
|
||||
RowBlockV2 block(schema, 1024, &arena);
|
||||
@ -132,14 +127,8 @@ TEST_F(SegmentReaderWriterTest, normal) {
|
||||
}
|
||||
// test seek, key
|
||||
{
|
||||
std::unique_ptr<SegmentIterator> iter;
|
||||
st = segment->new_iterator(schema, &iter);
|
||||
ASSERT_TRUE(st.ok());
|
||||
|
||||
// lower bound
|
||||
StorageReadOptions read_opts;
|
||||
read_opts.lower_bound.reset(new RowCursor());
|
||||
RowCursor* lower_bound = read_opts.lower_bound.get();
|
||||
std::unique_ptr<RowCursor> lower_bound(new RowCursor());
|
||||
lower_bound->init(*tablet_schema, 2);
|
||||
{
|
||||
auto cell = lower_bound->cell(0);
|
||||
@ -151,22 +140,19 @@ TEST_F(SegmentReaderWriterTest, normal) {
|
||||
cell.set_not_null();
|
||||
*(int*)cell.mutable_cell_ptr() = 100;
|
||||
}
|
||||
read_opts.include_lower_bound = false;
|
||||
|
||||
// upper bound
|
||||
read_opts.upper_bound.reset(new RowCursor());
|
||||
RowCursor* upper_bound = read_opts.upper_bound.get();
|
||||
std::unique_ptr<RowCursor> upper_bound(new RowCursor());
|
||||
upper_bound->init(*tablet_schema, 1);
|
||||
{
|
||||
auto cell = upper_bound->cell(0);
|
||||
cell.set_not_null();
|
||||
*(int*)cell.mutable_cell_ptr() = 200;
|
||||
}
|
||||
read_opts.include_upper_bound = true;
|
||||
|
||||
st = iter->init(read_opts);
|
||||
LOG(INFO) << "iterator init msg=" << st.to_string();
|
||||
ASSERT_TRUE(st.ok());
|
||||
StorageReadOptions read_opts;
|
||||
read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), true);
|
||||
std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
|
||||
|
||||
Arena arena;
|
||||
RowBlockV2 block(schema, 100, &arena);
|
||||
@ -180,26 +166,18 @@ TEST_F(SegmentReaderWriterTest, normal) {
|
||||
}
|
||||
// test seek, key
|
||||
{
|
||||
std::unique_ptr<SegmentIterator> iter;
|
||||
st = segment->new_iterator(schema, &iter);
|
||||
ASSERT_TRUE(st.ok());
|
||||
|
||||
StorageReadOptions read_opts;
|
||||
|
||||
// lower bound
|
||||
read_opts.lower_bound.reset(new RowCursor());
|
||||
RowCursor* lower_bound = read_opts.lower_bound.get();
|
||||
std::unique_ptr<RowCursor> lower_bound(new RowCursor());
|
||||
lower_bound->init(*tablet_schema, 1);
|
||||
{
|
||||
auto cell = lower_bound->cell(0);
|
||||
cell.set_not_null();
|
||||
*(int*)cell.mutable_cell_ptr() = 40970;
|
||||
}
|
||||
read_opts.include_lower_bound = false;
|
||||
|
||||
st = iter->init(read_opts);
|
||||
LOG(INFO) << "iterator init msg=" << st.to_string();
|
||||
ASSERT_TRUE(st.ok());
|
||||
StorageReadOptions read_opts;
|
||||
read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false);
|
||||
std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
|
||||
|
||||
Arena arena;
|
||||
RowBlockV2 block(schema, 100, &arena);
|
||||
@ -209,36 +187,26 @@ TEST_F(SegmentReaderWriterTest, normal) {
|
||||
}
|
||||
// test seek, key (-2, -1)
|
||||
{
|
||||
std::unique_ptr<SegmentIterator> iter;
|
||||
st = segment->new_iterator(schema, &iter);
|
||||
ASSERT_TRUE(st.ok());
|
||||
|
||||
StorageReadOptions read_opts;
|
||||
|
||||
// lower bound
|
||||
read_opts.lower_bound.reset(new RowCursor());
|
||||
RowCursor* lower_bound = read_opts.lower_bound.get();
|
||||
std::unique_ptr<RowCursor> lower_bound(new RowCursor());
|
||||
lower_bound->init(*tablet_schema, 1);
|
||||
{
|
||||
auto cell = lower_bound->cell(0);
|
||||
cell.set_not_null();
|
||||
*(int*)cell.mutable_cell_ptr() = -2;
|
||||
}
|
||||
read_opts.include_lower_bound = false;
|
||||
|
||||
read_opts.upper_bound.reset(new RowCursor());
|
||||
RowCursor* upper_bound = read_opts.upper_bound.get();
|
||||
std::unique_ptr<RowCursor> upper_bound(new RowCursor());
|
||||
upper_bound->init(*tablet_schema, 1);
|
||||
{
|
||||
auto cell = upper_bound->cell(0);
|
||||
cell.set_not_null();
|
||||
*(int*)cell.mutable_cell_ptr() = -1;
|
||||
}
|
||||
read_opts.include_upper_bound = false;
|
||||
|
||||
st = iter->init(read_opts);
|
||||
LOG(INFO) << "iterator init msg=" << st.to_string();
|
||||
ASSERT_TRUE(st.ok());
|
||||
StorageReadOptions read_opts;
|
||||
read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), false);
|
||||
std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
|
||||
|
||||
Arena arena;
|
||||
RowBlockV2 block(schema, 100, &arena);
|
||||
@ -299,18 +267,13 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) {
|
||||
|
||||
// reader with condition
|
||||
{
|
||||
std::shared_ptr<Segment> segment(new Segment(fname, 0, tablet_schema, num_rows_per_block));
|
||||
std::shared_ptr<Segment> segment(new Segment(fname, 0, tablet_schema.get()));
|
||||
st = segment->open();
|
||||
ASSERT_TRUE(st.ok());
|
||||
ASSERT_EQ(64 * 1024, segment->num_rows());
|
||||
Schema schema(*tablet_schema);
|
||||
// scan all rows
|
||||
{
|
||||
std::unique_ptr<SegmentIterator> iter;
|
||||
st = segment->new_iterator(schema, &iter);
|
||||
ASSERT_TRUE(st.ok());
|
||||
|
||||
StorageReadOptions read_opts;
|
||||
TCondition condition;
|
||||
condition.__set_column_name("2");
|
||||
condition.__set_condition_op("<");
|
||||
@ -319,9 +282,11 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) {
|
||||
std::shared_ptr<Conditions> conditions(new Conditions());
|
||||
conditions->set_tablet_schema(tablet_schema.get());
|
||||
conditions->append_condition(condition);
|
||||
read_opts.conditions = conditions;
|
||||
st = iter->init(read_opts);
|
||||
ASSERT_TRUE(st.ok());
|
||||
|
||||
StorageReadOptions read_opts;
|
||||
read_opts.conditions = conditions.get();
|
||||
|
||||
std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
|
||||
|
||||
Arena arena;
|
||||
RowBlockV2 block(schema, 1024, &arena);
|
||||
|
||||
Reference in New Issue
Block a user