From ae22d5e682121fb9fd5fb08df5f9ecaad9d724d8 Mon Sep 17 00:00:00 2001 From: Dayue Gao Date: Tue, 27 Aug 2019 17:57:42 +0800 Subject: [PATCH] Support multiple key ranges in RowwiseIterator and StorageReadOptions (#1704) support multiple key ranges in RowwiseIterator and StorageReadOptions remove unused fields and member functions in RowBlock and ColumnData read num_rows_per_block from short key index footer --- be/src/olap/iterators.h | 51 ++++++--- be/src/olap/row_block.cpp | 1 - be/src/olap/row_block.h | 3 - be/src/olap/rowset/column_data.cpp | 21 +--- be/src/olap/rowset/column_data.h | 1 - be/src/olap/rowset/segment_v2/segment.cpp | 13 +-- be/src/olap/rowset/segment_v2/segment.h | 14 +-- .../rowset/segment_v2/segment_iterator.cpp | 105 +++++++++--------- .../olap/rowset/segment_v2/segment_iterator.h | 55 +++++---- be/src/olap/schema.h | 7 +- be/src/olap/short_key_index.h | 2 + be/src/util/doris_metrics.h | 6 + .../olap/rowset/segment_v2/segment_test.cpp | 79 ++++--------- 13 files changed, 167 insertions(+), 191 deletions(-) diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h index 25ceb2d140..cf6b07d9a1 100644 --- a/be/src/olap/iterators.h +++ b/be/src/olap/iterators.h @@ -28,27 +28,44 @@ class RowBlockV2; class Schema; class Conditions; -struct StorageReadOptions { - // lower_bound defines the smallest key at which iterator will - // return data. 
- // If lower_bound is null, won't return - std::shared_ptr lower_bound; +class StorageReadOptions { +public: + struct KeyRange { + KeyRange() + : lower_key(nullptr), + include_lower(false), + upper_key(nullptr), + include_upper(false) { + } - // If include_lower_bound is true, data equal with lower_bound will - // be read - bool include_lower_bound = false; + KeyRange(const RowCursor* lower_key_, + bool include_lower_, + const RowCursor* upper_key_, + bool include_upper_) + : lower_key(lower_key_), + include_lower(include_lower_), + upper_key(upper_key_), + include_upper(include_upper_) { + } - // upper_bound defines the extend upto which the iterator can return - // data. - std::shared_ptr upper_bound; + // the lower bound of the range, nullptr if there is none + const RowCursor* lower_key; + // whether `lower_key` is included in the range + bool include_lower; + // the upper bound of the range, nullptr if there is none + const RowCursor* upper_key; + // whether `upper_key` is included in the range + bool include_upper; + }; - // If include_upper_bound is true, data equal with upper_bound will - // be read - bool include_upper_bound = false; + // reader's key ranges, empty if there are none. + // used by short key index to filter row blocks + std::vector key_ranges; - // reader's column predicates - // used by zone map/bloom filter/secondary index to prune data - std::shared_ptr conditions; + // reader's column predicates, nullptr if there are none. 
+ // used by column index to filter pages and rows + // TODO use vector instead + const Conditions* conditions = nullptr; }; // Used to read data in RowBlockV2 one by one diff --git a/be/src/olap/row_block.cpp b/be/src/olap/row_block.cpp index 4d34fdf754..ab5e6bfb72 100644 --- a/be/src/olap/row_block.cpp +++ b/be/src/olap/row_block.cpp @@ -49,7 +49,6 @@ RowBlock::~RowBlock() { } OLAPStatus RowBlock::init(const RowBlockInfo& block_info) { - _field_count = _schema->num_columns(); _info = block_info; _null_supported = block_info.null_supported; _capacity = _info.row_num; diff --git a/be/src/olap/row_block.h b/be/src/olap/row_block.h index d6b98e83ce..6a05480a63 100644 --- a/be/src/olap/row_block.h +++ b/be/src/olap/row_block.h @@ -162,9 +162,6 @@ private: bool _null_supported; - size_t _field_count = 0; - bool _need_checksum = true; - // Data in memory is construct from row cursors, these row cursors's size is equal char* _mem_buf = nullptr; // equal with _mem_row_bytes * _info.row_num diff --git a/be/src/olap/rowset/column_data.cpp b/be/src/olap/rowset/column_data.cpp index 6a6176fe01..0a89285776 100644 --- a/be/src/olap/rowset/column_data.cpp +++ b/be/src/olap/rowset/column_data.cpp @@ -482,21 +482,6 @@ OLAPStatus ColumnData::get_first_row_block(RowBlock** row_block) { return OLAP_SUCCESS; } -OLAPStatus ColumnData::get_next_row_block(RowBlock** row_block) { - _is_normal_read = true; - OLAPStatus res = _get_block(false); - if (res != OLAP_SUCCESS) { - if (res != OLAP_ERR_DATA_EOF) { - OLAP_LOG_WARNING("fail to load data to row block. 
[res=%d]", res); - } - *row_block = nullptr; - return res; - } - - *row_block = _read_block.get(); - return OLAP_SUCCESS; -} - bool ColumnData::rowset_pruning_filter() { if (empty() || zero_num_rows()) { return true; @@ -516,7 +501,7 @@ int ColumnData::delete_pruning_filter() { return DEL_NOT_SATISFIED; } - if (false == _segment_group->has_zone_maps()) { + if (!_segment_group->has_zone_maps()) { /* * if segment_group has no column statistics, we cannot judge whether the data can be filtered or not */ @@ -549,9 +534,9 @@ int ColumnData::delete_pruning_filter() { } } - if (true == del_stastified) { + if (del_stastified) { ret = DEL_SATISFIED; - } else if (true == del_partial_stastified) { + } else if (del_partial_stastified) { ret = DEL_PARTIAL_SATISFIED; } else { ret = DEL_NOT_SATISFIED; diff --git a/be/src/olap/rowset/column_data.h b/be/src/olap/rowset/column_data.h index fbd20c435c..587ae16c26 100644 --- a/be/src/olap/rowset/column_data.h +++ b/be/src/olap/rowset/column_data.h @@ -76,7 +76,6 @@ public: RuntimeState* runtime_state); OLAPStatus get_first_row_block(RowBlock** row_block); - OLAPStatus get_next_row_block(RowBlock** row_block); // Only used to binary search in full-key find row const RowCursor* seek_and_get_current_row(const RowBlockPosition& position); diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index a42e2c5513..c2f8579c1d 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -34,12 +34,10 @@ using strings::Substitute; Segment::Segment( std::string fname, uint32_t segment_id, - const std::shared_ptr& tablet_schema, - size_t num_rows_per_block) + const TabletSchema* tablet_schema) : _fname(std::move(fname)), _segment_id(segment_id), - _tablet_schema(tablet_schema), - _num_rows_per_block(num_rows_per_block) { + _tablet_schema(tablet_schema) { } Segment::~Segment() { @@ -71,9 +69,10 @@ Status Segment::open() { return Status::OK(); } -Status 
Segment::new_iterator(const Schema& schema, std::unique_ptr* output) { - output->reset(new SegmentIterator(this->shared_from_this(), schema)); - return Status::OK(); +std::unique_ptr Segment::new_iterator(const Schema& schema, const StorageReadOptions& read_options) { + auto it = std::unique_ptr(new SegmentIterator(this->shared_from_this(), schema)); + it->init(read_options); + return it; } // Read data at offset of input file, check if the file content match the magic diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index e69a10e751..45b17c6c5c 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -33,16 +33,18 @@ namespace doris { class RandomAccessFile; class SegmentGroup; -class FieldInfo; class TabletSchema; class ShortKeyIndexDecoder; class Schema; +class StorageReadOptions; namespace segment_v2 { class ColumnReader; class ColumnIterator; +class Segment; class SegmentIterator; +using SegmentSharedPtr = std::shared_ptr; // A Segment is used to represent a segment in memory format. When segment is // generated, it won't be modified, so this struct aimed to help read operation. 
@@ -55,13 +57,12 @@ class SegmentIterator; class Segment : public std::enable_shared_from_this { public: Segment(std::string fname, uint32_t segment_id, - const std::shared_ptr& tablet_schema, - size_t num_rows_per_block); + const TabletSchema* tablet_schema); ~Segment(); Status open(); - Status new_iterator(const Schema& schema, std::unique_ptr* iter); + std::unique_ptr new_iterator(const Schema& schema, const StorageReadOptions& read_options); uint64_t id() const { return _segment_id; } @@ -71,7 +72,7 @@ private: friend class SegmentIterator; Status new_column_iterator(uint32_t cid, ColumnIterator** iter); - uint32_t num_rows_per_block() const { return _num_rows_per_block; } + uint32_t num_rows_per_block() const { return _sk_index_decoder->num_rows_per_block(); } size_t num_short_keys() const { return _tablet_schema->num_short_key_columns(); } Status _check_magic(uint64_t offset); @@ -97,8 +98,7 @@ private: private: std::string _fname; uint32_t _segment_id; - std::shared_ptr _tablet_schema; - uint32_t _num_rows_per_block; + const TabletSchema* _tablet_schema; SegmentFooterPB _footer; std::unique_ptr _input_file; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 4f866003d8..f6ddca6218 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -34,11 +34,13 @@ namespace segment_v2 { SegmentIterator::SegmentIterator(std::shared_ptr segment, const Schema& schema) - : _segment(std::move(segment)), - _schema(schema), - _cur_range_id(0), - _column_iterators(_schema.num_columns(), nullptr), - _cur_rowid(0) { + : _segment(std::move(segment)), + _schema(schema), + _column_iterators(_schema.num_columns(), nullptr), + _row_ranges(RowRanges::create_single(_segment->num_rows())), + _cur_rowid(0), + _cur_range_id(0), + _inited(false) { } SegmentIterator::~SegmentIterator() { @@ -47,72 +49,66 @@ SegmentIterator::~SegmentIterator() { } } 
-Status SegmentIterator::init(const StorageReadOptions& opts) { +Status SegmentIterator::_init() { DorisMetrics::segment_read_total.increment(1); - _opts = opts; - RETURN_IF_ERROR(_init_short_key_range()); - RETURN_IF_ERROR(_init_row_ranges()); + RETURN_IF_ERROR(_get_row_ranges_by_keys()); + RETURN_IF_ERROR(_get_row_ranges_by_column_conditions()); if (!_row_ranges.is_empty()) { _cur_range_id = 0; _cur_rowid = _row_ranges.get_range_from(_cur_range_id); } RETURN_IF_ERROR(_init_column_iterators()); - return Status::OK(); } -// This function will use input key bounds to get a row range. -Status SegmentIterator::_init_short_key_range() { +Status SegmentIterator::_get_row_ranges_by_keys() { DorisMetrics::segment_row_total.increment(num_rows()); - _lower_rowid = 0; - _upper_rowid = num_rows(); - // initial short key row ranges: [0, num_rows()) - _row_ranges = RowRanges::create_single(_lower_rowid, _upper_rowid); - // fast path for empty segment - if (_upper_rowid == 0) { + // fast path for empty segment or empty key ranges + if (_row_ranges.is_empty() || _opts.key_ranges.empty()) { return Status::OK(); } - if (_opts.lower_bound == nullptr && _opts.upper_bound == nullptr) { - return Status::OK(); + RowRanges result_ranges; + for (auto& key_range : _opts.key_ranges) { + rowid_t lower_rowid = 0; + rowid_t upper_rowid = num_rows(); + RETURN_IF_ERROR(_prepare_seek(key_range)); + if (key_range.upper_key != nullptr) { + // If client want to read upper_bound, the include_upper is true. So we + // should get the first ordinal at which key is larger than upper_bound. 
+ // So we call _lookup_ordinal with include_upper's negate + RETURN_IF_ERROR(_lookup_ordinal( + *key_range.upper_key, !key_range.include_upper, num_rows(), &upper_rowid)); + } + if (upper_rowid > 0 && key_range.lower_key != nullptr) { + RETURN_IF_ERROR( + _lookup_ordinal(*key_range.lower_key, key_range.include_lower, upper_rowid, &lower_rowid)); + } + auto row_range = RowRanges::create_single(lower_rowid, upper_rowid); + RowRanges::ranges_union(result_ranges, row_range, &result_ranges); } - - RETURN_IF_ERROR(_prepare_seek()); - - // init row range with short key range - if (_opts.upper_bound != nullptr) { - // If client want to read upper_bound, the include_upper_bound is true. So we - // should get the first ordinal at which key is larger than upper_bound. - // So we call _lookup_ordinal with include_upper_bound's negate - RETURN_IF_ERROR(_lookup_ordinal( - *_opts.upper_bound, !_opts.include_upper_bound, num_rows(), &_upper_rowid)); - } - if (_upper_rowid > 0 && _opts.lower_bound != nullptr) { - RETURN_IF_ERROR(_lookup_ordinal( - *_opts.lower_bound, _opts.include_lower_bound, _upper_rowid, &_lower_rowid)); - } - // seeked short key row ranges: [_lower_rowid, _upper_rowid) - _row_ranges = RowRanges::create_single(_lower_rowid, _upper_rowid); - DorisMetrics::segment_rows_by_short_key.increment(_upper_rowid - _lower_rowid); + // pre-condition: _row_ranges == [0, num_rows) + _row_ranges = std::move(result_ranges); + DorisMetrics::segment_rows_by_short_key.increment(_row_ranges.count()); return Status::OK(); } // Set up environment for the following seek. 
-Status SegmentIterator::_prepare_seek() { +Status SegmentIterator::_prepare_seek(const StorageReadOptions::KeyRange& key_range) { std::vector key_fields; std::set column_set; - if (_opts.lower_bound != nullptr) { - for (auto cid : _opts.lower_bound->schema()->column_ids()) { + if (key_range.lower_key != nullptr) { + for (auto cid : key_range.lower_key->schema()->column_ids()) { column_set.emplace(cid); - key_fields.emplace_back(_opts.lower_bound->schema()->column(cid)); + key_fields.emplace_back(key_range.lower_key->schema()->column(cid)); } } - if (_opts.upper_bound != nullptr) { - for (auto cid : _opts.upper_bound->schema()->column_ids()) { + if (key_range.upper_key != nullptr) { + for (auto cid : key_range.upper_key->schema()->column_ids()) { if (column_set.count(cid) == 0) { - key_fields.emplace_back(_opts.upper_bound->schema()->column(cid)); + key_fields.emplace_back(key_range.upper_key->schema()->column(cid)); column_set.emplace(cid); } } @@ -123,15 +119,15 @@ Status SegmentIterator::_prepare_seek() { // create used column iterator for (auto cid : _seek_schema->column_ids()) { if (_column_iterators[cid] == nullptr) { - RETURN_IF_ERROR(_create_column_iterator(cid, &_column_iterators[cid])); + RETURN_IF_ERROR(_segment->new_column_iterator(cid, &_column_iterators[cid])); } } return Status::OK(); } -Status SegmentIterator::_init_row_ranges() { - if (_lower_rowid == _upper_rowid) { +Status SegmentIterator::_get_row_ranges_by_column_conditions() { + if (_row_ranges.is_empty()) { // no data just return; return Status::OK(); } @@ -174,7 +170,7 @@ Status SegmentIterator::_init_column_iterators() { } for (auto cid : _schema.column_ids()) { if (_column_iterators[cid] == nullptr) { - RETURN_IF_ERROR(_create_column_iterator(cid, &_column_iterators[cid])); + RETURN_IF_ERROR(_segment->new_column_iterator(cid, &_column_iterators[cid])); } _column_iterators[cid]->seek_to_ordinal(_cur_rowid); @@ -182,10 +178,6 @@ Status SegmentIterator::_init_column_iterators() { return 
Status::OK(); } -Status SegmentIterator::_create_column_iterator(uint32_t cid, ColumnIterator** iter) { - return _segment->new_column_iterator(cid, iter); -} - // Schema of lhs and rhs are different. // callers should assure that rhs' schema has all columns in lhs schema template @@ -297,6 +289,11 @@ Status SegmentIterator::_next_batch(RowBlockV2* block, size_t* rows_read) { } Status SegmentIterator::next_batch(RowBlockV2* block) { + if (UNLIKELY(!_inited)) { + RETURN_IF_ERROR(_init()); + _inited = true; + } + if (_row_ranges.is_empty() || _cur_rowid >= _row_ranges.to()) { block->resize(0); return Status::EndOfFile("no more data in segment"); @@ -304,7 +301,7 @@ Status SegmentIterator::next_batch(RowBlockV2* block) { size_t rows_to_read = block->capacity(); while (rows_to_read > 0) { if (_cur_rowid >= _row_ranges.get_range_to(_cur_range_id)) { - // current row range is read over, + // current row range is read over, trying to read from next range if (_cur_range_id >= _row_ranges.range_size() - 1) { // there is no more row range break; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 0b2494c055..a5a93b65c9 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -45,20 +45,29 @@ class SegmentIterator : public RowwiseIterator { public: SegmentIterator(std::shared_ptr segment, const Schema& _schema); ~SegmentIterator() override; - Status init(const StorageReadOptions& opts) override; + Status init(const StorageReadOptions& opts) override { + _opts = opts; + return Status::OK(); + } Status next_batch(RowBlockV2* row_block) override; const Schema& schema() const override { return _schema; } private: - Status _init_short_key_range(); - Status _prepare_seek(); - Status _init_row_ranges(); - Status _get_row_ranges_from_zone_map(RowRanges* zone_map_row_ranges); - Status _init_column_iterators(); - Status _create_column_iterator(uint32_t cid, 
ColumnIterator** iter); + Status _init(); + // calculate row ranges that fall into requested key ranges using short key index + Status _get_row_ranges_by_keys(); + Status _prepare_seek(const StorageReadOptions::KeyRange& key_range); Status _lookup_ordinal(const RowCursor& key, bool is_include, rowid_t upper_bound, rowid_t* rowid); Status _seek_and_peek(rowid_t rowid); + + // calculate row ranges that satisfy requested column conditions using various column index + Status _get_row_ranges_by_column_conditions(); + // TODO move column index related logic to ColumnReader + Status _get_row_ranges_from_zone_map(RowRanges* zone_map_row_ranges); + + Status _init_column_iterators(); + Status _next_batch(RowBlockV2* block, size_t* rows_read); uint32_t segment_id() const { return _segment->id(); } @@ -68,28 +77,26 @@ private: std::shared_ptr _segment; // TODO(zc): rethink if we need copy it Schema _schema; + // _column_iterators.size() == _schema.num_columns() + // _column_iterators[cid] == nullptr if cid is not in _schema + std::vector _column_iterators; + // after init(), `_row_ranges` contains all rowid to scan + RowRanges _row_ranges; + // the next rowid to read + rowid_t _cur_rowid; + // index of the row range where `_cur_rowid` belongs to + size_t _cur_range_id; + // the actual init process is delayed to the first call to next_batch() + bool _inited; StorageReadOptions _opts; - // row ranges to scan - size_t _cur_range_id; - RowRanges _row_ranges; - - // Only used when init is called, help to finish seek_and_peek. 
- // Data will be saved in this batch + // row schema of the key to seek + // only used in `_get_row_ranges_by_keys` std::unique_ptr _seek_schema; - - // used to read data from columns when do bianry search to find - // oridnal for input bounds + // used to binary search the rowid for a given key + // only used in `_get_row_ranges_by_keys` std::unique_ptr _seek_block; - // helper to save row to compare with input bounds - std::unique_ptr _key_cursor; - - std::vector _column_iterators; - - rowid_t _lower_rowid; - rowid_t _upper_rowid; - rowid_t _cur_rowid; Arena _arena; }; diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h index fe72c6d745..d66bb47dd7 100644 --- a/be/src/olap/schema.h +++ b/be/src/olap/schema.h @@ -99,7 +99,7 @@ public: ~Schema(); const std::vector& columns() const { return _cols; } - const Field* column(int idx) const { return _cols[idx]; } + const Field* column(ColumnId cid) const { return _cols[cid]; } size_t num_key_columns() const { return _num_key_columns; @@ -133,8 +133,11 @@ public: size_t num_column_ids() const { return _col_ids.size(); } const std::vector& column_ids() const { return _col_ids; } private: - std::vector _cols; + // all valid ColumnIds in this schema std::vector _col_ids; + // _cols[cid] is only valid when cid is contained in `_col_ids` + std::vector _cols; + // _col_offsets[cid] is only valid when cid is contained in `_col_ids` std::vector _col_offsets; size_t _num_key_columns; size_t _schema_size; diff --git a/be/src/olap/short_key_index.h b/be/src/olap/short_key_index.h index 5bc0374898..2f77d8844e 100644 --- a/be/src/olap/short_key_index.h +++ b/be/src/olap/short_key_index.h @@ -236,6 +236,8 @@ public: uint32_t num_items() const { return _footer.num_items(); } + uint32_t num_rows_per_block() const { return _footer.num_rows_per_block(); } + Slice key(ssize_t ordinal) const { DCHECK(ordinal >= 0 && ordinal < num_items()); return {_key_data.data + _offsets[ordinal], _offsets[ordinal + 1] - _offsets[ordinal]}; diff --git 
a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h index 7e0f3bdb4b..222f42380e 100644 --- a/be/src/util/doris_metrics.h +++ b/be/src/util/doris_metrics.h @@ -107,9 +107,15 @@ public: static IntCounter meta_read_request_total; static IntCounter meta_read_request_duration_us; + // Counters for segment_v2 + // ----------------------- + // total number of segments read static IntCounter segment_read_total; + // total number of rows in queried segments (before index pruning) static IntCounter segment_row_total; + // total number of rows selected by short key index static IntCounter segment_rows_by_short_key; + // total number of rows selected by zone map index static IntCounter segment_rows_read_by_zone_map; static IntCounter txn_begin_request_total; diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp b/be/test/olap/rowset/segment_v2/segment_test.cpp index 6f955efd79..4449699ed4 100644 --- a/be/test/olap/rowset/segment_v2/segment_test.cpp +++ b/be/test/olap/rowset/segment_v2/segment_test.cpp @@ -89,7 +89,7 @@ TEST_F(SegmentReaderWriterTest, normal) { ASSERT_TRUE(st.ok()); // reader { - std::shared_ptr segment(new Segment(fname, 0, tablet_schema, num_rows_per_block)); + std::shared_ptr segment(new Segment(fname, 0, tablet_schema.get())); st = segment->open(); LOG(INFO) << "segment open, msg=" << st.to_string(); ASSERT_TRUE(st.ok()); @@ -97,13 +97,8 @@ TEST_F(SegmentReaderWriterTest, normal) { Schema schema(*tablet_schema); // scan all rows { - std::unique_ptr iter; - st = segment->new_iterator(schema, &iter); - ASSERT_TRUE(st.ok()); - StorageReadOptions read_opts; - st = iter->init(read_opts); - ASSERT_TRUE(st.ok()); + std::unique_ptr iter = segment->new_iterator(schema, read_opts); Arena arena; RowBlockV2 block(schema, 1024, &arena); @@ -132,14 +127,8 @@ TEST_F(SegmentReaderWriterTest, normal) { } // test seek, key { - std::unique_ptr iter; - st = segment->new_iterator(schema, &iter); - ASSERT_TRUE(st.ok()); - // lower bound - StorageReadOptions 
read_opts; - read_opts.lower_bound.reset(new RowCursor()); - RowCursor* lower_bound = read_opts.lower_bound.get(); + std::unique_ptr lower_bound(new RowCursor()); lower_bound->init(*tablet_schema, 2); { auto cell = lower_bound->cell(0); @@ -151,22 +140,19 @@ TEST_F(SegmentReaderWriterTest, normal) { cell.set_not_null(); *(int*)cell.mutable_cell_ptr() = 100; } - read_opts.include_lower_bound = false; // upper bound - read_opts.upper_bound.reset(new RowCursor()); - RowCursor* upper_bound = read_opts.upper_bound.get(); + std::unique_ptr upper_bound(new RowCursor()); upper_bound->init(*tablet_schema, 1); { auto cell = upper_bound->cell(0); cell.set_not_null(); *(int*)cell.mutable_cell_ptr() = 200; } - read_opts.include_upper_bound = true; - st = iter->init(read_opts); - LOG(INFO) << "iterator init msg=" << st.to_string(); - ASSERT_TRUE(st.ok()); + StorageReadOptions read_opts; + read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), true); + std::unique_ptr iter = segment->new_iterator(schema, read_opts); Arena arena; RowBlockV2 block(schema, 100, &arena); @@ -180,26 +166,18 @@ TEST_F(SegmentReaderWriterTest, normal) { } // test seek, key { - std::unique_ptr iter; - st = segment->new_iterator(schema, &iter); - ASSERT_TRUE(st.ok()); - - StorageReadOptions read_opts; - // lower bound - read_opts.lower_bound.reset(new RowCursor()); - RowCursor* lower_bound = read_opts.lower_bound.get(); + std::unique_ptr lower_bound(new RowCursor()); lower_bound->init(*tablet_schema, 1); { auto cell = lower_bound->cell(0); cell.set_not_null(); *(int*)cell.mutable_cell_ptr() = 40970; } - read_opts.include_lower_bound = false; - st = iter->init(read_opts); - LOG(INFO) << "iterator init msg=" << st.to_string(); - ASSERT_TRUE(st.ok()); + StorageReadOptions read_opts; + read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false); + std::unique_ptr iter = segment->new_iterator(schema, read_opts); Arena arena; RowBlockV2 block(schema, 100, &arena); @@ 
-209,36 +187,26 @@ TEST_F(SegmentReaderWriterTest, normal) { } // test seek, key (-2, -1) { - std::unique_ptr iter; - st = segment->new_iterator(schema, &iter); - ASSERT_TRUE(st.ok()); - - StorageReadOptions read_opts; - // lower bound - read_opts.lower_bound.reset(new RowCursor()); - RowCursor* lower_bound = read_opts.lower_bound.get(); + std::unique_ptr lower_bound(new RowCursor()); lower_bound->init(*tablet_schema, 1); { auto cell = lower_bound->cell(0); cell.set_not_null(); *(int*)cell.mutable_cell_ptr() = -2; } - read_opts.include_lower_bound = false; - read_opts.upper_bound.reset(new RowCursor()); - RowCursor* upper_bound = read_opts.upper_bound.get(); + std::unique_ptr upper_bound(new RowCursor()); upper_bound->init(*tablet_schema, 1); { auto cell = upper_bound->cell(0); cell.set_not_null(); *(int*)cell.mutable_cell_ptr() = -1; } - read_opts.include_upper_bound = false; - st = iter->init(read_opts); - LOG(INFO) << "iterator init msg=" << st.to_string(); - ASSERT_TRUE(st.ok()); + StorageReadOptions read_opts; + read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), false); + std::unique_ptr iter = segment->new_iterator(schema, read_opts); Arena arena; RowBlockV2 block(schema, 100, &arena); @@ -299,18 +267,13 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) { // reader with condition { - std::shared_ptr segment(new Segment(fname, 0, tablet_schema, num_rows_per_block)); + std::shared_ptr segment(new Segment(fname, 0, tablet_schema.get())); st = segment->open(); ASSERT_TRUE(st.ok()); ASSERT_EQ(64 * 1024, segment->num_rows()); Schema schema(*tablet_schema); // scan all rows { - std::unique_ptr iter; - st = segment->new_iterator(schema, &iter); - ASSERT_TRUE(st.ok()); - - StorageReadOptions read_opts; TCondition condition; condition.__set_column_name("2"); condition.__set_condition_op("<"); @@ -319,9 +282,11 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) { std::shared_ptr conditions(new Conditions()); 
conditions->set_tablet_schema(tablet_schema.get()); conditions->append_condition(condition); - read_opts.conditions = conditions; - st = iter->init(read_opts); - ASSERT_TRUE(st.ok()); + + StorageReadOptions read_opts; + read_opts.conditions = conditions.get(); + + std::unique_ptr iter = segment->new_iterator(schema, read_opts); Arena arena; RowBlockV2 block(schema, 1024, &arena);