Support multiple key ranges in RowwiseIterator and StorageReadOptions (#1704)

Support multiple key ranges in RowwiseIterator and StorageReadOptions; remove unused fields and member functions in RowBlock and ColumnData; read num_rows_per_block from the short key index footer.
2019-08-27 17:57:42 +08:00
parent c403343c0a
commit ae22d5e682
13 changed files with 167 additions and 191 deletions
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@ -28,27 +28,44 @@ class RowBlockV2;
 class Schema;
 class Conditions;

-struct StorageReadOptions {
-    // lower_bound defines the smallest key at which iterator will
-    // return data.
-    // If lower_bound is null, won't return
-    std::shared_ptr<RowCursor> lower_bound;
+class StorageReadOptions {
+public:
+    struct KeyRange {
+        KeyRange()
+            : lower_key(nullptr),
+              include_lower(false),
+              upper_key(nullptr),
+              include_upper(false) {
+        }

-    // If include_lower_bound is true, data equal with lower_bound will
-    // be read
-    bool include_lower_bound = false;
+        KeyRange(const RowCursor* lower_key_,
+                 bool include_lower_,
+                 const RowCursor* upper_key_,
+                 bool include_upper_)
+            : lower_key(lower_key_),
+              include_lower(include_lower_),
+              upper_key(upper_key_),
+              include_upper(include_upper_) {
+        }

-    // upper_bound defines the extend upto which the iterator can return
-    // data.
-    std::shared_ptr<RowCursor> upper_bound;
+        // the lower bound of the range, nullptr if the range has no lower bound
+        const RowCursor* lower_key;
+        // whether `lower_key` is included in the range
+        bool include_lower;
+        // the upper bound of the range, nullptr if the range has no upper bound
+        const RowCursor* upper_key;
+        // whether `upper_key` is included in the range
+        bool include_upper;
+    };

-    // If include_upper_bound is true, data equal with upper_bound will
-    // be read
-    bool include_upper_bound = false;
+    // reader's key ranges, empty if no key range is specified.
+    // used by short key index to filter row blocks
+    std::vector<KeyRange> key_ranges;

-    // reader's column predicates
-    // used by zone map/bloom filter/secondary index to prune data
-    std::shared_ptr<Conditions> conditions;
+    // reader's column predicates, nullptr if there are none.
+    // used by column index to filter pages and rows
+    // TODO use vector<ColumnPredicate*> instead
+    const Conditions* conditions = nullptr;
 };

 // Used to read data in RowBlockV2 one by one
--- a/be/src/olap/row_block.cpp
+++ b/be/src/olap/row_block.cpp
@ -49,7 +49,6 @@ RowBlock::~RowBlock() {
 }

 OLAPStatus RowBlock::init(const RowBlockInfo& block_info) {
-    _field_count = _schema->num_columns();
    _info = block_info;
    _null_supported = block_info.null_supported;
    _capacity = _info.row_num;
--- a/be/src/olap/row_block.h
+++ b/be/src/olap/row_block.h
@ -162,9 +162,6 @@ private:
    
    bool _null_supported;

-    size_t _field_count = 0;
-    bool _need_checksum = true;
-
    // Data in memory is construct from row cursors, these row cursors's size is equal
    char* _mem_buf = nullptr;
    // equal with _mem_row_bytes * _info.row_num
--- a/be/src/olap/rowset/column_data.cpp
+++ b/be/src/olap/rowset/column_data.cpp
@ -482,21 +482,6 @@ OLAPStatus ColumnData::get_first_row_block(RowBlock** row_block) {
    return OLAP_SUCCESS;
 }

-OLAPStatus ColumnData::get_next_row_block(RowBlock** row_block) {
-    _is_normal_read = true;
-    OLAPStatus res = _get_block(false);
-    if (res != OLAP_SUCCESS) {
-        if (res != OLAP_ERR_DATA_EOF) {
-            OLAP_LOG_WARNING("fail to load data to row block. [res=%d]", res);
-        }
-        *row_block = nullptr;
-        return res;
-    }
-
-    *row_block = _read_block.get();
-    return OLAP_SUCCESS;
-}
-
 bool ColumnData::rowset_pruning_filter() {
    if (empty() || zero_num_rows()) {
        return true;
@ -516,7 +501,7 @@ int ColumnData::delete_pruning_filter() {
        return DEL_NOT_SATISFIED;
    }

-    if (false == _segment_group->has_zone_maps()) {
+    if (!_segment_group->has_zone_maps()) {
        /*
         * if segment_group has no column statistics, we cannot judge whether the data can be filtered or not
         */
@ -549,9 +534,9 @@ int ColumnData::delete_pruning_filter() {
        }
    }

-    if (true == del_stastified) {
+    if (del_stastified) {
        ret = DEL_SATISFIED;
-    } else if (true == del_partial_stastified) {
+    } else if (del_partial_stastified) {
        ret = DEL_PARTIAL_SATISFIED;
    } else {
        ret = DEL_NOT_SATISFIED;
--- a/be/src/olap/rowset/column_data.h
+++ b/be/src/olap/rowset/column_data.h
@ -76,7 +76,6 @@ public:
            RuntimeState* runtime_state);

    OLAPStatus get_first_row_block(RowBlock** row_block);
-    OLAPStatus get_next_row_block(RowBlock** row_block);

    // Only used to binary search in full-key find row
    const RowCursor* seek_and_get_current_row(const RowBlockPosition& position);
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@ -34,12 +34,10 @@ using strings::Substitute;

 Segment::Segment(
        std::string fname, uint32_t segment_id,
-        const std::shared_ptr<TabletSchema>& tablet_schema,
-        size_t num_rows_per_block)
+        const TabletSchema* tablet_schema)
        : _fname(std::move(fname)),
        _segment_id(segment_id),
-        _tablet_schema(tablet_schema),
-        _num_rows_per_block(num_rows_per_block) {
+        _tablet_schema(tablet_schema) {
 }

 Segment::~Segment() {
@ -71,9 +69,10 @@ Status Segment::open() {
    return Status::OK();
 }

-Status Segment::new_iterator(const Schema& schema, std::unique_ptr<SegmentIterator>* output) {
-    output->reset(new SegmentIterator(this->shared_from_this(), schema));
-    return Status::OK();
+std::unique_ptr<SegmentIterator> Segment::new_iterator(const Schema& schema, const StorageReadOptions& read_options) {
+    auto it = std::unique_ptr<SegmentIterator>(new SegmentIterator(this->shared_from_this(), schema));
+    it->init(read_options);
+    return it;
 }

 // Read data at offset of input file, check if the file content match the magic
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@ -33,16 +33,18 @@ namespace doris {

 class RandomAccessFile;
 class SegmentGroup;
-class FieldInfo;
 class TabletSchema;
 class ShortKeyIndexDecoder;
 class Schema;
+class StorageReadOptions;

 namespace segment_v2 {

 class ColumnReader;
 class ColumnIterator;
+class Segment;
 class SegmentIterator;
+using SegmentSharedPtr = std::shared_ptr<Segment>;

 // A Segment is used to represent a segment in memory format. When segment is
 // generated, it won't be modified, so this struct aimed to help read operation.
@ -55,13 +57,12 @@ class SegmentIterator;
 class Segment : public std::enable_shared_from_this<Segment> {
 public:
    Segment(std::string fname, uint32_t segment_id,
-            const std::shared_ptr<TabletSchema>& tablet_schema,
-            size_t num_rows_per_block);
+            const TabletSchema* tablet_schema);
    ~Segment();

    Status open();

-    Status new_iterator(const Schema& schema, std::unique_ptr<SegmentIterator>* iter);
+    std::unique_ptr<SegmentIterator> new_iterator(const Schema& schema, const StorageReadOptions& read_options);

    uint64_t id() const { return _segment_id; }

@ -71,7 +72,7 @@ private:
    friend class SegmentIterator;

    Status new_column_iterator(uint32_t cid, ColumnIterator** iter);
-    uint32_t num_rows_per_block() const { return _num_rows_per_block; }
+    uint32_t num_rows_per_block() const { return _sk_index_decoder->num_rows_per_block(); }
    size_t num_short_keys() const { return _tablet_schema->num_short_key_columns(); }

    Status _check_magic(uint64_t offset);
@ -97,8 +98,7 @@ private:
 private:
    std::string _fname;
    uint32_t _segment_id;
-    std::shared_ptr<TabletSchema> _tablet_schema;
-    uint32_t _num_rows_per_block;
+    const TabletSchema* _tablet_schema;

    SegmentFooterPB _footer;
    std::unique_ptr<RandomAccessFile> _input_file;
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@ -34,11 +34,13 @@ namespace segment_v2 {

 SegmentIterator::SegmentIterator(std::shared_ptr<Segment> segment,
                                 const Schema& schema)
-        : _segment(std::move(segment)),
-        _schema(schema),
-        _cur_range_id(0),
-        _column_iterators(_schema.num_columns(), nullptr),
-        _cur_rowid(0) {
+    : _segment(std::move(segment)),
+      _schema(schema),
+      _column_iterators(_schema.num_columns(), nullptr),
+      _row_ranges(RowRanges::create_single(_segment->num_rows())),
+      _cur_rowid(0),
+      _cur_range_id(0),
+      _inited(false) {
 }

 SegmentIterator::~SegmentIterator() {
@ -47,72 +49,66 @@ SegmentIterator::~SegmentIterator() {
    }
 }

-Status SegmentIterator::init(const StorageReadOptions& opts) {
+Status SegmentIterator::_init() {
    DorisMetrics::segment_read_total.increment(1);
-    _opts = opts;
-    RETURN_IF_ERROR(_init_short_key_range());
-    RETURN_IF_ERROR(_init_row_ranges());
+    RETURN_IF_ERROR(_get_row_ranges_by_keys());
+    RETURN_IF_ERROR(_get_row_ranges_by_column_conditions());
    if (!_row_ranges.is_empty()) {
        _cur_range_id = 0;
        _cur_rowid = _row_ranges.get_range_from(_cur_range_id);
    }
    RETURN_IF_ERROR(_init_column_iterators());
-
    return Status::OK();
 }

-// This function will use input key bounds to get a row range.
-Status SegmentIterator::_init_short_key_range() {
+Status SegmentIterator::_get_row_ranges_by_keys() {
    DorisMetrics::segment_row_total.increment(num_rows());
-    _lower_rowid = 0;
-    _upper_rowid = num_rows();
-    // initial short key row ranges: [0, num_rows())
-    _row_ranges = RowRanges::create_single(_lower_rowid, _upper_rowid);

-    // fast path for empty segment
-    if (_upper_rowid == 0) {
+    // fast path for empty segment or empty key ranges
+    if (_row_ranges.is_empty() || _opts.key_ranges.empty()) {
        return Status::OK();
    }

-    if (_opts.lower_bound == nullptr && _opts.upper_bound == nullptr) {
-        return Status::OK();
+    RowRanges result_ranges;
+    for (auto& key_range : _opts.key_ranges) {
+        rowid_t lower_rowid = 0;
+        rowid_t upper_rowid = num_rows();
+        RETURN_IF_ERROR(_prepare_seek(key_range));
+        if (key_range.upper_key != nullptr) {
+            // If the client wants to read upper_key itself, include_upper is true, so we
+            // need the first ordinal at which the key is larger than upper_key.
+            // Hence we call _lookup_ordinal with the negation of include_upper.
+            RETURN_IF_ERROR(_lookup_ordinal(
+                *key_range.upper_key, !key_range.include_upper, num_rows(), &upper_rowid));
+        }
+        if (upper_rowid > 0 && key_range.lower_key != nullptr) {
+            RETURN_IF_ERROR(
+                _lookup_ordinal(*key_range.lower_key, key_range.include_lower, upper_rowid, &lower_rowid));
+        }
+        auto row_range = RowRanges::create_single(lower_rowid, upper_rowid);
+        RowRanges::ranges_union(result_ranges, row_range, &result_ranges);
    }
-
-    RETURN_IF_ERROR(_prepare_seek());
-
-    // init row range with short key range
-    if (_opts.upper_bound != nullptr) {
-        // If client want to read upper_bound, the include_upper_bound is true. So we
-        // should get the first ordinal at which key is larger than upper_bound.
-        // So we call _lookup_ordinal with include_upper_bound's negate
-        RETURN_IF_ERROR(_lookup_ordinal(
-                *_opts.upper_bound, !_opts.include_upper_bound, num_rows(), &_upper_rowid));
-    }
-    if (_upper_rowid > 0 && _opts.lower_bound != nullptr) {
-        RETURN_IF_ERROR(_lookup_ordinal(
-                *_opts.lower_bound, _opts.include_lower_bound, _upper_rowid, &_lower_rowid));
-    }
-    // seeked short key row ranges: [_lower_rowid, _upper_rowid)
-    _row_ranges = RowRanges::create_single(_lower_rowid, _upper_rowid);
-    DorisMetrics::segment_rows_by_short_key.increment(_upper_rowid - _lower_rowid);
+    // pre-condition: _row_ranges == [0, num_rows)
+    _row_ranges = std::move(result_ranges);
+    DorisMetrics::segment_rows_by_short_key.increment(_row_ranges.count());

    return Status::OK();
 }

 // Set up environment for the following seek.
-Status SegmentIterator::_prepare_seek() {
+Status SegmentIterator::_prepare_seek(const StorageReadOptions::KeyRange& key_range) {
    std::vector<const Field*> key_fields;
    std::set<uint32_t> column_set;
-    if (_opts.lower_bound != nullptr) {
-        for (auto cid : _opts.lower_bound->schema()->column_ids()) {
+    if (key_range.lower_key != nullptr) {
+        for (auto cid : key_range.lower_key->schema()->column_ids()) {
            column_set.emplace(cid);
-            key_fields.emplace_back(_opts.lower_bound->schema()->column(cid));
+            key_fields.emplace_back(key_range.lower_key->schema()->column(cid));
        }
    }
-    if (_opts.upper_bound != nullptr) {
-        for (auto cid : _opts.upper_bound->schema()->column_ids()) {
+    if (key_range.upper_key != nullptr) {
+        for (auto cid : key_range.upper_key->schema()->column_ids()) {
            if (column_set.count(cid) == 0) {
-                key_fields.emplace_back(_opts.upper_bound->schema()->column(cid));
+                key_fields.emplace_back(key_range.upper_key->schema()->column(cid));
                column_set.emplace(cid);
            }
        }
@ -123,15 +119,15 @@ Status SegmentIterator::_prepare_seek() {
    // create used column iterator
    for (auto cid : _seek_schema->column_ids()) {
        if (_column_iterators[cid] == nullptr) {
-            RETURN_IF_ERROR(_create_column_iterator(cid, &_column_iterators[cid]));
+            RETURN_IF_ERROR(_segment->new_column_iterator(cid, &_column_iterators[cid]));
        }
    }

    return Status::OK();
 }

-Status SegmentIterator::_init_row_ranges() {
-    if (_lower_rowid == _upper_rowid) {
+Status SegmentIterator::_get_row_ranges_by_column_conditions() {
+    if (_row_ranges.is_empty()) {
        // no data just return;
        return Status::OK();
    }
@ -174,7 +170,7 @@ Status SegmentIterator::_init_column_iterators() {
    }
    for (auto cid : _schema.column_ids()) {
        if (_column_iterators[cid] == nullptr) {
-            RETURN_IF_ERROR(_create_column_iterator(cid, &_column_iterators[cid]));
+            RETURN_IF_ERROR(_segment->new_column_iterator(cid, &_column_iterators[cid]));
        }

        _column_iterators[cid]->seek_to_ordinal(_cur_rowid);
@ -182,10 +178,6 @@ Status SegmentIterator::_init_column_iterators() {
    return Status::OK();
 }

-Status SegmentIterator::_create_column_iterator(uint32_t cid, ColumnIterator** iter) {
-    return _segment->new_column_iterator(cid, iter);
-}
-
 // Schema of lhs and rhs are different.
 // callers should assure that rhs' schema has all columns in lhs schema
 template<typename LhsRowType, typename RhsRowType>
@ -297,6 +289,11 @@ Status SegmentIterator::_next_batch(RowBlockV2* block, size_t* rows_read) {
 }

 Status SegmentIterator::next_batch(RowBlockV2* block) {
+    if (UNLIKELY(!_inited)) {
+        RETURN_IF_ERROR(_init());
+        _inited = true;
+    }
+
    if (_row_ranges.is_empty() || _cur_rowid >= _row_ranges.to()) {
        block->resize(0);
        return Status::EndOfFile("no more data in segment");
@ -304,7 +301,7 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
    size_t rows_to_read = block->capacity();
    while (rows_to_read > 0) {
        if (_cur_rowid >= _row_ranges.get_range_to(_cur_range_id)) {
-            // current row range is read over,
+            // current row range is read over, trying to read from next range
            if (_cur_range_id >= _row_ranges.range_size() - 1) {
                // there is no more row range
                break;
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@ -45,20 +45,29 @@ class SegmentIterator : public RowwiseIterator {
 public:
    SegmentIterator(std::shared_ptr<Segment> segment, const Schema& _schema);
    ~SegmentIterator() override;
-    Status init(const StorageReadOptions& opts) override;
+    Status init(const StorageReadOptions& opts) override {
+        _opts = opts;
+        return Status::OK();
+    }
    Status next_batch(RowBlockV2* row_block) override;
    const Schema& schema() const override { return _schema; }
 private:
-    Status _init_short_key_range();
-    Status _prepare_seek();
-    Status _init_row_ranges();
-    Status _get_row_ranges_from_zone_map(RowRanges* zone_map_row_ranges);
-    Status _init_column_iterators();
-    Status _create_column_iterator(uint32_t cid, ColumnIterator** iter);
+    Status _init();

+    // calculate row ranges that fall into requested key ranges using short key index
+    Status _get_row_ranges_by_keys();
+    Status _prepare_seek(const StorageReadOptions::KeyRange& key_range);
    Status _lookup_ordinal(const RowCursor& key, bool is_include,
                           rowid_t upper_bound, rowid_t* rowid);
    Status _seek_and_peek(rowid_t rowid);
+
+    // calculate row ranges that satisfy requested column conditions using various column index
+    Status _get_row_ranges_by_column_conditions();
+    // TODO move column index related logic to ColumnReader
+    Status _get_row_ranges_from_zone_map(RowRanges* zone_map_row_ranges);
+
+    Status _init_column_iterators();
+
    Status _next_batch(RowBlockV2* block, size_t* rows_read);

    uint32_t segment_id() const { return _segment->id(); }
@ -68,28 +77,26 @@ private:
    std::shared_ptr<Segment> _segment;
    // TODO(zc): rethink if we need copy it
    Schema _schema;
+    // _column_iterators.size() == _schema.num_columns()
+    // _column_iterators[cid] == nullptr if cid is not in _schema
+    std::vector<ColumnIterator*> _column_iterators;
+    // after init(), `_row_ranges` contains all rowid to scan
+    RowRanges _row_ranges;
+    // the next rowid to read
+    rowid_t _cur_rowid;
+    // index of the row range where `_cur_rowid` belongs to
+    size_t _cur_range_id;
+    // the actual init process is delayed to the first call to next_batch()
+    bool _inited;

    StorageReadOptions _opts;

-    // row ranges to scan
-    size_t _cur_range_id;
-    RowRanges _row_ranges;
-
-    // Only used when init is called, help to finish seek_and_peek.
-    // Data will be saved in this batch
+    // row schema of the key to seek
+    // only used in `_get_row_ranges_by_keys`
    std::unique_ptr<Schema> _seek_schema;
-
-    // used to read data from columns when do bianry search to find
-    // oridnal for input bounds
+    // used to binary search the rowid for a given key
+    // only used in `_get_row_ranges_by_keys`
    std::unique_ptr<RowBlockV2> _seek_block;
-    // helper to save row to compare with input bounds
-    std::unique_ptr<RowCursor> _key_cursor;
-
-    std::vector<ColumnIterator*> _column_iterators;
-
-    rowid_t _lower_rowid;
-    rowid_t _upper_rowid;
-    rowid_t _cur_rowid;

    Arena _arena;
 };
--- a/be/src/olap/schema.h
+++ b/be/src/olap/schema.h
@ -99,7 +99,7 @@ public:
    ~Schema();

    const std::vector<Field*>& columns() const { return _cols; }
-    const Field* column(int idx) const { return _cols[idx]; }
+    const Field* column(ColumnId cid) const { return _cols[cid]; }

    size_t num_key_columns() const {
        return _num_key_columns;
@ -133,8 +133,11 @@ public:
    size_t num_column_ids() const { return _col_ids.size(); }
    const std::vector<ColumnId>& column_ids() const { return _col_ids; }
 private:
-    std::vector<Field*> _cols;
+    // all valid ColumnIds in this schema
    std::vector<ColumnId> _col_ids;
+    // _cols[cid] is only valid when cid is contained in `_col_ids`
+    std::vector<Field*> _cols;
+    // _col_offsets[cid] is only valid when cid is contained in `_col_ids`
    std::vector<size_t> _col_offsets;
    size_t _num_key_columns;
    size_t _schema_size;
--- a/be/src/olap/short_key_index.h
+++ b/be/src/olap/short_key_index.h
@ -236,6 +236,8 @@ public:

    uint32_t num_items() const { return _footer.num_items(); }

+    uint32_t num_rows_per_block() const { return _footer.num_rows_per_block(); }
+
    Slice key(ssize_t ordinal) const {
        DCHECK(ordinal >= 0 && ordinal < num_items());
        return {_key_data.data + _offsets[ordinal], _offsets[ordinal + 1] - _offsets[ordinal]};
--- a/be/src/util/doris_metrics.h
+++ b/be/src/util/doris_metrics.h
@ -107,9 +107,15 @@ public:
    static IntCounter meta_read_request_total;
    static IntCounter meta_read_request_duration_us;

+    // Counters for segment_v2
+    // -----------------------
+    // total number of segments read
    static IntCounter segment_read_total;
+    // total number of rows in queried segments (before index pruning)
    static IntCounter segment_row_total;
+    // total number of rows selected by short key index
    static IntCounter segment_rows_by_short_key;
+    // total number of rows selected by zone map index
    static IntCounter segment_rows_read_by_zone_map;

    static IntCounter txn_begin_request_total;
--- a/be/test/olap/rowset/segment_v2/segment_test.cpp
+++ b/be/test/olap/rowset/segment_v2/segment_test.cpp
@ -89,7 +89,7 @@ TEST_F(SegmentReaderWriterTest, normal) {
    ASSERT_TRUE(st.ok());
    // reader
    {
-        std::shared_ptr<Segment> segment(new Segment(fname, 0, tablet_schema, num_rows_per_block));
+        std::shared_ptr<Segment> segment(new Segment(fname, 0, tablet_schema.get()));
        st = segment->open();
        LOG(INFO) << "segment open, msg=" << st.to_string();
        ASSERT_TRUE(st.ok());
@ -97,13 +97,8 @@ TEST_F(SegmentReaderWriterTest, normal) {
        Schema schema(*tablet_schema);
        // scan all rows
        {
-            std::unique_ptr<SegmentIterator> iter;
-            st = segment->new_iterator(schema, &iter);
-            ASSERT_TRUE(st.ok());
-
            StorageReadOptions read_opts;
-            st = iter->init(read_opts);
-            ASSERT_TRUE(st.ok());
+            std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);

            Arena arena;
            RowBlockV2 block(schema, 1024, &arena);
@ -132,14 +127,8 @@ TEST_F(SegmentReaderWriterTest, normal) {
        }
        // test seek, key
        {
-            std::unique_ptr<SegmentIterator> iter;
-            st = segment->new_iterator(schema, &iter);
-            ASSERT_TRUE(st.ok());
-
            // lower bound
-            StorageReadOptions read_opts;
-            read_opts.lower_bound.reset(new RowCursor());
-            RowCursor* lower_bound = read_opts.lower_bound.get();
+            std::unique_ptr<RowCursor> lower_bound(new RowCursor());
            lower_bound->init(*tablet_schema, 2);
            {
                auto cell = lower_bound->cell(0);
@ -151,22 +140,19 @@ TEST_F(SegmentReaderWriterTest, normal) {
                cell.set_not_null();
                *(int*)cell.mutable_cell_ptr() = 100;
            }
-            read_opts.include_lower_bound = false;

            // upper bound
-            read_opts.upper_bound.reset(new RowCursor());
-            RowCursor* upper_bound = read_opts.upper_bound.get();
+            std::unique_ptr<RowCursor> upper_bound(new RowCursor());
            upper_bound->init(*tablet_schema, 1);
            {
                auto cell = upper_bound->cell(0);
                cell.set_not_null();
                *(int*)cell.mutable_cell_ptr() = 200;
            }
-            read_opts.include_upper_bound = true;

-            st = iter->init(read_opts);
-            LOG(INFO) << "iterator init msg=" << st.to_string();
-            ASSERT_TRUE(st.ok());
+            StorageReadOptions read_opts;
+            read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), true);
+            std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);

            Arena arena;
            RowBlockV2 block(schema, 100, &arena);
@ -180,26 +166,18 @@ TEST_F(SegmentReaderWriterTest, normal) {
        }
        // test seek, key
        {
-            std::unique_ptr<SegmentIterator> iter;
-            st = segment->new_iterator(schema, &iter);
-            ASSERT_TRUE(st.ok());
-
-            StorageReadOptions read_opts;
-
            // lower bound
-            read_opts.lower_bound.reset(new RowCursor());
-            RowCursor* lower_bound = read_opts.lower_bound.get();
+            std::unique_ptr<RowCursor> lower_bound(new RowCursor());
            lower_bound->init(*tablet_schema, 1);
            {
                auto cell = lower_bound->cell(0);
                cell.set_not_null();
                *(int*)cell.mutable_cell_ptr() = 40970;
            }
-            read_opts.include_lower_bound = false;

-            st = iter->init(read_opts);
-            LOG(INFO) << "iterator init msg=" << st.to_string();
-            ASSERT_TRUE(st.ok());
+            StorageReadOptions read_opts;
+            read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false);
+            std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);

            Arena arena;
            RowBlockV2 block(schema, 100, &arena);
@ -209,36 +187,26 @@ TEST_F(SegmentReaderWriterTest, normal) {
        }
        // test seek, key (-2, -1)
        {
-            std::unique_ptr<SegmentIterator> iter;
-            st = segment->new_iterator(schema, &iter);
-            ASSERT_TRUE(st.ok());
-
-            StorageReadOptions read_opts;
-
            // lower bound
-            read_opts.lower_bound.reset(new RowCursor());
-            RowCursor* lower_bound = read_opts.lower_bound.get();
+            std::unique_ptr<RowCursor> lower_bound(new RowCursor());
            lower_bound->init(*tablet_schema, 1);
            {
                auto cell = lower_bound->cell(0);
                cell.set_not_null();
                *(int*)cell.mutable_cell_ptr() = -2;
            }
-            read_opts.include_lower_bound = false;

-            read_opts.upper_bound.reset(new RowCursor());
-            RowCursor* upper_bound = read_opts.upper_bound.get();
+            std::unique_ptr<RowCursor> upper_bound(new RowCursor());
            upper_bound->init(*tablet_schema, 1);
            {
                auto cell = upper_bound->cell(0);
                cell.set_not_null();
                *(int*)cell.mutable_cell_ptr() = -1;
            }
-            read_opts.include_upper_bound = false;

-            st = iter->init(read_opts);
-            LOG(INFO) << "iterator init msg=" << st.to_string();
-            ASSERT_TRUE(st.ok());
+            StorageReadOptions read_opts;
+            read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), false);
+            std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);

            Arena arena;
            RowBlockV2 block(schema, 100, &arena);
@ -299,18 +267,13 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) {

    // reader with condition
    {
-        std::shared_ptr<Segment> segment(new Segment(fname, 0, tablet_schema, num_rows_per_block));
+        std::shared_ptr<Segment> segment(new Segment(fname, 0, tablet_schema.get()));
        st = segment->open();
        ASSERT_TRUE(st.ok());
        ASSERT_EQ(64 * 1024, segment->num_rows());
        Schema schema(*tablet_schema);
        // scan all rows
        {
-            std::unique_ptr<SegmentIterator> iter;
-            st = segment->new_iterator(schema, &iter);
-            ASSERT_TRUE(st.ok());
-
-            StorageReadOptions read_opts;
            TCondition condition;
            condition.__set_column_name("2");
            condition.__set_condition_op("<");
@ -319,9 +282,11 @@ TEST_F(SegmentReaderWriterTest, TestZoneMap) {
            std::shared_ptr<Conditions> conditions(new Conditions());
            conditions->set_tablet_schema(tablet_schema.get());
            conditions->append_condition(condition);
-            read_opts.conditions = conditions;
-            st = iter->init(read_opts);
-            ASSERT_TRUE(st.ok());
+
+            StorageReadOptions read_opts;
+            read_opts.conditions = conditions.get();
+
+            std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);

            Arena arena;
            RowBlockV2 block(schema, 1024, &arena);