diff --git a/be/src/olap/column_block.h b/be/src/olap/column_block.h index 4fc5403241..0b82617df0 100644 --- a/be/src/olap/column_block.h +++ b/be/src/olap/column_block.h @@ -33,8 +33,8 @@ class ColumnBlockCell; // It doesn't own any data, user should keep the life of input data. class ColumnBlock { public: - ColumnBlock(const TypeInfo* type_info, uint8_t* data, uint8_t* null_bitmap, Arena* arena) - : _type_info(type_info), _data(data), _null_bitmap(null_bitmap), _arena(arena) { } + ColumnBlock(const TypeInfo* type_info, uint8_t* data, uint8_t* null_bitmap, size_t nrows, Arena* arena) + : _type_info(type_info), _data(data), _null_bitmap(null_bitmap), _nrows(nrows), _arena(arena) { } const TypeInfo* type_info() const { return _type_info; } uint8_t* data() const { return _data; } @@ -53,10 +53,14 @@ public: } ColumnBlockCell cell(size_t idx) const; + + size_t nrows() const { return _nrows; } + private: const TypeInfo* _type_info; uint8_t* _data; uint8_t* _null_bitmap; + size_t _nrows; Arena* _arena; }; diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index c42d5af966..1ac974f602 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -18,16 +18,29 @@ #ifndef DORIS_BE_SRC_OLAP_COLUMN_PREDICATE_H #define DORIS_BE_SRC_OLAP_COLUMN_PREDICATE_H +#include "olap/column_block.h" +#include "olap/selection_vector.h" + namespace doris { class VectorizedRowBatch; class ColumnPredicate { public: + explicit ColumnPredicate(uint32_t column_id) : _column_id(column_id) { } + virtual ~ColumnPredicate() {} //evaluate predicate on VectorizedRowBatch virtual void evaluate(VectorizedRowBatch* batch) const = 0; + + // evaluate predicate on ColumnBlock + virtual void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const = 0; + + uint32_t column_id() const { return _column_id; } + +protected: + uint32_t _column_id; }; } //namespace doris diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp index 35ec966a85..93d53743a6 100644 --- a/be/src/olap/comparison_predicate.cpp +++ b/be/src/olap/comparison_predicate.cpp @@ -24,8 +24,8 @@ namespace doris { #define COMPARISON_PRED_CONSTRUCTOR(CLASS) \ template \ - CLASS::CLASS(int column_id, const type& value) \ - : _column_id(column_id), \ + CLASS::CLASS(uint32_t column_id, const type& value) \ + : ColumnPredicate(column_id), \ _value(value) \ {} \ @@ -38,8 +38,8 @@ COMPARISON_PRED_CONSTRUCTOR(GreaterEqualPredicate) #define COMPARISON_PRED_CONSTRUCTOR_STRING(CLASS) \ template<> \ - CLASS::CLASS(int column_id, const StringValue& value) \ - : _column_id(column_id) \ + CLASS::CLASS(uint32_t column_id, const StringValue& value) \ + : ColumnPredicate(column_id) \ { \ _value.len = value.len; \ _value.ptr = value.ptr; \ @@ -102,7 +102,6 @@ COMPARISON_PRED_CONSTRUCTOR_STRING(GreaterEqualPredicate) } \ } \ - COMPARISON_PRED_EVALUATE(EqualPredicate, ==) COMPARISON_PRED_EVALUATE(NotEqualPredicate, !=) COMPARISON_PRED_EVALUATE(LessPredicate, <) @@ -110,18 +109,47 @@ COMPARISON_PRED_EVALUATE(LessEqualPredicate, <=) COMPARISON_PRED_EVALUATE(GreaterPredicate, >) COMPARISON_PRED_EVALUATE(GreaterEqualPredicate, >=) +#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP) \ + template \ + void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { \ + uint16_t new_size = 0; \ + if (block->is_nullable()) { \ + for (uint16_t i = 0; i < *size; ++i) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const type* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + new_size += (!block->cell(idx).is_null() && (*cell_value OP _value)); \ + } \ + } else { \ + for (uint16_t i = 0; i < *size; ++i) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const type* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + new_size += (*cell_value OP _value); \ + } \ + } \ + *size = new_size; \ + } \ + +COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(EqualPredicate, ==) +COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(NotEqualPredicate, !=) +COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(LessPredicate, <) +COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(LessEqualPredicate, <=) +COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >) +COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) + #define COMPARISON_PRED_CONSTRUCTOR_DECLARATION(CLASS) \ - template CLASS::CLASS(int column_id, const int8_t& value); \ - template CLASS::CLASS(int column_id, const int16_t& value); \ - template CLASS::CLASS(int column_id, const int32_t& value); \ - template CLASS::CLASS(int column_id, const int64_t& value); \ - template CLASS::CLASS(int column_id, const int128_t& value); \ - template CLASS::CLASS(int column_id, const float& value); \ - template CLASS::CLASS(int column_id, const double& value); \ - template CLASS::CLASS(int column_id, const decimal12_t& value); \ - template CLASS::CLASS(int column_id, const StringValue& value); \ - template CLASS::CLASS(int column_id, const uint24_t& value); \ - template CLASS::CLASS(int column_id, const uint64_t& value); \ + template CLASS::CLASS(uint32_t column_id, const int8_t& value); \ + template CLASS::CLASS(uint32_t column_id, const int16_t& value); \ + template CLASS::CLASS(uint32_t column_id, const int32_t& value); \ + template CLASS::CLASS(uint32_t column_id, const int64_t& value); \ + template CLASS::CLASS(uint32_t column_id, const int128_t& value); \ + template CLASS::CLASS(uint32_t column_id, const float& value); \ + template CLASS::CLASS(uint32_t column_id, const double& value); \ + template CLASS::CLASS(uint32_t column_id, const decimal12_t& value); \ + template CLASS::CLASS(uint32_t column_id, const StringValue& value); \ + template CLASS::CLASS(uint32_t column_id, const uint24_t& value); \ + template CLASS::CLASS(uint32_t column_id, const uint64_t& value); \ COMPARISON_PRED_CONSTRUCTOR_DECLARATION(EqualPredicate) COMPARISON_PRED_CONSTRUCTOR_DECLARATION(NotEqualPredicate) @@ -150,4 +178,24 @@ COMPARISON_PRED_EVALUATE_DECLARATION(LessEqualPredicate) COMPARISON_PRED_EVALUATE_DECLARATION(GreaterPredicate) COMPARISON_PRED_EVALUATE_DECLARATION(GreaterEqualPredicate) +#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(CLASS) \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + +COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(EqualPredicate) +COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotEqualPredicate) +COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(LessPredicate) +COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(LessEqualPredicate) +COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(GreaterPredicate) +COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(GreaterEqualPredicate) + } //namespace doris diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index d4747bcd47..36d3f755b1 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -29,11 +29,11 @@ class VectorizedRowBatch; template \ class CLASS : public ColumnPredicate { \ public: \ - CLASS(int column_id, const type& value); \ + CLASS(uint32_t column_id, const type& value); \ virtual ~CLASS() { } \ virtual void evaluate(VectorizedRowBatch* batch) const override; \ + void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const override; \ private: \ - int32_t _column_id; \ type _value; \ }; \ diff --git a/be/src/olap/generic_iterators.cpp b/be/src/olap/generic_iterators.cpp index f9630a2de7..2d540cfecc 100644 --- a/be/src/olap/generic_iterators.cpp +++ b/be/src/olap/generic_iterators.cpp @@ -94,6 +94,7 @@ Status AutoIncrementIterator::next_batch(RowBlockV2* block) { _rows_returned++; } block->set_num_rows(row_idx); + block->set_selected_size(row_idx); if (row_idx > 0) { return Status::OK(); } @@ -125,7 +126,8 @@ public: // Before call this function, Client must assure that // valid() return true RowBlockRow current_row() const { - return RowBlockRow(&_block, _index_in_block); + uint16_t* selection_vector = _block.selection_vector(); + return RowBlockRow(&_block, selection_vector[_index_in_block]); } // Advance internal row index to next valid row @@ -148,22 +150,23 @@ private: RowBlockV2 _block; bool _valid = false; - size_t _index_in_block = 0; + size_t _index_in_block = -1; }; Status MergeIteratorContext::init(const StorageReadOptions& opts) { RETURN_IF_ERROR(_iter->init(opts)); RETURN_IF_ERROR(_load_next_block()); + if (valid()) { + RETURN_IF_ERROR(advance()); + } return Status::OK(); } Status MergeIteratorContext::advance() { // NOTE: we increase _index_in_block directly to valid one check - _index_in_block++; do { - for (; _index_in_block < _block.num_rows(); ++_index_in_block) { - // TODO(zc): we can skip rows that is fitered by conjunts here - // Now we return directly + _index_in_block++; + if (_index_in_block < _block.selected_size()) { return Status::OK(); } // current batch has no data, load next batch @@ -186,7 +189,7 @@ Status MergeIteratorContext::_load_next_block() { } } } while (_block.num_rows() == 0); - _index_in_block = 0; + _index_in_block = -1; _valid = true; return Status::OK(); } @@ -266,6 +269,7 @@ Status MergeIterator::next_batch(RowBlockV2* block) { } } block->set_num_rows(row_idx); + block->set_selected_size(row_idx); if (row_idx > 0) { return Status::OK(); } else { diff --git a/be/src/olap/in_list_predicate.cpp b/be/src/olap/in_list_predicate.cpp index 6a723948e2..e7c5478317 100644 --- a/be/src/olap/in_list_predicate.cpp +++ b/be/src/olap/in_list_predicate.cpp @@ -24,8 +24,8 @@ namespace doris { #define IN_LIST_PRED_CONSTRUCTOR(CLASS) \ template \ -CLASS::CLASS(int column_id, std::set&& values) \ - : _column_id(column_id), \ +CLASS::CLASS(uint32_t column_id, std::set&& values) \ + : ColumnPredicate(column_id), \ _values(std::move(values)) {} \ IN_LIST_PRED_CONSTRUCTOR(InListPredicate) @@ -84,18 +84,43 @@ void CLASS::evaluate(VectorizedRowBatch* batch) const { \ IN_LIST_PRED_EVALUATE(InListPredicate, !=) IN_LIST_PRED_EVALUATE(NotInListPredicate, ==) +#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP) \ + template \ + void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { \ + uint16_t new_size = 0; \ + if (block->is_nullable()) { \ + for (uint16_t i = 0; i < *size; ++i) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const type* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + new_size += (!block->cell(idx).is_null() && _values.find(*cell_value) OP _values.end()); \ + } \ + } else { \ + for (uint16_t i = 0; i < *size; ++i) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const type* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + new_size += (_values.find(*cell_value) OP _values.end()); \ + } \ + } \ + *size = new_size; \ + } \ + +IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(InListPredicate, ==) +IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, !=) + #define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS) \ - template CLASS::CLASS(int column_id, std::set&& values); \ - template CLASS::CLASS(int column_id, std::set&& values); \ - template CLASS::CLASS(int column_id, std::set&& values); \ - template CLASS::CLASS(int column_id, std::set&& values); \ - template CLASS::CLASS(int column_id, std::set&& values); \ - template CLASS::CLASS(int column_id, std::set&& values); \ - template CLASS::CLASS(int column_id, std::set&& values); \ - template CLASS::CLASS(int column_id, std::set&& values); \ - template CLASS::CLASS(int column_id, std::set&& values); \ - template CLASS::CLASS(int column_id, std::set&& values); \ - template CLASS::CLASS(int column_id, std::set&& values); \ + template CLASS::CLASS(uint32_t column_id, std::set&& values); \ + template CLASS::CLASS(uint32_t column_id, std::set&& values); \ + template CLASS::CLASS(uint32_t column_id, std::set&& values); \ + template CLASS::CLASS(uint32_t column_id, std::set&& values); \ + template CLASS::CLASS(uint32_t column_id, std::set&& values); \ + template CLASS::CLASS(uint32_t column_id, std::set&& values); \ + template CLASS::CLASS(uint32_t column_id, std::set&& values); \ + template CLASS::CLASS(uint32_t column_id, std::set&& values); \ + template CLASS::CLASS(uint32_t column_id, std::set&& values); \ + template CLASS::CLASS(uint32_t column_id, std::set&& values); \ + template CLASS::CLASS(uint32_t column_id, std::set&& values); \ IN_LIST_PRED_CONSTRUCTOR_DECLARATION(InListPredicate) IN_LIST_PRED_CONSTRUCTOR_DECLARATION(NotInListPredicate) @@ -115,4 +140,21 @@ IN_LIST_PRED_CONSTRUCTOR_DECLARATION(NotInListPredicate) IN_LIST_PRED_EVALUATE_DECLARATION(InListPredicate) IN_LIST_PRED_EVALUATE_DECLARATION(NotInListPredicate) + +#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(CLASS) \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + +IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(InListPredicate) +IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotInListPredicate) + } //namespace doris diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index de2c913129..5ebfae8dcf 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -30,11 +30,11 @@ class VectorizedRowBatch; template \ class CLASS : public ColumnPredicate { \ public: \ - CLASS(int column_id, std::set&& values); \ + CLASS(uint32_t column_id, std::set&& values); \ virtual ~CLASS() {} \ virtual void evaluate(VectorizedRowBatch* batch) const override; \ + void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const override; \ private: \ - int32_t _column_id; \ std::set _values; \ }; \ diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h index 6861f01aa0..81f2072777 100644 --- a/be/src/olap/iterators.h +++ b/be/src/olap/iterators.h @@ -27,6 +27,7 @@ class RowCursor; class RowBlockV2; class Schema; class Conditions; +class ColumnPredicate; class StorageReadOptions { public: @@ -69,6 +70,11 @@ public: // delete conditions used by column index to filter pages std::vector delete_conditions; + // reader's column predicate, nullptr if not existed + // used to fiter rows in row block + // TODO(hkp): refactor the column predicate framework + // to unify Conditions and ColumnPredicate + const std::vector* column_predicates = nullptr; }; // Used to read data in RowBlockV2 one by one diff --git a/be/src/olap/null_predicate.cpp b/be/src/olap/null_predicate.cpp index 59c1e47fe2..0e276e9012 100644 --- a/be/src/olap/null_predicate.cpp +++ b/be/src/olap/null_predicate.cpp @@ -22,8 +22,8 @@ namespace doris { -NullPredicate::NullPredicate(int32_t column_id, bool is_null) - : _column_id(column_id), _is_null(is_null) {} +NullPredicate::NullPredicate(uint32_t column_id, bool is_null) + : ColumnPredicate(column_id), _is_null(is_null) {} NullPredicate::~NullPredicate() {} @@ -60,4 +60,18 @@ void NullPredicate::evaluate(VectorizedRowBatch* batch) const { } } +void NullPredicate::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { + uint16_t new_size = 0; + if (!block->is_nullable() && _is_null) { + *size = 0; + return; + } + for (uint16_t i = 0; i < *size; ++i) { + uint16_t idx = sel[i]; + sel[new_size] = idx; + new_size += (block->cell(idx).is_null() == _is_null); + } + *size = new_size; +} + } //namespace doris diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h index 152d945cf4..bf991ed283 100644 --- a/be/src/olap/null_predicate.h +++ b/be/src/olap/null_predicate.h @@ -18,7 +18,6 @@ #ifndef DORIS_BE_SRC_OLAP_NULL_PREDICATE_H #define DORIS_BE_SRC_OLAP_NULL_PREDICATE_H -#include #include "olap/column_predicate.h" namespace doris { @@ -27,12 +26,14 @@ class VectorizedRowBatch; class NullPredicate : public ColumnPredicate { public: - NullPredicate(int32_t column_id, bool is_null); + NullPredicate(uint32_t column_id, bool is_null); virtual ~NullPredicate(); virtual void evaluate(VectorizedRowBatch* batch) const override; + + void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const override; + private: - int32_t _column_id; bool _is_null; //true for null, false for not null }; diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index 36aebddef8..972371a854 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -30,7 +30,8 @@ RowBlockV2::RowBlockV2(const Schema& schema, uint16_t capacity) : _schema(schema), _capacity(capacity), _column_datas(_schema.num_columns(), nullptr), - _column_null_bitmaps(_schema.num_columns(), nullptr) { + _column_null_bitmaps(_schema.num_columns(), nullptr), + _selection_vector(nullptr) { auto bitmap_size = BitmapSize(capacity); for (auto cid : _schema.column_ids()) { size_t data_size = _schema.column(cid)->type_info()->size() * _capacity; @@ -40,6 +41,7 @@ RowBlockV2::RowBlockV2(const Schema& schema, uint16_t capacity) _column_null_bitmaps[cid] = new uint8_t[bitmap_size];; } } + _selection_vector = new uint16_t[_capacity]; clear(); } @@ -50,6 +52,7 @@ RowBlockV2::~RowBlockV2() { for (auto null_bitmap : _column_null_bitmaps) { delete[] null_bitmap; } + delete[] _selection_vector; } Status RowBlockV2::copy_to_row_cursor(size_t row_idx, RowCursor* cursor) { diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h index 8791262ade..c55e44b8df 100644 --- a/be/src/olap/row_block2.h +++ b/be/src/olap/row_block2.h @@ -25,6 +25,7 @@ #include "olap/column_block.h" #include "olap/schema.h" #include "olap/types.h" +#include "olap/selection_vector.h" namespace doris { @@ -54,6 +55,10 @@ public: void clear() { _num_rows = 0; _arena.reset(new Arena); + _selected_size = _capacity; + for (int i = 0; i < _selected_size; ++i) { + _selection_vector[i] = i; + } } // Copy the row_idx row's data into given row_cursor. @@ -67,13 +72,25 @@ public: const TypeInfo* type_info = _schema.column(cid)->type_info(); uint8_t* data = _column_datas[cid]; uint8_t* null_bitmap = _column_null_bitmaps[cid]; - return ColumnBlock(type_info, data, null_bitmap, _arena.get()); + return ColumnBlock(type_info, data, null_bitmap, _capacity, _arena.get()); } RowBlockRow row(size_t row_idx) const; const Schema* schema() const { return &_schema; } + uint16_t* selection_vector() const { + return _selection_vector; + } + + uint16_t selected_size() const { + return _selected_size; + } + + void set_selected_size(uint16_t selected_size) { + _selected_size = selected_size; + } + private: Schema _schema; size_t _capacity; @@ -88,6 +105,11 @@ private: size_t _num_rows; // manages the memory for slice's data std::unique_ptr _arena; + + // index of selected rows for rows passed the predicate + uint16_t* _selection_vector; + // selected rows number + uint16_t _selected_size; }; // Stands for a row in RowBlockV2. It is consisted of a RowBlockV2 reference diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 68ec4cb59c..af3ef13396 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -52,6 +52,7 @@ OLAPStatus BetaRowsetReader::init(RowsetReaderContext* read_context) { read_context->delete_handler->get_delete_conditions_after_version(_rowset->end_version(), &read_options.delete_conditions); } + read_options.column_predicates = read_context->predicates; // create iterator for each segment std::vector> seg_iterators; @@ -117,7 +118,10 @@ OLAPStatus BetaRowsetReader::next_block(RowBlock** block) { } // convert to output block _output_block->clear(); - for (size_t row_idx = 0; row_idx < _input_block->num_rows(); ++row_idx) { + size_t rows_read = 0; + uint16_t* selection_vector = _input_block->selection_vector(); + for (size_t i = 0; i < _input_block->selected_size(); ++i) { + uint16_t row_idx = selection_vector[i]; // shallow copy row from input block to output block _output_block->get_row(row_idx, _row.get()); // this copy function will copy return_columns' row to seek_columns's row_cursor @@ -126,10 +130,11 @@ OLAPStatus BetaRowsetReader::next_block(RowBlock** block) { LOG(WARNING) << "failed to copy row: " << s.to_string(); return OLAP_ERR_ROWSET_READ_FAILED; } + ++rows_read; } _output_block->set_pos(0); - _output_block->set_limit(_input_block->num_rows()); - _output_block->finalize(_input_block->num_rows()); + _output_block->set_limit(rows_read); + _output_block->finalize(rows_read); *block = _output_block.get(); // update raw_rows_read counter _context->stats->raw_rows_read += _input_block->num_rows(); diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp index c1cf60715d..706de61752 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp @@ -206,7 +206,7 @@ Status BinaryDictPageDecoder::next_batch(size_t* n, ColumnBlockView* dst) { // And then copy the strings corresponding to the codewords to the destination buffer TypeInfo *type_info = get_type_info(OLAP_FIELD_TYPE_INT); // the data in page is not null - ColumnBlock column_block(type_info, _code_buf.data(), nullptr, dst->column_block()->arena()); + ColumnBlock column_block(type_info, _code_buf.data(), nullptr, *n, dst->column_block()->arena()); ColumnBlockView tmp_block_view(&column_block); RETURN_IF_ERROR(_data_page_decoder->next_batch(n, &tmp_block_view)); for (int i = 0; i < *n; ++i) { diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp b/be/src/olap/rowset/segment_v2/encoding_info.cpp index 31539a2168..96bb78ea7a 100644 --- a/be/src/olap/rowset/segment_v2/encoding_info.cpp +++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp @@ -21,6 +21,7 @@ #include "olap/rowset/segment_v2/bitshuffle_page.h" #include "olap/rowset/segment_v2/rle_page.h" #include "olap/rowset/segment_v2/binary_dict_page.h" +#include "gutil/strings/substitute.h" namespace doris { namespace segment_v2 { @@ -167,7 +168,9 @@ Status EncodingInfoResolver::get( auto key = std::make_pair(data_type, encoding_type); auto it = _encoding_map.find(key); if (it == std::end(_encoding_map)) { - return Status::InternalError("fail to find valid type encoding"); + return Status::InternalError( + strings::Substitute("fail to find valid type encoding, type:$0, encoding:$1", + data_type, encoding_type)); } *out = it->second; return Status::OK(); diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index d3ce736d9b..f672d5456c 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -26,6 +26,7 @@ #include "olap/row_block2.h" #include "olap/row_cursor.h" #include "olap/short_key_index.h" +#include "olap/column_predicate.h" using strings::Substitute; @@ -310,34 +311,50 @@ Status SegmentIterator::next_batch(RowBlockV2* block) { block->set_num_rows(0); return Status::EndOfFile("no more data in segment"); } - size_t rows_to_read = block->capacity(); - while (rows_to_read > 0) { - if (_cur_rowid >= _row_ranges.get_range_to(_cur_range_id)) { - // current row range is read over, trying to read from next range - if (_cur_range_id >= _row_ranges.range_size() - 1) { - // there is no more row range - break; - } + + // check whether need to seek + if (_cur_rowid >= _row_ranges.get_range_to(_cur_range_id)) { + while (true) { // step to next row range ++_cur_range_id; - _cur_rowid = _row_ranges.get_range_from(_cur_range_id); + // current row range is read over, trying to read from next range + if (_cur_range_id >= _row_ranges.range_size() - 1) { + block->set_num_rows(0); + return Status::EndOfFile("no more data in segment"); + } if (_row_ranges.get_range_count(_cur_range_id) == 0) { // current row range is empty, just skip seek continue; } + _cur_rowid = _row_ranges.get_range_from(_cur_range_id); for (auto cid : block->schema()->column_ids()) { RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(_cur_rowid)); } + break; } - size_t to_read_in_range = std::min(rows_to_read, size_t(_row_ranges.get_range_to(_cur_range_id) - _cur_rowid)); - RETURN_IF_ERROR(_next_batch(block, &to_read_in_range)); - _cur_rowid += to_read_in_range; - rows_to_read -= to_read_in_range; } - block->set_num_rows(block->capacity() - rows_to_read); + // next_batch just return the rows in current row range + // it is easier to realize lazy materialization in the future + size_t rows_to_read = std::min(block->capacity(), size_t(_row_ranges.get_range_to(_cur_range_id) - _cur_rowid)); + RETURN_IF_ERROR(_next_batch(block, &rows_to_read)); + _cur_rowid += rows_to_read; + block->set_num_rows(rows_to_read); + if (block->num_rows() == 0) { return Status::EndOfFile("no more data in segment"); } + // column predicate vectorization execution + // TODO(hkp): lazy materialization + // TODO(hkp): optimize column predicate to check column block once for one column + if (_opts.column_predicates != nullptr) { + // init selection position index + uint16_t selected_size = block->selected_size(); + for (auto column_predicate : *_opts.column_predicates) { + auto column_block = block->column_block(column_predicate->column_id()); + column_predicate->evaluate(&column_block, block->selection_vector(), &selected_size); + } + block->set_selected_size(selected_size); + } return Status::OK(); } diff --git a/be/src/olap/selection_vector.h b/be/src/olap/selection_vector.h new file mode 100644 index 0000000000..772fa0fd66 --- /dev/null +++ b/be/src/olap/selection_vector.h @@ -0,0 +1,172 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "util/bitmap.h" + +namespace doris { + +// Bit-vector representing the selection status of each row in a row block. +// +// It is used to scan data with column predicate. +// the bit in the selection vector is used to indicate whether the given row is live. +class SelectionVector { +public: + // Construct a new vector. The bits are initially in an indeterminate state. + // Call set_all_true() if you require all rows to be initially selected. + explicit SelectionVector(size_t row_capacity) + : _n_rows(row_capacity), + _n_bytes(BitmapSize(row_capacity)), + _bitmap(new uint8_t[_n_bytes]) { + CHECK_GT(_n_bytes, 0); + set_all_false(); + } + + // returen the number of selected rows. + size_t count_selected() const { + return Bits::Count(_bitmap.get(), _n_bytes); + } + + // Return true if any rows are selected, or false + // This is equivalent to (count_selected() > 0), but faster. + inline bool any_selected() const; + + bool is_row_selected(size_t row) const { + DCHECK_LT(row, _n_rows); + return BitmapTest(_bitmap.get(), row); + } + + void set_row_selected(size_t row) { + DCHECK_LT(row, _n_rows); + BitmapSet(_bitmap.get(), row); + } + + void set_all_true() { + memset(_bitmap.get(), 0xff, _n_bytes); + pad_extra_bits_wit_zeroes(); + } + + void set_all_false() { + memset(_bitmap.get(), 0, _n_bytes); + } + + void clear_bit(size_t row) { + DCHECK_LT(row, _n_rows); + return BitmapClear(_bitmap.get(), row); + } + + uint8_t* mutable_bitmap() { return _bitmap.get(); } + + const uint8_t* bitmap() const { return _bitmap.get(); } + + size_t nrows() const { return _n_rows; } + + std::string to_string() const { + return BitmapToString(_bitmap.get(), _n_rows); + } + +private: + // Pads any non-byte-aligned bits at the end of the SelectionVector with + // zeroes. + // + // To improve performance, CountSelected() and AnySelected() evaluate the + // SelectionVector's bitmap in terms of bytes. As such, they consider all of + // the trailing bits, even if the bitmap's bit length is not byte-aligned and + // some trailing bits aren't part of the bitmap. + // + // To address this without sacrificing performance, we need to zero out all + // trailing bits at construction time, or after any operation that sets all + // bytes in bulk. + + void pad_extra_bits_wit_zeroes() { + size_t bits_in_last_byte = _n_rows & 7; + if (bits_in_last_byte > 0) { + BitmapChangeBits(_bitmap.get(), _n_rows, 8 - bits_in_last_byte, false); + } + } + +private: + // row capacity + size_t _n_rows; + size_t _n_bytes; + std::unique_ptr _bitmap; + DISALLOW_COPY_AND_ASSIGN(SelectionVector); +}; + +inline bool SelectionVector::any_selected() const { + size_t rem = _n_bytes; + const uint32_t* p32 = reinterpret_cast(_bitmap.get()); + while (rem >= 4) { + if (*p32 != 0) { + return true; + } + ++p32; + rem -= 4; + } + + const uint8_t* p8 = reinterpret_cast(p32); + while (rem > 0) { + if (*p8 != 0) { + return true; + } + ++p8; + --rem; + } + return false; +} + +// A SelectionVectorView keeps track of where in the selection vector a given +// batch will start from. After processing a batch, Advance() should be called +// and the view will move forward by the appropriate amount. In this way, the +// underlying selection vector can easily be updated batch-by-batch. +class SelectionVectorView { + public: + // Constructs a new SelectionVectorView. + // + // The 'sel_vec' object must outlive this SelectionVectorView. + explicit SelectionVectorView(SelectionVector* sel_vec) + : _sel_vec(sel_vec), _row_offset(0) {} + void advance(size_t skip) { + DCHECK_LE(skip, _sel_vec->nrows() - _row_offset); + _row_offset += skip; + } + void set_bit(size_t row_idx) { + DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset); + BitmapSet(_sel_vec->mutable_bitmap(), _row_offset + row_idx); + } + void clear_bit(size_t row_idx) { + DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset); + BitmapClear(_sel_vec->mutable_bitmap(), _row_offset + row_idx); + } + bool test_bit(size_t row_idx) { + DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset); + return BitmapTest(_sel_vec->bitmap(), _row_offset + row_idx); + } + void clear_bits(size_t nrows) { + DCHECK_LE(nrows, _sel_vec->nrows() - _row_offset); + BitmapChangeBits(_sel_vec->mutable_bitmap(), _row_offset, nrows, false); + } + +private: + SelectionVector* _sel_vec; + size_t _row_offset; +}; + +} diff --git a/be/src/util/bitmap.cpp b/be/src/util/bitmap.cpp index e394babbb7..a6ef2903bf 100644 --- a/be/src/util/bitmap.cpp +++ b/be/src/util/bitmap.cpp @@ -19,6 +19,8 @@ #include +#include "gutil/stringprintf.h" + namespace doris { std::string Bitmap::DebugString(bool print_bits) const { @@ -132,4 +134,21 @@ bool BitmapFindFirst(const uint8_t *bitmap, size_t offset, size_t bitmap_size, return false; } +std::string BitmapToString(const uint8_t *bitmap, size_t num_bits) { + std::string s; + size_t index = 0; + while (index < num_bits) { + StringAppendF(&s, "%4zu: ", index); + for (int i = 0; i < 8 && index < num_bits; ++i) { + for (int j = 0; j < 8 && index < num_bits; ++j) { + StringAppendF(&s, "%d", BitmapTest(bitmap, index)); + index++; + } + StringAppendF(&s, " "); + } + StringAppendF(&s, "\n"); + } + return s; +} + } diff --git a/be/src/util/bitmap.h b/be/src/util/bitmap.h index ae2e89ca9b..2d0c1de062 100644 --- a/be/src/util/bitmap.h +++ b/be/src/util/bitmap.h @@ -113,6 +113,13 @@ inline bool BitmapEquals(const uint8_t* bm1, const uint8_t* bm2, size_t bitmap_s return (bm1[num_full_bytes] & mask) == (bm2[num_full_bytes] & mask); } +// This function will print the bitmap content in a format like the following: +// eg: 0001110000100010110011001100001100110011 +// output: +// 0000: 00011100 00100010 11001100 11000011 +// 0016: 00110011 +std::string BitmapToString(const uint8_t *bitmap, size_t num_bits); + // Iterator which yields ranges of set and unset bits. // Example usage: // bool value; diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt index 03deacac1b..7685e7a4ca 100644 --- a/be/test/olap/CMakeLists.txt +++ b/be/test/olap/CMakeLists.txt @@ -73,3 +73,4 @@ ADD_BE_TEST(short_key_index_test) ADD_BE_TEST(page_cache_test) ADD_BE_TEST(hll_test) ADD_BE_TEST(memtable_flush_executor_test) +ADD_BE_TEST(selection_vector_test) diff --git a/be/test/olap/rowset/beta_rowset_test.cpp b/be/test/olap/rowset/beta_rowset_test.cpp index e02151074d..1222a86205 100644 --- a/be/test/olap/rowset/beta_rowset_test.cpp +++ b/be/test/olap/rowset/beta_rowset_test.cpp @@ -30,6 +30,7 @@ #include "olap/storage_engine.h" #include "olap/tablet_schema.h" #include "olap/utils.h" +#include "olap/comparison_predicate.h" #include "runtime/mem_tracker.h" #include "runtime/mem_pool.h" #include "util/slice.h" @@ -184,37 +185,75 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) { reader_context.seek_columns = &return_columns; reader_context.stats = &_stats; - RowsetReaderSharedPtr rowset_reader; - create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader); + // without predicates + { + RowsetReaderSharedPtr rowset_reader; + create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader); + RowBlock* output_block; + uint32_t num_rows_read = 0; + while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) { + ASSERT_TRUE(output_block != nullptr); + ASSERT_GT(output_block->row_num(), 0); + ASSERT_EQ(0, output_block->pos()); + ASSERT_EQ(output_block->row_num(), output_block->limit()); + ASSERT_EQ(return_columns, output_block->row_block_info().column_ids); + // after sort merge segments, k1 will be 0, 1, 2, 10, 11, 12, 20, 21, 22, ..., 40950, 40951, 40952 + for (int i = 0; i < output_block->row_num(); ++i) { + char* field1 = output_block->field_ptr(i, 0); + char* field2 = output_block->field_ptr(i, 1); + // test null bit + ASSERT_FALSE(*reinterpret_cast(field1)); + ASSERT_FALSE(*reinterpret_cast(field2)); + uint32_t k1 = *reinterpret_cast(field1 + 1); + uint32_t k2 = *reinterpret_cast(field2 + 1); + ASSERT_EQ(k1 * 10, k2); - RowBlock* output_block; - uint32_t num_rows_read = 0; - while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) { - ASSERT_TRUE(output_block != nullptr); - ASSERT_GT(output_block->row_num(), 0); - ASSERT_EQ(0, output_block->pos()); - ASSERT_EQ(output_block->row_num(), output_block->limit()); - ASSERT_EQ(return_columns, output_block->row_block_info().column_ids); - // after sort merge segments, k1 will be 0, 1, 2, 10, 11, 12, 20, 21, 22, ..., 40950, 40951, 40952 - for (int i = 0; i < output_block->row_num(); ++i) { - char* field1 = output_block->field_ptr(i, 0); - char* field2 = output_block->field_ptr(i, 1); - // test null bit - ASSERT_FALSE(*reinterpret_cast(field1)); - ASSERT_FALSE(*reinterpret_cast(field2)); - uint32_t k1 = *reinterpret_cast(field1 + 1); - uint32_t k2 = *reinterpret_cast(field2 + 1); - ASSERT_EQ(k1 * 10, k2); - - int rid = num_rows_read / 3; - int seg_id = num_rows_read % 3; - ASSERT_EQ(rid * 10 + seg_id, k1); - num_rows_read++; + int rid = num_rows_read / 3; + int seg_id = num_rows_read % 3; + ASSERT_EQ(rid * 10 + seg_id, k1); + num_rows_read++; + } } + EXPECT_EQ(OLAP_ERR_DATA_EOF, s); + EXPECT_TRUE(output_block == nullptr); + EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read); + } + + // merge segments with predicates + { + std::vector column_predicates; + // column predicate: k1 = 10 + std::unique_ptr predicate(new EqualPredicate(0, 10)); + column_predicates.emplace_back(predicate.get()); + reader_context.predicates = &column_predicates; + RowsetReaderSharedPtr rowset_reader; + create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader); + RowBlock* output_block; + uint32_t num_rows_read = 0; + while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) { + ASSERT_TRUE(output_block != nullptr); + ASSERT_EQ(1, output_block->row_num()); + ASSERT_EQ(0, output_block->pos()); + ASSERT_EQ(output_block->row_num(), output_block->limit()); + ASSERT_EQ(return_columns, output_block->row_block_info().column_ids); + // after sort merge segments, k1 will be 10 + for (int i = 0; i < output_block->row_num(); ++i) { + char* field1 = output_block->field_ptr(i, 0); + char* field2 = output_block->field_ptr(i, 1); + // test null bit + ASSERT_FALSE(*reinterpret_cast(field1)); + ASSERT_FALSE(*reinterpret_cast(field2)); + uint32_t k1 = *reinterpret_cast(field1 + 1); + uint32_t k2 = *reinterpret_cast(field2 + 1); + ASSERT_EQ(10, k1); + ASSERT_EQ(k1 * 10, k2); + num_rows_read++; + } + } + EXPECT_EQ(OLAP_ERR_DATA_EOF, s); + EXPECT_TRUE(output_block == nullptr); + EXPECT_EQ(1, num_rows_read); } - EXPECT_EQ(OLAP_ERR_DATA_EOF, s); - EXPECT_TRUE(output_block == nullptr); - EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read); } { // test return unordered data and only k3 @@ -226,30 +265,66 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) { reader_context.seek_columns = &return_columns; reader_context.stats = &_stats; - RowsetReaderSharedPtr rowset_reader; - create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader); + // without predicate + { + RowsetReaderSharedPtr rowset_reader; + create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader); - RowBlock* output_block; - uint32_t num_rows_read = 0; - while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) { - ASSERT_TRUE(output_block != nullptr); - ASSERT_GT(output_block->row_num(), 0); - ASSERT_EQ(0, output_block->pos()); - ASSERT_EQ(output_block->row_num(), output_block->limit()); - ASSERT_EQ(return_columns, output_block->row_block_info().column_ids); - // for unordered result, k3 will be 0, 1, 2, ..., 4096*3-1 - for (int i = 0; i < output_block->row_num(); ++i) { - char* field3 = output_block->field_ptr(i, 2); - // test null bit - ASSERT_FALSE(*reinterpret_cast(field3)); - uint32_t k3 = *reinterpret_cast(field3 + 1); - ASSERT_EQ(num_rows_read, k3); - num_rows_read++; + RowBlock* output_block; + uint32_t num_rows_read = 0; + while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) { + ASSERT_TRUE(output_block != nullptr); + ASSERT_GT(output_block->row_num(), 0); + ASSERT_EQ(0, output_block->pos()); + ASSERT_EQ(output_block->row_num(), output_block->limit()); + ASSERT_EQ(return_columns, output_block->row_block_info().column_ids); + // for unordered result, k3 will be 0, 1, 2, ..., 4096*3-1 + for (int i = 0; i < output_block->row_num(); ++i) { + char* field3 = output_block->field_ptr(i, 2); + // test null bit + ASSERT_FALSE(*reinterpret_cast(field3)); + uint32_t k3 = *reinterpret_cast(field3 + 1); + ASSERT_EQ(num_rows_read, k3); + num_rows_read++; + } } + EXPECT_EQ(OLAP_ERR_DATA_EOF, s); + EXPECT_TRUE(output_block == nullptr); + EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read); + } + + // with predicate + { + std::vector column_predicates; + // column predicate: k3 < 100 + ColumnPredicate* predicate = new LessPredicate(2, 100); + column_predicates.emplace_back(predicate); + reader_context.predicates = &column_predicates; + RowsetReaderSharedPtr rowset_reader; + create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader); + + RowBlock* output_block; + uint32_t num_rows_read = 0; + while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) { + ASSERT_TRUE(output_block != nullptr); + ASSERT_LE(output_block->row_num(), 100); + ASSERT_EQ(0, output_block->pos()); + ASSERT_EQ(output_block->row_num(), output_block->limit()); + ASSERT_EQ(return_columns, output_block->row_block_info().column_ids); + // for unordered result, k3 will be 0, 1, 2, ..., 99 + for (int i = 0; i < output_block->row_num(); ++i) { + char* field3 = output_block->field_ptr(i, 2); + // test null bit + ASSERT_FALSE(*reinterpret_cast(field3)); + uint32_t k3 = *reinterpret_cast(field3 + 1); + ASSERT_EQ(num_rows_read, k3); + num_rows_read++; + } + } + EXPECT_EQ(OLAP_ERR_DATA_EOF, s); + EXPECT_TRUE(output_block == nullptr); + EXPECT_EQ(100, num_rows_read); } - EXPECT_EQ(OLAP_ERR_DATA_EOF, s); - EXPECT_TRUE(output_block == nullptr); - EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read); } } diff --git a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp index d082bca5cc..119640e0c7 100644 --- a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp @@ -74,7 +74,7 @@ public: TypeInfo* type_info = get_type_info(OLAP_FIELD_TYPE_VARCHAR); size_t size = slices.size(); Slice* values = reinterpret_cast(arena.Allocate(size * sizeof(Slice))); - ColumnBlock column_block(type_info, (uint8_t*)values, nullptr, &arena); + ColumnBlock column_block(type_info, (uint8_t*)values, nullptr, size, &arena); ColumnBlockView block_view(&column_block); status = page_decoder.next_batch(&size, &block_view); @@ -164,7 +164,7 @@ public: Arena arena; TypeInfo* type_info = get_type_info(OLAP_FIELD_TYPE_VARCHAR); Slice* values = reinterpret_cast(arena.Allocate(sizeof(Slice))); - ColumnBlock column_block(type_info, (uint8_t*)values, nullptr, &arena); + ColumnBlock column_block(type_info, (uint8_t*)values, nullptr, 1, &arena); ColumnBlockView block_view(&column_block); size_t num = 1; diff --git a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp index 45f02422b8..112bd0d6f0 100644 --- a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp @@ -65,7 +65,7 @@ public: Slice* values = reinterpret_cast(arena.Allocate(size * sizeof(Slice))); uint8_t* null_bitmap = reinterpret_cast(arena.Allocate(BitmapSize(size))); - ColumnBlock block(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values, null_bitmap, &arena); + ColumnBlock block(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values, null_bitmap, size, &arena); ColumnBlockView column_block_view(&block); status = page_decoder.next_batch(&size, &column_block_view); @@ -78,7 +78,7 @@ public: ASSERT_EQ ("Doris", value[2].to_string()); Slice* values2 = reinterpret_cast(arena.Allocate(size * sizeof(Slice))); - ColumnBlock block2(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values2, null_bitmap, &arena); + ColumnBlock block2(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values2, null_bitmap, 1, &arena); ColumnBlockView column_block_view2(&block2); size_t fetch_num = 1; diff --git a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp index c99347119f..5ed3e138f7 100644 --- a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp @@ -37,7 +37,7 @@ public: void copy_one(PageDecoderType* decoder, typename TypeTraits::CppType* ret) { Arena arena; uint8_t null_bitmap = 0; - ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena); + ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &arena); ColumnBlockView column_block_view(&block); size_t n = 1; @@ -68,7 +68,7 @@ public: CppType* values = reinterpret_cast(arena.Allocate(size * sizeof(CppType))); uint8_t* null_bitmap = reinterpret_cast(arena.Allocate(BitmapSize(size))); - ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena); + ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &arena); ColumnBlockView column_block_view(&block); status = page_decoder.next_batch(&size, &column_block_view); diff --git a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp index 91b82960a7..f88c35b749 100644 --- a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp +++ b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp @@ -72,7 +72,7 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s std::unique_ptr field(FieldFactory::create(column)); ColumnWriter writer(writer_opts, std::move(field), true, wfile.get()); st = writer.init(); - ASSERT_TRUE(st.ok()); + ASSERT_TRUE(st.ok()) << st.to_string(); for (int i = 0; i < num_rows; ++i) { st = writer.append(BitmapTest(src_is_null, i), src + i); @@ -117,13 +117,13 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s // sequence read { st = iter->seek_to_first(); - ASSERT_TRUE(st.ok()); + ASSERT_TRUE(st.ok()) << st.to_string(); Arena arena; Type vals[1024]; Type* vals_ = vals; uint8_t is_null[1024]; - ColumnBlock col(type_info, (uint8_t*)vals, is_null, &arena); + ColumnBlock col(type_info, (uint8_t*)vals, is_null, 1024, &arena); int idx = 0; while (true) { @@ -155,7 +155,7 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s Arena arena; Type vals[1024]; uint8_t is_null[1024]; - ColumnBlock col(type_info, (uint8_t*)vals, is_null, &arena); + ColumnBlock col(type_info, (uint8_t*)vals, is_null, 1024, &arena); for (int rowid = 0; rowid < num_rows; rowid += 4025) { st = iter->seek_to_ordinal(rowid); diff --git a/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp b/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp index 756b0963b0..641353682f 100644 --- a/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp @@ -35,7 +35,7 @@ public: void copy_one(PageDecoderType* decoder, typename TypeTraits::CppType* ret) { Arena arena; uint8_t null_bitmap = 0; - ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena); + ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &arena); ColumnBlockView column_block_view(&block); size_t n = 1; @@ -66,7 +66,7 @@ public: Arena arena; CppType* values = reinterpret_cast(arena.Allocate(size * sizeof(CppType))); uint8_t* null_bitmap = reinterpret_cast(arena.Allocate(BitmapSize(size))); - ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena); + ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &arena); ColumnBlockView column_block_view(&block); size_t size_to_fetch = size; status = for_page_decoder.next_batch(&size_to_fetch, &column_block_view); diff --git a/be/test/olap/rowset/segment_v2/plain_page_test.cpp b/be/test/olap/rowset/segment_v2/plain_page_test.cpp index 30b6bd4318..635a5d4e6a 100644 --- a/be/test/olap/rowset/segment_v2/plain_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/plain_page_test.cpp @@ -47,7 +47,7 @@ public: void copy_one(PageDecoderType* decoder, typename TypeTraits::CppType* ret) { Arena arena; uint8_t null_bitmap = 0; - ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena); + ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &arena); ColumnBlockView column_block_view(&block); size_t n = 1; @@ -78,7 +78,7 @@ public: CppType* values = reinterpret_cast(arena.Allocate(size * sizeof(CppType))); uint8_t* null_bitmap = reinterpret_cast(arena.Allocate(BitmapSize(size))); - ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena); + ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &arena); ColumnBlockView column_block_view(&block); status = page_decoder.next_batch(&size, &column_block_view); ASSERT_TRUE(status.ok()); diff --git a/be/test/olap/rowset/segment_v2/rle_page_test.cpp b/be/test/olap/rowset/segment_v2/rle_page_test.cpp index 97015950ab..d355cd5e15 100644 --- a/be/test/olap/rowset/segment_v2/rle_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/rle_page_test.cpp @@ -38,7 +38,7 @@ public: void copy_one(PageDecoderType* decoder, typename TypeTraits::CppType* ret) { Arena arena; uint8_t null_bitmap = 0; - ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena); + ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &arena); ColumnBlockView column_block_view(&block); size_t n = 1; @@ -70,7 +70,7 @@ public: CppType* values = reinterpret_cast(arena.Allocate(size * sizeof(CppType))); uint8_t* null_bitmap = reinterpret_cast(arena.Allocate(BitmapSize(size))); - ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena); + ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &arena); ColumnBlockView column_block_view(&block); size_t size_to_fetch = size; status = rle_page_decoder.next_batch(&size_to_fetch, &column_block_view); diff --git a/be/test/olap/selection_vector_test.cpp b/be/test/olap/selection_vector_test.cpp new file mode 100644 index 0000000000..4bacc06125 --- /dev/null +++ b/be/test/olap/selection_vector_test.cpp @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "olap/selection_vector.h" + +namespace doris { + +class SelectionVectorTest : public testing::Test { + +}; + +TEST_F(SelectionVectorTest, Normal) { + SelectionVector sel_vel(10); + ASSERT_EQ(10, sel_vel.nrows()); + sel_vel.set_all_true(); + ASSERT_EQ(" 0: 11111111 11 \n", sel_vel.to_string()); + sel_vel.set_all_false(); + ASSERT_EQ(" 0: 00000000 00 \n", sel_vel.to_string()); + sel_vel.set_row_selected(7); + ASSERT_TRUE(sel_vel.is_row_selected(7)); + ASSERT_TRUE(sel_vel.any_selected()); + ASSERT_EQ(" 0: 00000001 00 \n", sel_vel.to_string()); + sel_vel.clear_bit(7); + ASSERT_EQ(" 0: 00000000 00 \n", sel_vel.to_string()); +} + +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/run-ut.sh b/run-ut.sh index 6edfa50d1d..80b0bf49fd 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -278,6 +278,7 @@ ${DORIS_TEST_BINARY_DIR}/olap/short_key_index_test ${DORIS_TEST_BINARY_DIR}/olap/key_coder_test ${DORIS_TEST_BINARY_DIR}/olap/page_cache_test ${DORIS_TEST_BINARY_DIR}/olap/hll_test +${DORIS_TEST_BINARY_DIR}/olap/selection_vector_test # Running routine load test ${DORIS_TEST_BINARY_DIR}/runtime/kafka_consumer_pipe_test