add predicate filter(#1652) (#1775)

This commit is contained in:
kangpinghuang
2019-10-17 19:20:00 +08:00
committed by lichaoyong
parent 3bca253fb3
commit 4f7cc7e033
30 changed files with 639 additions and 133 deletions

View File

@ -33,8 +33,8 @@ class ColumnBlockCell;
// It doesn't own any data, user should keep the life of input data.
class ColumnBlock {
public:
ColumnBlock(const TypeInfo* type_info, uint8_t* data, uint8_t* null_bitmap, Arena* arena)
: _type_info(type_info), _data(data), _null_bitmap(null_bitmap), _arena(arena) { }
ColumnBlock(const TypeInfo* type_info, uint8_t* data, uint8_t* null_bitmap, size_t nrows, Arena* arena)
: _type_info(type_info), _data(data), _null_bitmap(null_bitmap), _nrows(nrows), _arena(arena) { }
const TypeInfo* type_info() const { return _type_info; }
uint8_t* data() const { return _data; }
@ -53,10 +53,14 @@ public:
}
ColumnBlockCell cell(size_t idx) const;
size_t nrows() const { return _nrows; }
private:
const TypeInfo* _type_info;
uint8_t* _data;
uint8_t* _null_bitmap;
size_t _nrows;
Arena* _arena;
};

View File

@ -18,16 +18,29 @@
#ifndef DORIS_BE_SRC_OLAP_COLUMN_PREDICATE_H
#define DORIS_BE_SRC_OLAP_COLUMN_PREDICATE_H
#include "olap/column_block.h"
#include "olap/selection_vector.h"
namespace doris {
class VectorizedRowBatch;
class ColumnPredicate {
public:
explicit ColumnPredicate(uint32_t column_id) : _column_id(column_id) { }
virtual ~ColumnPredicate() {}
//evaluate predicate on VectorizedRowBatch
virtual void evaluate(VectorizedRowBatch* batch) const = 0;
// evaluate predicate on ColumnBlock
virtual void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const = 0;
uint32_t column_id() const { return _column_id; }
protected:
uint32_t _column_id;
};
} //namespace doris

View File

@ -24,8 +24,8 @@ namespace doris {
#define COMPARISON_PRED_CONSTRUCTOR(CLASS) \
template<class type> \
CLASS<type>::CLASS(int column_id, const type& value) \
: _column_id(column_id), \
CLASS<type>::CLASS(uint32_t column_id, const type& value) \
: ColumnPredicate(column_id), \
_value(value) \
{} \
@ -38,8 +38,8 @@ COMPARISON_PRED_CONSTRUCTOR(GreaterEqualPredicate)
#define COMPARISON_PRED_CONSTRUCTOR_STRING(CLASS) \
template<> \
CLASS<StringValue>::CLASS(int column_id, const StringValue& value) \
: _column_id(column_id) \
CLASS<StringValue>::CLASS(uint32_t column_id, const StringValue& value) \
: ColumnPredicate(column_id) \
{ \
_value.len = value.len; \
_value.ptr = value.ptr; \
@ -102,7 +102,6 @@ COMPARISON_PRED_CONSTRUCTOR_STRING(GreaterEqualPredicate)
} \
} \
COMPARISON_PRED_EVALUATE(EqualPredicate, ==)
COMPARISON_PRED_EVALUATE(NotEqualPredicate, !=)
COMPARISON_PRED_EVALUATE(LessPredicate, <)
@ -110,18 +109,47 @@ COMPARISON_PRED_EVALUATE(LessEqualPredicate, <=)
COMPARISON_PRED_EVALUATE(GreaterPredicate, >)
COMPARISON_PRED_EVALUATE(GreaterEqualPredicate, >=)
#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP) \
template<class type> \
void CLASS<type>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { \
uint16_t new_size = 0; \
if (block->is_nullable()) { \
for (uint16_t i = 0; i < *size; ++i) { \
uint16_t idx = sel[i]; \
sel[new_size] = idx; \
const type* cell_value = reinterpret_cast<const type*>(block->cell(idx).cell_ptr()); \
new_size += (!block->cell(idx).is_null() && (*cell_value OP _value)); \
} \
} else { \
for (uint16_t i = 0; i < *size; ++i) { \
uint16_t idx = sel[i]; \
sel[new_size] = idx; \
const type* cell_value = reinterpret_cast<const type*>(block->cell(idx).cell_ptr()); \
new_size += (*cell_value OP _value); \
} \
} \
*size = new_size; \
} \
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(EqualPredicate, ==)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(NotEqualPredicate, !=)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(LessPredicate, <)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(LessEqualPredicate, <=)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
#define COMPARISON_PRED_CONSTRUCTOR_DECLARATION(CLASS) \
template CLASS<int8_t>::CLASS(int column_id, const int8_t& value); \
template CLASS<int16_t>::CLASS(int column_id, const int16_t& value); \
template CLASS<int32_t>::CLASS(int column_id, const int32_t& value); \
template CLASS<int64_t>::CLASS(int column_id, const int64_t& value); \
template CLASS<int128_t>::CLASS(int column_id, const int128_t& value); \
template CLASS<float>::CLASS(int column_id, const float& value); \
template CLASS<double>::CLASS(int column_id, const double& value); \
template CLASS<decimal12_t>::CLASS(int column_id, const decimal12_t& value); \
template CLASS<StringValue>::CLASS(int column_id, const StringValue& value); \
template CLASS<uint24_t>::CLASS(int column_id, const uint24_t& value); \
template CLASS<uint64_t>::CLASS(int column_id, const uint64_t& value); \
template CLASS<int8_t>::CLASS(uint32_t column_id, const int8_t& value); \
template CLASS<int16_t>::CLASS(uint32_t column_id, const int16_t& value); \
template CLASS<int32_t>::CLASS(uint32_t column_id, const int32_t& value); \
template CLASS<int64_t>::CLASS(uint32_t column_id, const int64_t& value); \
template CLASS<int128_t>::CLASS(uint32_t column_id, const int128_t& value); \
template CLASS<float>::CLASS(uint32_t column_id, const float& value); \
template CLASS<double>::CLASS(uint32_t column_id, const double& value); \
template CLASS<decimal12_t>::CLASS(uint32_t column_id, const decimal12_t& value); \
template CLASS<StringValue>::CLASS(uint32_t column_id, const StringValue& value); \
template CLASS<uint24_t>::CLASS(uint32_t column_id, const uint24_t& value); \
template CLASS<uint64_t>::CLASS(uint32_t column_id, const uint64_t& value); \
COMPARISON_PRED_CONSTRUCTOR_DECLARATION(EqualPredicate)
COMPARISON_PRED_CONSTRUCTOR_DECLARATION(NotEqualPredicate)
@ -150,4 +178,24 @@ COMPARISON_PRED_EVALUATE_DECLARATION(LessEqualPredicate)
COMPARISON_PRED_EVALUATE_DECLARATION(GreaterPredicate)
COMPARISON_PRED_EVALUATE_DECLARATION(GreaterEqualPredicate)
#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(CLASS) \
template void CLASS<int8_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<int16_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<int32_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<int64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<int128_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<float>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<double>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<decimal12_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<StringValue>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<uint24_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<uint64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(EqualPredicate)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotEqualPredicate)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(LessPredicate)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(LessEqualPredicate)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(GreaterPredicate)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(GreaterEqualPredicate)
} //namespace doris

View File

@ -29,11 +29,11 @@ class VectorizedRowBatch;
template <class type> \
class CLASS : public ColumnPredicate { \
public: \
CLASS(int column_id, const type& value); \
CLASS(uint32_t column_id, const type& value); \
virtual ~CLASS() { } \
virtual void evaluate(VectorizedRowBatch* batch) const override; \
void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const override; \
private: \
int32_t _column_id; \
type _value; \
}; \

View File

@ -94,6 +94,7 @@ Status AutoIncrementIterator::next_batch(RowBlockV2* block) {
_rows_returned++;
}
block->set_num_rows(row_idx);
block->set_selected_size(row_idx);
if (row_idx > 0) {
return Status::OK();
}
@ -125,7 +126,8 @@ public:
// Before call this function, Client must assure that
// valid() return true
RowBlockRow current_row() const {
return RowBlockRow(&_block, _index_in_block);
uint16_t* selection_vector = _block.selection_vector();
return RowBlockRow(&_block, selection_vector[_index_in_block]);
}
// Advance internal row index to next valid row
@ -148,22 +150,23 @@ private:
RowBlockV2 _block;
bool _valid = false;
size_t _index_in_block = 0;
size_t _index_in_block = -1;
};
Status MergeIteratorContext::init(const StorageReadOptions& opts) {
RETURN_IF_ERROR(_iter->init(opts));
RETURN_IF_ERROR(_load_next_block());
if (valid()) {
RETURN_IF_ERROR(advance());
}
return Status::OK();
}
Status MergeIteratorContext::advance() {
// NOTE: we increase _index_in_block directly to valid one check
_index_in_block++;
do {
for (; _index_in_block < _block.num_rows(); ++_index_in_block) {
// TODO(zc): we can skip rows that is fitered by conjunts here
// Now we return directly
_index_in_block++;
if (_index_in_block < _block.selected_size()) {
return Status::OK();
}
// current batch has no data, load next batch
@ -186,7 +189,7 @@ Status MergeIteratorContext::_load_next_block() {
}
}
} while (_block.num_rows() == 0);
_index_in_block = 0;
_index_in_block = -1;
_valid = true;
return Status::OK();
}
@ -266,6 +269,7 @@ Status MergeIterator::next_batch(RowBlockV2* block) {
}
}
block->set_num_rows(row_idx);
block->set_selected_size(row_idx);
if (row_idx > 0) {
return Status::OK();
} else {

View File

@ -24,8 +24,8 @@ namespace doris {
#define IN_LIST_PRED_CONSTRUCTOR(CLASS) \
template<class type> \
CLASS<type>::CLASS(int column_id, std::set<type>&& values) \
: _column_id(column_id), \
CLASS<type>::CLASS(uint32_t column_id, std::set<type>&& values) \
: ColumnPredicate(column_id), \
_values(std::move(values)) {} \
IN_LIST_PRED_CONSTRUCTOR(InListPredicate)
@ -84,18 +84,43 @@ void CLASS<type>::evaluate(VectorizedRowBatch* batch) const { \
IN_LIST_PRED_EVALUATE(InListPredicate, !=)
IN_LIST_PRED_EVALUATE(NotInListPredicate, ==)
#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP) \
template<class type> \
void CLASS<type>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { \
uint16_t new_size = 0; \
if (block->is_nullable()) { \
for (uint16_t i = 0; i < *size; ++i) { \
uint16_t idx = sel[i]; \
sel[new_size] = idx; \
const type* cell_value = reinterpret_cast<const type*>(block->cell(idx).cell_ptr()); \
new_size += (!block->cell(idx).is_null() && _values.find(*cell_value) OP _values.end()); \
} \
} else { \
for (uint16_t i = 0; i < *size; ++i) { \
uint16_t idx = sel[i]; \
sel[new_size] = idx; \
const type* cell_value = reinterpret_cast<const type*>(block->cell(idx).cell_ptr()); \
new_size += (_values.find(*cell_value) OP _values.end()); \
} \
} \
*size = new_size; \
} \
IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(InListPredicate, ==)
IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, !=)
#define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS) \
template CLASS<int8_t>::CLASS(int column_id, std::set<int8_t>&& values); \
template CLASS<int16_t>::CLASS(int column_id, std::set<int16_t>&& values); \
template CLASS<int32_t>::CLASS(int column_id, std::set<int32_t>&& values); \
template CLASS<int64_t>::CLASS(int column_id, std::set<int64_t>&& values); \
template CLASS<int128_t>::CLASS(int column_id, std::set<int128_t>&& values); \
template CLASS<float>::CLASS(int column_id, std::set<float>&& values); \
template CLASS<double>::CLASS(int column_id, std::set<double>&& values); \
template CLASS<decimal12_t>::CLASS(int column_id, std::set<decimal12_t>&& values); \
template CLASS<StringValue>::CLASS(int column_id, std::set<StringValue>&& values); \
template CLASS<uint24_t>::CLASS(int column_id, std::set<uint24_t>&& values); \
template CLASS<uint64_t>::CLASS(int column_id, std::set<uint64_t>&& values); \
template CLASS<int8_t>::CLASS(uint32_t column_id, std::set<int8_t>&& values); \
template CLASS<int16_t>::CLASS(uint32_t column_id, std::set<int16_t>&& values); \
template CLASS<int32_t>::CLASS(uint32_t column_id, std::set<int32_t>&& values); \
template CLASS<int64_t>::CLASS(uint32_t column_id, std::set<int64_t>&& values); \
template CLASS<int128_t>::CLASS(uint32_t column_id, std::set<int128_t>&& values); \
template CLASS<float>::CLASS(uint32_t column_id, std::set<float>&& values); \
template CLASS<double>::CLASS(uint32_t column_id, std::set<double>&& values); \
template CLASS<decimal12_t>::CLASS(uint32_t column_id, std::set<decimal12_t>&& values); \
template CLASS<StringValue>::CLASS(uint32_t column_id, std::set<StringValue>&& values); \
template CLASS<uint24_t>::CLASS(uint32_t column_id, std::set<uint24_t>&& values); \
template CLASS<uint64_t>::CLASS(uint32_t column_id, std::set<uint64_t>&& values); \
IN_LIST_PRED_CONSTRUCTOR_DECLARATION(InListPredicate)
IN_LIST_PRED_CONSTRUCTOR_DECLARATION(NotInListPredicate)
@ -115,4 +140,21 @@ IN_LIST_PRED_CONSTRUCTOR_DECLARATION(NotInListPredicate)
IN_LIST_PRED_EVALUATE_DECLARATION(InListPredicate)
IN_LIST_PRED_EVALUATE_DECLARATION(NotInListPredicate)
#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(CLASS) \
template void CLASS<int8_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<int16_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<int32_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<int64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<int128_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<float>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<double>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<decimal12_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<StringValue>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<uint24_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
template void CLASS<uint64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(InListPredicate)
IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotInListPredicate)
} //namespace doris

View File

@ -30,11 +30,11 @@ class VectorizedRowBatch;
template <class type> \
class CLASS : public ColumnPredicate { \
public: \
CLASS(int column_id, std::set<type>&& values); \
CLASS(uint32_t column_id, std::set<type>&& values); \
virtual ~CLASS() {} \
virtual void evaluate(VectorizedRowBatch* batch) const override; \
void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const override; \
private: \
int32_t _column_id; \
std::set<type> _values; \
}; \

View File

@ -27,6 +27,7 @@ class RowCursor;
class RowBlockV2;
class Schema;
class Conditions;
class ColumnPredicate;
class StorageReadOptions {
public:
@ -69,6 +70,11 @@ public:
// delete conditions used by column index to filter pages
std::vector<const Conditions*> delete_conditions;
// reader's column predicate, nullptr if not existed
// used to fiter rows in row block
// TODO(hkp): refactor the column predicate framework
// to unify Conditions and ColumnPredicate
const std::vector<ColumnPredicate*>* column_predicates = nullptr;
};
// Used to read data in RowBlockV2 one by one

View File

@ -22,8 +22,8 @@
namespace doris {
NullPredicate::NullPredicate(int32_t column_id, bool is_null)
: _column_id(column_id), _is_null(is_null) {}
NullPredicate::NullPredicate(uint32_t column_id, bool is_null)
: ColumnPredicate(column_id), _is_null(is_null) {}
NullPredicate::~NullPredicate() {}
@ -60,4 +60,18 @@ void NullPredicate::evaluate(VectorizedRowBatch* batch) const {
}
}
void NullPredicate::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const {
uint16_t new_size = 0;
if (!block->is_nullable() && _is_null) {
*size = 0;
return;
}
for (uint16_t i = 0; i < *size; ++i) {
uint16_t idx = sel[i];
sel[new_size] = idx;
new_size += (block->cell(idx).is_null() == _is_null);
}
*size = new_size;
}
} //namespace doris

View File

@ -18,7 +18,6 @@
#ifndef DORIS_BE_SRC_OLAP_NULL_PREDICATE_H
#define DORIS_BE_SRC_OLAP_NULL_PREDICATE_H
#include <stdint.h>
#include "olap/column_predicate.h"
namespace doris {
@ -27,12 +26,14 @@ class VectorizedRowBatch;
class NullPredicate : public ColumnPredicate {
public:
NullPredicate(int32_t column_id, bool is_null);
NullPredicate(uint32_t column_id, bool is_null);
virtual ~NullPredicate();
virtual void evaluate(VectorizedRowBatch* batch) const override;
void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const override;
private:
int32_t _column_id;
bool _is_null; //true for null, false for not null
};

View File

@ -30,7 +30,8 @@ RowBlockV2::RowBlockV2(const Schema& schema, uint16_t capacity)
: _schema(schema),
_capacity(capacity),
_column_datas(_schema.num_columns(), nullptr),
_column_null_bitmaps(_schema.num_columns(), nullptr) {
_column_null_bitmaps(_schema.num_columns(), nullptr),
_selection_vector(nullptr) {
auto bitmap_size = BitmapSize(capacity);
for (auto cid : _schema.column_ids()) {
size_t data_size = _schema.column(cid)->type_info()->size() * _capacity;
@ -40,6 +41,7 @@ RowBlockV2::RowBlockV2(const Schema& schema, uint16_t capacity)
_column_null_bitmaps[cid] = new uint8_t[bitmap_size];;
}
}
_selection_vector = new uint16_t[_capacity];
clear();
}
@ -50,6 +52,7 @@ RowBlockV2::~RowBlockV2() {
for (auto null_bitmap : _column_null_bitmaps) {
delete[] null_bitmap;
}
delete[] _selection_vector;
}
Status RowBlockV2::copy_to_row_cursor(size_t row_idx, RowCursor* cursor) {

View File

@ -25,6 +25,7 @@
#include "olap/column_block.h"
#include "olap/schema.h"
#include "olap/types.h"
#include "olap/selection_vector.h"
namespace doris {
@ -54,6 +55,10 @@ public:
void clear() {
_num_rows = 0;
_arena.reset(new Arena);
_selected_size = _capacity;
for (int i = 0; i < _selected_size; ++i) {
_selection_vector[i] = i;
}
}
// Copy the row_idx row's data into given row_cursor.
@ -67,13 +72,25 @@ public:
const TypeInfo* type_info = _schema.column(cid)->type_info();
uint8_t* data = _column_datas[cid];
uint8_t* null_bitmap = _column_null_bitmaps[cid];
return ColumnBlock(type_info, data, null_bitmap, _arena.get());
return ColumnBlock(type_info, data, null_bitmap, _capacity, _arena.get());
}
RowBlockRow row(size_t row_idx) const;
const Schema* schema() const { return &_schema; }
uint16_t* selection_vector() const {
return _selection_vector;
}
uint16_t selected_size() const {
return _selected_size;
}
void set_selected_size(uint16_t selected_size) {
_selected_size = selected_size;
}
private:
Schema _schema;
size_t _capacity;
@ -88,6 +105,11 @@ private:
size_t _num_rows;
// manages the memory for slice's data
std::unique_ptr<Arena> _arena;
// index of selected rows for rows passed the predicate
uint16_t* _selection_vector;
// selected rows number
uint16_t _selected_size;
};
// Stands for a row in RowBlockV2. It is consisted of a RowBlockV2 reference

View File

@ -52,6 +52,7 @@ OLAPStatus BetaRowsetReader::init(RowsetReaderContext* read_context) {
read_context->delete_handler->get_delete_conditions_after_version(_rowset->end_version(),
&read_options.delete_conditions);
}
read_options.column_predicates = read_context->predicates;
// create iterator for each segment
std::vector<std::unique_ptr<RowwiseIterator>> seg_iterators;
@ -117,7 +118,10 @@ OLAPStatus BetaRowsetReader::next_block(RowBlock** block) {
}
// convert to output block
_output_block->clear();
for (size_t row_idx = 0; row_idx < _input_block->num_rows(); ++row_idx) {
size_t rows_read = 0;
uint16_t* selection_vector = _input_block->selection_vector();
for (size_t i = 0; i < _input_block->selected_size(); ++i) {
uint16_t row_idx = selection_vector[i];
// shallow copy row from input block to output block
_output_block->get_row(row_idx, _row.get());
// this copy function will copy return_columns' row to seek_columns's row_cursor
@ -126,10 +130,11 @@ OLAPStatus BetaRowsetReader::next_block(RowBlock** block) {
LOG(WARNING) << "failed to copy row: " << s.to_string();
return OLAP_ERR_ROWSET_READ_FAILED;
}
++rows_read;
}
_output_block->set_pos(0);
_output_block->set_limit(_input_block->num_rows());
_output_block->finalize(_input_block->num_rows());
_output_block->set_limit(rows_read);
_output_block->finalize(rows_read);
*block = _output_block.get();
// update raw_rows_read counter
_context->stats->raw_rows_read += _input_block->num_rows();

View File

@ -206,7 +206,7 @@ Status BinaryDictPageDecoder::next_batch(size_t* n, ColumnBlockView* dst) {
// And then copy the strings corresponding to the codewords to the destination buffer
TypeInfo *type_info = get_type_info(OLAP_FIELD_TYPE_INT);
// the data in page is not null
ColumnBlock column_block(type_info, _code_buf.data(), nullptr, dst->column_block()->arena());
ColumnBlock column_block(type_info, _code_buf.data(), nullptr, *n, dst->column_block()->arena());
ColumnBlockView tmp_block_view(&column_block);
RETURN_IF_ERROR(_data_page_decoder->next_batch(n, &tmp_block_view));
for (int i = 0; i < *n; ++i) {

View File

@ -21,6 +21,7 @@
#include "olap/rowset/segment_v2/bitshuffle_page.h"
#include "olap/rowset/segment_v2/rle_page.h"
#include "olap/rowset/segment_v2/binary_dict_page.h"
#include "gutil/strings/substitute.h"
namespace doris {
namespace segment_v2 {
@ -167,7 +168,9 @@ Status EncodingInfoResolver::get(
auto key = std::make_pair(data_type, encoding_type);
auto it = _encoding_map.find(key);
if (it == std::end(_encoding_map)) {
return Status::InternalError("fail to find valid type encoding");
return Status::InternalError(
strings::Substitute("fail to find valid type encoding, type:$0, encoding:$1",
data_type, encoding_type));
}
*out = it->second;
return Status::OK();

View File

@ -26,6 +26,7 @@
#include "olap/row_block2.h"
#include "olap/row_cursor.h"
#include "olap/short_key_index.h"
#include "olap/column_predicate.h"
using strings::Substitute;
@ -310,34 +311,50 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
block->set_num_rows(0);
return Status::EndOfFile("no more data in segment");
}
size_t rows_to_read = block->capacity();
while (rows_to_read > 0) {
if (_cur_rowid >= _row_ranges.get_range_to(_cur_range_id)) {
// current row range is read over, trying to read from next range
if (_cur_range_id >= _row_ranges.range_size() - 1) {
// there is no more row range
break;
}
// check whether need to seek
if (_cur_rowid >= _row_ranges.get_range_to(_cur_range_id)) {
while (true) {
// step to next row range
++_cur_range_id;
_cur_rowid = _row_ranges.get_range_from(_cur_range_id);
// current row range is read over, trying to read from next range
if (_cur_range_id >= _row_ranges.range_size() - 1) {
block->set_num_rows(0);
return Status::EndOfFile("no more data in segment");
}
if (_row_ranges.get_range_count(_cur_range_id) == 0) {
// current row range is empty, just skip seek
continue;
}
_cur_rowid = _row_ranges.get_range_from(_cur_range_id);
for (auto cid : block->schema()->column_ids()) {
RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(_cur_rowid));
}
break;
}
size_t to_read_in_range = std::min(rows_to_read, size_t(_row_ranges.get_range_to(_cur_range_id) - _cur_rowid));
RETURN_IF_ERROR(_next_batch(block, &to_read_in_range));
_cur_rowid += to_read_in_range;
rows_to_read -= to_read_in_range;
}
block->set_num_rows(block->capacity() - rows_to_read);
// next_batch just return the rows in current row range
// it is easier to realize lazy materialization in the future
size_t rows_to_read = std::min(block->capacity(), size_t(_row_ranges.get_range_to(_cur_range_id) - _cur_rowid));
RETURN_IF_ERROR(_next_batch(block, &rows_to_read));
_cur_rowid += rows_to_read;
block->set_num_rows(rows_to_read);
if (block->num_rows() == 0) {
return Status::EndOfFile("no more data in segment");
}
// column predicate vectorization execution
// TODO(hkp): lazy materialization
// TODO(hkp): optimize column predicate to check column block once for one column
if (_opts.column_predicates != nullptr) {
// init selection position index
uint16_t selected_size = block->selected_size();
for (auto column_predicate : *_opts.column_predicates) {
auto column_block = block->column_block(column_predicate->column_id());
column_predicate->evaluate(&column_block, block->selection_vector(), &selected_size);
}
block->set_selected_size(selected_size);
}
return Status::OK();
}

View File

@ -0,0 +1,172 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "util/bitmap.h"
namespace doris {
// Bit-vector representing the selection status of each row in a row block.
//
// It is used to scan data with column predicate.
// the bit in the selection vector is used to indicate whether the given row is live.
class SelectionVector {
public:
// Construct a new vector. The bits are initially in an indeterminate state.
// Call set_all_true() if you require all rows to be initially selected.
explicit SelectionVector(size_t row_capacity)
: _n_rows(row_capacity),
_n_bytes(BitmapSize(row_capacity)),
_bitmap(new uint8_t[_n_bytes]) {
CHECK_GT(_n_bytes, 0);
set_all_false();
}
// returen the number of selected rows.
size_t count_selected() const {
return Bits::Count(_bitmap.get(), _n_bytes);
}
// Return true if any rows are selected, or false
// This is equivalent to (count_selected() > 0), but faster.
inline bool any_selected() const;
bool is_row_selected(size_t row) const {
DCHECK_LT(row, _n_rows);
return BitmapTest(_bitmap.get(), row);
}
void set_row_selected(size_t row) {
DCHECK_LT(row, _n_rows);
BitmapSet(_bitmap.get(), row);
}
void set_all_true() {
memset(_bitmap.get(), 0xff, _n_bytes);
pad_extra_bits_wit_zeroes();
}
void set_all_false() {
memset(_bitmap.get(), 0, _n_bytes);
}
void clear_bit(size_t row) {
DCHECK_LT(row, _n_rows);
return BitmapClear(_bitmap.get(), row);
}
uint8_t* mutable_bitmap() { return _bitmap.get(); }
const uint8_t* bitmap() const { return _bitmap.get(); }
size_t nrows() const { return _n_rows; }
std::string to_string() const {
return BitmapToString(_bitmap.get(), _n_rows);
}
private:
// Pads any non-byte-aligned bits at the end of the SelectionVector with
// zeroes.
//
// To improve performance, CountSelected() and AnySelected() evaluate the
// SelectionVector's bitmap in terms of bytes. As such, they consider all of
// the trailing bits, even if the bitmap's bit length is not byte-aligned and
// some trailing bits aren't part of the bitmap.
//
// To address this without sacrificing performance, we need to zero out all
// trailing bits at construction time, or after any operation that sets all
// bytes in bulk.
void pad_extra_bits_wit_zeroes() {
size_t bits_in_last_byte = _n_rows & 7;
if (bits_in_last_byte > 0) {
BitmapChangeBits(_bitmap.get(), _n_rows, 8 - bits_in_last_byte, false);
}
}
private:
// row capacity
size_t _n_rows;
size_t _n_bytes;
std::unique_ptr<uint8_t[]> _bitmap;
DISALLOW_COPY_AND_ASSIGN(SelectionVector);
};
inline bool SelectionVector::any_selected() const {
size_t rem = _n_bytes;
const uint32_t* p32 = reinterpret_cast<const uint32_t*>(_bitmap.get());
while (rem >= 4) {
if (*p32 != 0) {
return true;
}
++p32;
rem -= 4;
}
const uint8_t* p8 = reinterpret_cast<const uint8_t*>(p32);
while (rem > 0) {
if (*p8 != 0) {
return true;
}
++p8;
--rem;
}
return false;
}
// A SelectionVectorView keeps track of where in the selection vector a given
// batch will start from. After processing a batch, Advance() should be called
// and the view will move forward by the appropriate amount. In this way, the
// underlying selection vector can easily be updated batch-by-batch.
class SelectionVectorView {
public:
// Constructs a new SelectionVectorView.
//
// The 'sel_vec' object must outlive this SelectionVectorView.
explicit SelectionVectorView(SelectionVector* sel_vec)
: _sel_vec(sel_vec), _row_offset(0) {}
void advance(size_t skip) {
DCHECK_LE(skip, _sel_vec->nrows() - _row_offset);
_row_offset += skip;
}
void set_bit(size_t row_idx) {
DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
BitmapSet(_sel_vec->mutable_bitmap(), _row_offset + row_idx);
}
void clear_bit(size_t row_idx) {
DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
BitmapClear(_sel_vec->mutable_bitmap(), _row_offset + row_idx);
}
bool test_bit(size_t row_idx) {
DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
return BitmapTest(_sel_vec->bitmap(), _row_offset + row_idx);
}
void clear_bits(size_t nrows) {
DCHECK_LE(nrows, _sel_vec->nrows() - _row_offset);
BitmapChangeBits(_sel_vec->mutable_bitmap(), _row_offset, nrows, false);
}
private:
SelectionVector* _sel_vec;
size_t _row_offset;
};
}

View File

@ -19,6 +19,8 @@
#include <sstream>
#include "gutil/stringprintf.h"
namespace doris {
std::string Bitmap::DebugString(bool print_bits) const {
@ -132,4 +134,21 @@ bool BitmapFindFirst(const uint8_t *bitmap, size_t offset, size_t bitmap_size,
return false;
}
std::string BitmapToString(const uint8_t *bitmap, size_t num_bits) {
std::string s;
size_t index = 0;
while (index < num_bits) {
StringAppendF(&s, "%4zu: ", index);
for (int i = 0; i < 8 && index < num_bits; ++i) {
for (int j = 0; j < 8 && index < num_bits; ++j) {
StringAppendF(&s, "%d", BitmapTest(bitmap, index));
index++;
}
StringAppendF(&s, " ");
}
StringAppendF(&s, "\n");
}
return s;
}
}

View File

@ -113,6 +113,13 @@ inline bool BitmapEquals(const uint8_t* bm1, const uint8_t* bm2, size_t bitmap_s
return (bm1[num_full_bytes] & mask) == (bm2[num_full_bytes] & mask);
}
// This function will print the bitmap content in a format like the following:
// eg: 0001110000100010110011001100001100110011
// output:
// 0000: 00011100 00100010 11001100 11000011
// 0016: 00110011
std::string BitmapToString(const uint8_t *bitmap, size_t num_bits);
// Iterator which yields ranges of set and unset bits.
// Example usage:
// bool value;

View File

@ -73,3 +73,4 @@ ADD_BE_TEST(short_key_index_test)
ADD_BE_TEST(page_cache_test)
ADD_BE_TEST(hll_test)
ADD_BE_TEST(memtable_flush_executor_test)
ADD_BE_TEST(selection_vector_test)

View File

@ -30,6 +30,7 @@
#include "olap/storage_engine.h"
#include "olap/tablet_schema.h"
#include "olap/utils.h"
#include "olap/comparison_predicate.h"
#include "runtime/mem_tracker.h"
#include "runtime/mem_pool.h"
#include "util/slice.h"
@ -184,37 +185,75 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) {
reader_context.seek_columns = &return_columns;
reader_context.stats = &_stats;
RowsetReaderSharedPtr rowset_reader;
create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
// without predicates
{
RowsetReaderSharedPtr rowset_reader;
create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
RowBlock* output_block;
uint32_t num_rows_read = 0;
while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) {
ASSERT_TRUE(output_block != nullptr);
ASSERT_GT(output_block->row_num(), 0);
ASSERT_EQ(0, output_block->pos());
ASSERT_EQ(output_block->row_num(), output_block->limit());
ASSERT_EQ(return_columns, output_block->row_block_info().column_ids);
// after sort merge segments, k1 will be 0, 1, 2, 10, 11, 12, 20, 21, 22, ..., 40950, 40951, 40952
for (int i = 0; i < output_block->row_num(); ++i) {
char* field1 = output_block->field_ptr(i, 0);
char* field2 = output_block->field_ptr(i, 1);
// test null bit
ASSERT_FALSE(*reinterpret_cast<bool*>(field1));
ASSERT_FALSE(*reinterpret_cast<bool*>(field2));
uint32_t k1 = *reinterpret_cast<uint32_t*>(field1 + 1);
uint32_t k2 = *reinterpret_cast<uint32_t*>(field2 + 1);
ASSERT_EQ(k1 * 10, k2);
RowBlock* output_block;
uint32_t num_rows_read = 0;
while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) {
ASSERT_TRUE(output_block != nullptr);
ASSERT_GT(output_block->row_num(), 0);
ASSERT_EQ(0, output_block->pos());
ASSERT_EQ(output_block->row_num(), output_block->limit());
ASSERT_EQ(return_columns, output_block->row_block_info().column_ids);
// after sort merge segments, k1 will be 0, 1, 2, 10, 11, 12, 20, 21, 22, ..., 40950, 40951, 40952
for (int i = 0; i < output_block->row_num(); ++i) {
char* field1 = output_block->field_ptr(i, 0);
char* field2 = output_block->field_ptr(i, 1);
// test null bit
ASSERT_FALSE(*reinterpret_cast<bool*>(field1));
ASSERT_FALSE(*reinterpret_cast<bool*>(field2));
uint32_t k1 = *reinterpret_cast<uint32_t*>(field1 + 1);
uint32_t k2 = *reinterpret_cast<uint32_t*>(field2 + 1);
ASSERT_EQ(k1 * 10, k2);
int rid = num_rows_read / 3;
int seg_id = num_rows_read % 3;
ASSERT_EQ(rid * 10 + seg_id, k1);
num_rows_read++;
int rid = num_rows_read / 3;
int seg_id = num_rows_read % 3;
ASSERT_EQ(rid * 10 + seg_id, k1);
num_rows_read++;
}
}
EXPECT_EQ(OLAP_ERR_DATA_EOF, s);
EXPECT_TRUE(output_block == nullptr);
EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read);
}
// merge segments with predicates
{
std::vector<ColumnPredicate*> column_predicates;
// column predicate: k1 = 10
std::unique_ptr<ColumnPredicate> predicate(new EqualPredicate<int32_t>(0, 10));
column_predicates.emplace_back(predicate.get());
reader_context.predicates = &column_predicates;
RowsetReaderSharedPtr rowset_reader;
create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
RowBlock* output_block;
uint32_t num_rows_read = 0;
while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) {
ASSERT_TRUE(output_block != nullptr);
ASSERT_EQ(1, output_block->row_num());
ASSERT_EQ(0, output_block->pos());
ASSERT_EQ(output_block->row_num(), output_block->limit());
ASSERT_EQ(return_columns, output_block->row_block_info().column_ids);
// after sort merge segments, k1 will be 10
for (int i = 0; i < output_block->row_num(); ++i) {
char* field1 = output_block->field_ptr(i, 0);
char* field2 = output_block->field_ptr(i, 1);
// test null bit
ASSERT_FALSE(*reinterpret_cast<bool*>(field1));
ASSERT_FALSE(*reinterpret_cast<bool*>(field2));
uint32_t k1 = *reinterpret_cast<uint32_t*>(field1 + 1);
uint32_t k2 = *reinterpret_cast<uint32_t*>(field2 + 1);
ASSERT_EQ(10, k1);
ASSERT_EQ(k1 * 10, k2);
num_rows_read++;
}
}
EXPECT_EQ(OLAP_ERR_DATA_EOF, s);
EXPECT_TRUE(output_block == nullptr);
EXPECT_EQ(1, num_rows_read);
}
EXPECT_EQ(OLAP_ERR_DATA_EOF, s);
EXPECT_TRUE(output_block == nullptr);
EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read);
}
{ // test return unordered data and only k3
@ -226,30 +265,66 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) {
reader_context.seek_columns = &return_columns;
reader_context.stats = &_stats;
RowsetReaderSharedPtr rowset_reader;
create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
// without predicate
{
RowsetReaderSharedPtr rowset_reader;
create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
RowBlock* output_block;
uint32_t num_rows_read = 0;
while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) {
ASSERT_TRUE(output_block != nullptr);
ASSERT_GT(output_block->row_num(), 0);
ASSERT_EQ(0, output_block->pos());
ASSERT_EQ(output_block->row_num(), output_block->limit());
ASSERT_EQ(return_columns, output_block->row_block_info().column_ids);
// for unordered result, k3 will be 0, 1, 2, ..., 4096*3-1
for (int i = 0; i < output_block->row_num(); ++i) {
char* field3 = output_block->field_ptr(i, 2);
// test null bit
ASSERT_FALSE(*reinterpret_cast<bool*>(field3));
uint32_t k3 = *reinterpret_cast<uint32_t*>(field3 + 1);
ASSERT_EQ(num_rows_read, k3);
num_rows_read++;
RowBlock* output_block;
uint32_t num_rows_read = 0;
while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) {
ASSERT_TRUE(output_block != nullptr);
ASSERT_GT(output_block->row_num(), 0);
ASSERT_EQ(0, output_block->pos());
ASSERT_EQ(output_block->row_num(), output_block->limit());
ASSERT_EQ(return_columns, output_block->row_block_info().column_ids);
// for unordered result, k3 will be 0, 1, 2, ..., 4096*3-1
for (int i = 0; i < output_block->row_num(); ++i) {
char* field3 = output_block->field_ptr(i, 2);
// test null bit
ASSERT_FALSE(*reinterpret_cast<bool*>(field3));
uint32_t k3 = *reinterpret_cast<uint32_t*>(field3 + 1);
ASSERT_EQ(num_rows_read, k3);
num_rows_read++;
}
}
EXPECT_EQ(OLAP_ERR_DATA_EOF, s);
EXPECT_TRUE(output_block == nullptr);
EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read);
}
// with predicate
{
std::vector<ColumnPredicate*> column_predicates;
// column predicate: k3 < 100
ColumnPredicate* predicate = new LessPredicate<int32_t>(2, 100);
column_predicates.emplace_back(predicate);
reader_context.predicates = &column_predicates;
RowsetReaderSharedPtr rowset_reader;
create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
RowBlock* output_block;
uint32_t num_rows_read = 0;
while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) {
ASSERT_TRUE(output_block != nullptr);
ASSERT_LE(output_block->row_num(), 100);
ASSERT_EQ(0, output_block->pos());
ASSERT_EQ(output_block->row_num(), output_block->limit());
ASSERT_EQ(return_columns, output_block->row_block_info().column_ids);
// for unordered result, k3 will be 0, 1, 2, ..., 99
for (int i = 0; i < output_block->row_num(); ++i) {
char* field3 = output_block->field_ptr(i, 2);
// test null bit
ASSERT_FALSE(*reinterpret_cast<bool*>(field3));
uint32_t k3 = *reinterpret_cast<uint32_t*>(field3 + 1);
ASSERT_EQ(num_rows_read, k3);
num_rows_read++;
}
}
EXPECT_EQ(OLAP_ERR_DATA_EOF, s);
EXPECT_TRUE(output_block == nullptr);
EXPECT_EQ(100, num_rows_read);
}
EXPECT_EQ(OLAP_ERR_DATA_EOF, s);
EXPECT_TRUE(output_block == nullptr);
EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read);
}
}

View File

@ -74,7 +74,7 @@ public:
TypeInfo* type_info = get_type_info(OLAP_FIELD_TYPE_VARCHAR);
size_t size = slices.size();
Slice* values = reinterpret_cast<Slice*>(arena.Allocate(size * sizeof(Slice)));
ColumnBlock column_block(type_info, (uint8_t*)values, nullptr, &arena);
ColumnBlock column_block(type_info, (uint8_t*)values, nullptr, size, &arena);
ColumnBlockView block_view(&column_block);
status = page_decoder.next_batch(&size, &block_view);
@ -164,7 +164,7 @@ public:
Arena arena;
TypeInfo* type_info = get_type_info(OLAP_FIELD_TYPE_VARCHAR);
Slice* values = reinterpret_cast<Slice*>(arena.Allocate(sizeof(Slice)));
ColumnBlock column_block(type_info, (uint8_t*)values, nullptr, &arena);
ColumnBlock column_block(type_info, (uint8_t*)values, nullptr, 1, &arena);
ColumnBlockView block_view(&column_block);
size_t num = 1;

View File

@ -65,7 +65,7 @@ public:
Slice* values = reinterpret_cast<Slice*>(arena.Allocate(size * sizeof(Slice)));
uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
ColumnBlock block(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values, null_bitmap, &arena);
ColumnBlock block(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values, null_bitmap, size, &arena);
ColumnBlockView column_block_view(&block);
status = page_decoder.next_batch(&size, &column_block_view);
@ -78,7 +78,7 @@ public:
ASSERT_EQ ("Doris", value[2].to_string());
Slice* values2 = reinterpret_cast<Slice*>(arena.Allocate(size * sizeof(Slice)));
ColumnBlock block2(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values2, null_bitmap, &arena);
ColumnBlock block2(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values2, null_bitmap, 1, &arena);
ColumnBlockView column_block_view2(&block2);
size_t fetch_num = 1;

View File

@ -37,7 +37,7 @@ public:
void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
Arena arena;
uint8_t null_bitmap = 0;
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena);
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &arena);
ColumnBlockView column_block_view(&block);
size_t n = 1;
@ -68,7 +68,7 @@ public:
CppType* values = reinterpret_cast<CppType*>(arena.Allocate(size * sizeof(CppType)));
uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena);
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &arena);
ColumnBlockView column_block_view(&block);
status = page_decoder.next_batch(&size, &column_block_view);

View File

@ -72,7 +72,7 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s
std::unique_ptr<Field> field(FieldFactory::create(column));
ColumnWriter writer(writer_opts, std::move(field), true, wfile.get());
st = writer.init();
ASSERT_TRUE(st.ok());
ASSERT_TRUE(st.ok()) << st.to_string();
for (int i = 0; i < num_rows; ++i) {
st = writer.append(BitmapTest(src_is_null, i), src + i);
@ -117,13 +117,13 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s
// sequence read
{
st = iter->seek_to_first();
ASSERT_TRUE(st.ok());
ASSERT_TRUE(st.ok()) << st.to_string();
Arena arena;
Type vals[1024];
Type* vals_ = vals;
uint8_t is_null[1024];
ColumnBlock col(type_info, (uint8_t*)vals, is_null, &arena);
ColumnBlock col(type_info, (uint8_t*)vals, is_null, 1024, &arena);
int idx = 0;
while (true) {
@ -155,7 +155,7 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s
Arena arena;
Type vals[1024];
uint8_t is_null[1024];
ColumnBlock col(type_info, (uint8_t*)vals, is_null, &arena);
ColumnBlock col(type_info, (uint8_t*)vals, is_null, 1024, &arena);
for (int rowid = 0; rowid < num_rows; rowid += 4025) {
st = iter->seek_to_ordinal(rowid);

View File

@ -35,7 +35,7 @@ public:
void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
Arena arena;
uint8_t null_bitmap = 0;
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena);
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &arena);
ColumnBlockView column_block_view(&block);
size_t n = 1;
@ -66,7 +66,7 @@ public:
Arena arena;
CppType* values = reinterpret_cast<CppType*>(arena.Allocate(size * sizeof(CppType)));
uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena);
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &arena);
ColumnBlockView column_block_view(&block);
size_t size_to_fetch = size;
status = for_page_decoder.next_batch(&size_to_fetch, &column_block_view);

View File

@ -47,7 +47,7 @@ public:
void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
Arena arena;
uint8_t null_bitmap = 0;
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena);
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &arena);
ColumnBlockView column_block_view(&block);
size_t n = 1;
@ -78,7 +78,7 @@ public:
CppType* values = reinterpret_cast<CppType*>(arena.Allocate(size * sizeof(CppType)));
uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena);
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &arena);
ColumnBlockView column_block_view(&block);
status = page_decoder.next_batch(&size, &column_block_view);
ASSERT_TRUE(status.ok());

View File

@ -38,7 +38,7 @@ public:
void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
Arena arena;
uint8_t null_bitmap = 0;
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena);
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &arena);
ColumnBlockView column_block_view(&block);
size_t n = 1;
@ -70,7 +70,7 @@ public:
CppType* values = reinterpret_cast<CppType*>(arena.Allocate(size * sizeof(CppType)));
uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena);
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &arena);
ColumnBlockView column_block_view(&block);
size_t size_to_fetch = size;
status = rle_page_decoder.next_batch(&size_to_fetch, &column_block_view);

View File

@ -0,0 +1,49 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include "olap/selection_vector.h"
namespace doris {
class SelectionVectorTest : public testing::Test {
};
TEST_F(SelectionVectorTest, Normal) {
SelectionVector sel_vel(10);
ASSERT_EQ(10, sel_vel.nrows());
sel_vel.set_all_true();
ASSERT_EQ(" 0: 11111111 11 \n", sel_vel.to_string());
sel_vel.set_all_false();
ASSERT_EQ(" 0: 00000000 00 \n", sel_vel.to_string());
sel_vel.set_row_selected(7);
ASSERT_TRUE(sel_vel.is_row_selected(7));
ASSERT_TRUE(sel_vel.any_selected());
ASSERT_EQ(" 0: 00000001 00 \n", sel_vel.to_string());
sel_vel.clear_bit(7);
ASSERT_EQ(" 0: 00000000 00 \n", sel_vel.to_string());
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@ -278,6 +278,7 @@ ${DORIS_TEST_BINARY_DIR}/olap/short_key_index_test
${DORIS_TEST_BINARY_DIR}/olap/key_coder_test
${DORIS_TEST_BINARY_DIR}/olap/page_cache_test
${DORIS_TEST_BINARY_DIR}/olap/hll_test
${DORIS_TEST_BINARY_DIR}/olap/selection_vector_test
# Running routine load test
${DORIS_TEST_BINARY_DIR}/runtime/kafka_consumer_pipe_test