committed by
lichaoyong
parent
3bca253fb3
commit
4f7cc7e033
@ -33,8 +33,8 @@ class ColumnBlockCell;
|
||||
// It doesn't own any data, user should keep the life of input data.
|
||||
class ColumnBlock {
|
||||
public:
|
||||
ColumnBlock(const TypeInfo* type_info, uint8_t* data, uint8_t* null_bitmap, Arena* arena)
|
||||
: _type_info(type_info), _data(data), _null_bitmap(null_bitmap), _arena(arena) { }
|
||||
ColumnBlock(const TypeInfo* type_info, uint8_t* data, uint8_t* null_bitmap, size_t nrows, Arena* arena)
|
||||
: _type_info(type_info), _data(data), _null_bitmap(null_bitmap), _nrows(nrows), _arena(arena) { }
|
||||
|
||||
const TypeInfo* type_info() const { return _type_info; }
|
||||
uint8_t* data() const { return _data; }
|
||||
@ -53,10 +53,14 @@ public:
|
||||
}
|
||||
|
||||
ColumnBlockCell cell(size_t idx) const;
|
||||
|
||||
size_t nrows() const { return _nrows; }
|
||||
|
||||
private:
|
||||
const TypeInfo* _type_info;
|
||||
uint8_t* _data;
|
||||
uint8_t* _null_bitmap;
|
||||
size_t _nrows;
|
||||
Arena* _arena;
|
||||
};
|
||||
|
||||
|
||||
@ -18,16 +18,29 @@
|
||||
#ifndef DORIS_BE_SRC_OLAP_COLUMN_PREDICATE_H
|
||||
#define DORIS_BE_SRC_OLAP_COLUMN_PREDICATE_H
|
||||
|
||||
#include "olap/column_block.h"
|
||||
#include "olap/selection_vector.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
class VectorizedRowBatch;
|
||||
|
||||
class ColumnPredicate {
|
||||
public:
|
||||
explicit ColumnPredicate(uint32_t column_id) : _column_id(column_id) { }
|
||||
|
||||
virtual ~ColumnPredicate() {}
|
||||
|
||||
//evaluate predicate on VectorizedRowBatch
|
||||
virtual void evaluate(VectorizedRowBatch* batch) const = 0;
|
||||
|
||||
// evaluate predicate on ColumnBlock
|
||||
virtual void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const = 0;
|
||||
|
||||
uint32_t column_id() const { return _column_id; }
|
||||
|
||||
protected:
|
||||
uint32_t _column_id;
|
||||
};
|
||||
|
||||
} //namespace doris
|
||||
|
||||
@ -24,8 +24,8 @@ namespace doris {
|
||||
|
||||
#define COMPARISON_PRED_CONSTRUCTOR(CLASS) \
|
||||
template<class type> \
|
||||
CLASS<type>::CLASS(int column_id, const type& value) \
|
||||
: _column_id(column_id), \
|
||||
CLASS<type>::CLASS(uint32_t column_id, const type& value) \
|
||||
: ColumnPredicate(column_id), \
|
||||
_value(value) \
|
||||
{} \
|
||||
|
||||
@ -38,8 +38,8 @@ COMPARISON_PRED_CONSTRUCTOR(GreaterEqualPredicate)
|
||||
|
||||
#define COMPARISON_PRED_CONSTRUCTOR_STRING(CLASS) \
|
||||
template<> \
|
||||
CLASS<StringValue>::CLASS(int column_id, const StringValue& value) \
|
||||
: _column_id(column_id) \
|
||||
CLASS<StringValue>::CLASS(uint32_t column_id, const StringValue& value) \
|
||||
: ColumnPredicate(column_id) \
|
||||
{ \
|
||||
_value.len = value.len; \
|
||||
_value.ptr = value.ptr; \
|
||||
@ -102,7 +102,6 @@ COMPARISON_PRED_CONSTRUCTOR_STRING(GreaterEqualPredicate)
|
||||
} \
|
||||
} \
|
||||
|
||||
|
||||
COMPARISON_PRED_EVALUATE(EqualPredicate, ==)
|
||||
COMPARISON_PRED_EVALUATE(NotEqualPredicate, !=)
|
||||
COMPARISON_PRED_EVALUATE(LessPredicate, <)
|
||||
@ -110,18 +109,47 @@ COMPARISON_PRED_EVALUATE(LessEqualPredicate, <=)
|
||||
COMPARISON_PRED_EVALUATE(GreaterPredicate, >)
|
||||
COMPARISON_PRED_EVALUATE(GreaterEqualPredicate, >=)
|
||||
|
||||
#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP) \
|
||||
template<class type> \
|
||||
void CLASS<type>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { \
|
||||
uint16_t new_size = 0; \
|
||||
if (block->is_nullable()) { \
|
||||
for (uint16_t i = 0; i < *size; ++i) { \
|
||||
uint16_t idx = sel[i]; \
|
||||
sel[new_size] = idx; \
|
||||
const type* cell_value = reinterpret_cast<const type*>(block->cell(idx).cell_ptr()); \
|
||||
new_size += (!block->cell(idx).is_null() && (*cell_value OP _value)); \
|
||||
} \
|
||||
} else { \
|
||||
for (uint16_t i = 0; i < *size; ++i) { \
|
||||
uint16_t idx = sel[i]; \
|
||||
sel[new_size] = idx; \
|
||||
const type* cell_value = reinterpret_cast<const type*>(block->cell(idx).cell_ptr()); \
|
||||
new_size += (*cell_value OP _value); \
|
||||
} \
|
||||
} \
|
||||
*size = new_size; \
|
||||
} \
|
||||
|
||||
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(EqualPredicate, ==)
|
||||
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(NotEqualPredicate, !=)
|
||||
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(LessPredicate, <)
|
||||
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(LessEqualPredicate, <=)
|
||||
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >)
|
||||
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
|
||||
|
||||
#define COMPARISON_PRED_CONSTRUCTOR_DECLARATION(CLASS) \
|
||||
template CLASS<int8_t>::CLASS(int column_id, const int8_t& value); \
|
||||
template CLASS<int16_t>::CLASS(int column_id, const int16_t& value); \
|
||||
template CLASS<int32_t>::CLASS(int column_id, const int32_t& value); \
|
||||
template CLASS<int64_t>::CLASS(int column_id, const int64_t& value); \
|
||||
template CLASS<int128_t>::CLASS(int column_id, const int128_t& value); \
|
||||
template CLASS<float>::CLASS(int column_id, const float& value); \
|
||||
template CLASS<double>::CLASS(int column_id, const double& value); \
|
||||
template CLASS<decimal12_t>::CLASS(int column_id, const decimal12_t& value); \
|
||||
template CLASS<StringValue>::CLASS(int column_id, const StringValue& value); \
|
||||
template CLASS<uint24_t>::CLASS(int column_id, const uint24_t& value); \
|
||||
template CLASS<uint64_t>::CLASS(int column_id, const uint64_t& value); \
|
||||
template CLASS<int8_t>::CLASS(uint32_t column_id, const int8_t& value); \
|
||||
template CLASS<int16_t>::CLASS(uint32_t column_id, const int16_t& value); \
|
||||
template CLASS<int32_t>::CLASS(uint32_t column_id, const int32_t& value); \
|
||||
template CLASS<int64_t>::CLASS(uint32_t column_id, const int64_t& value); \
|
||||
template CLASS<int128_t>::CLASS(uint32_t column_id, const int128_t& value); \
|
||||
template CLASS<float>::CLASS(uint32_t column_id, const float& value); \
|
||||
template CLASS<double>::CLASS(uint32_t column_id, const double& value); \
|
||||
template CLASS<decimal12_t>::CLASS(uint32_t column_id, const decimal12_t& value); \
|
||||
template CLASS<StringValue>::CLASS(uint32_t column_id, const StringValue& value); \
|
||||
template CLASS<uint24_t>::CLASS(uint32_t column_id, const uint24_t& value); \
|
||||
template CLASS<uint64_t>::CLASS(uint32_t column_id, const uint64_t& value); \
|
||||
|
||||
COMPARISON_PRED_CONSTRUCTOR_DECLARATION(EqualPredicate)
|
||||
COMPARISON_PRED_CONSTRUCTOR_DECLARATION(NotEqualPredicate)
|
||||
@ -150,4 +178,24 @@ COMPARISON_PRED_EVALUATE_DECLARATION(LessEqualPredicate)
|
||||
COMPARISON_PRED_EVALUATE_DECLARATION(GreaterPredicate)
|
||||
COMPARISON_PRED_EVALUATE_DECLARATION(GreaterEqualPredicate)
|
||||
|
||||
#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(CLASS) \
|
||||
template void CLASS<int8_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<int16_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<int32_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<int64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<int128_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<float>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<double>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<decimal12_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<StringValue>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<uint24_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<uint64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
|
||||
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(EqualPredicate)
|
||||
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotEqualPredicate)
|
||||
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(LessPredicate)
|
||||
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(LessEqualPredicate)
|
||||
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(GreaterPredicate)
|
||||
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(GreaterEqualPredicate)
|
||||
|
||||
} //namespace doris
|
||||
|
||||
@ -29,11 +29,11 @@ class VectorizedRowBatch;
|
||||
template <class type> \
|
||||
class CLASS : public ColumnPredicate { \
|
||||
public: \
|
||||
CLASS(int column_id, const type& value); \
|
||||
CLASS(uint32_t column_id, const type& value); \
|
||||
virtual ~CLASS() { } \
|
||||
virtual void evaluate(VectorizedRowBatch* batch) const override; \
|
||||
void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const override; \
|
||||
private: \
|
||||
int32_t _column_id; \
|
||||
type _value; \
|
||||
}; \
|
||||
|
||||
|
||||
@ -94,6 +94,7 @@ Status AutoIncrementIterator::next_batch(RowBlockV2* block) {
|
||||
_rows_returned++;
|
||||
}
|
||||
block->set_num_rows(row_idx);
|
||||
block->set_selected_size(row_idx);
|
||||
if (row_idx > 0) {
|
||||
return Status::OK();
|
||||
}
|
||||
@ -125,7 +126,8 @@ public:
|
||||
// Before call this function, Client must assure that
|
||||
// valid() return true
|
||||
RowBlockRow current_row() const {
|
||||
return RowBlockRow(&_block, _index_in_block);
|
||||
uint16_t* selection_vector = _block.selection_vector();
|
||||
return RowBlockRow(&_block, selection_vector[_index_in_block]);
|
||||
}
|
||||
|
||||
// Advance internal row index to next valid row
|
||||
@ -148,22 +150,23 @@ private:
|
||||
RowBlockV2 _block;
|
||||
|
||||
bool _valid = false;
|
||||
size_t _index_in_block = 0;
|
||||
size_t _index_in_block = -1;
|
||||
};
|
||||
|
||||
Status MergeIteratorContext::init(const StorageReadOptions& opts) {
|
||||
RETURN_IF_ERROR(_iter->init(opts));
|
||||
RETURN_IF_ERROR(_load_next_block());
|
||||
if (valid()) {
|
||||
RETURN_IF_ERROR(advance());
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status MergeIteratorContext::advance() {
|
||||
// NOTE: we increase _index_in_block directly to valid one check
|
||||
_index_in_block++;
|
||||
do {
|
||||
for (; _index_in_block < _block.num_rows(); ++_index_in_block) {
|
||||
// TODO(zc): we can skip rows that is fitered by conjunts here
|
||||
// Now we return directly
|
||||
_index_in_block++;
|
||||
if (_index_in_block < _block.selected_size()) {
|
||||
return Status::OK();
|
||||
}
|
||||
// current batch has no data, load next batch
|
||||
@ -186,7 +189,7 @@ Status MergeIteratorContext::_load_next_block() {
|
||||
}
|
||||
}
|
||||
} while (_block.num_rows() == 0);
|
||||
_index_in_block = 0;
|
||||
_index_in_block = -1;
|
||||
_valid = true;
|
||||
return Status::OK();
|
||||
}
|
||||
@ -266,6 +269,7 @@ Status MergeIterator::next_batch(RowBlockV2* block) {
|
||||
}
|
||||
}
|
||||
block->set_num_rows(row_idx);
|
||||
block->set_selected_size(row_idx);
|
||||
if (row_idx > 0) {
|
||||
return Status::OK();
|
||||
} else {
|
||||
|
||||
@ -24,8 +24,8 @@ namespace doris {
|
||||
|
||||
#define IN_LIST_PRED_CONSTRUCTOR(CLASS) \
|
||||
template<class type> \
|
||||
CLASS<type>::CLASS(int column_id, std::set<type>&& values) \
|
||||
: _column_id(column_id), \
|
||||
CLASS<type>::CLASS(uint32_t column_id, std::set<type>&& values) \
|
||||
: ColumnPredicate(column_id), \
|
||||
_values(std::move(values)) {} \
|
||||
|
||||
IN_LIST_PRED_CONSTRUCTOR(InListPredicate)
|
||||
@ -84,18 +84,43 @@ void CLASS<type>::evaluate(VectorizedRowBatch* batch) const { \
|
||||
IN_LIST_PRED_EVALUATE(InListPredicate, !=)
|
||||
IN_LIST_PRED_EVALUATE(NotInListPredicate, ==)
|
||||
|
||||
#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP) \
|
||||
template<class type> \
|
||||
void CLASS<type>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { \
|
||||
uint16_t new_size = 0; \
|
||||
if (block->is_nullable()) { \
|
||||
for (uint16_t i = 0; i < *size; ++i) { \
|
||||
uint16_t idx = sel[i]; \
|
||||
sel[new_size] = idx; \
|
||||
const type* cell_value = reinterpret_cast<const type*>(block->cell(idx).cell_ptr()); \
|
||||
new_size += (!block->cell(idx).is_null() && _values.find(*cell_value) OP _values.end()); \
|
||||
} \
|
||||
} else { \
|
||||
for (uint16_t i = 0; i < *size; ++i) { \
|
||||
uint16_t idx = sel[i]; \
|
||||
sel[new_size] = idx; \
|
||||
const type* cell_value = reinterpret_cast<const type*>(block->cell(idx).cell_ptr()); \
|
||||
new_size += (_values.find(*cell_value) OP _values.end()); \
|
||||
} \
|
||||
} \
|
||||
*size = new_size; \
|
||||
} \
|
||||
|
||||
IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(InListPredicate, ==)
|
||||
IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, !=)
|
||||
|
||||
#define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS) \
|
||||
template CLASS<int8_t>::CLASS(int column_id, std::set<int8_t>&& values); \
|
||||
template CLASS<int16_t>::CLASS(int column_id, std::set<int16_t>&& values); \
|
||||
template CLASS<int32_t>::CLASS(int column_id, std::set<int32_t>&& values); \
|
||||
template CLASS<int64_t>::CLASS(int column_id, std::set<int64_t>&& values); \
|
||||
template CLASS<int128_t>::CLASS(int column_id, std::set<int128_t>&& values); \
|
||||
template CLASS<float>::CLASS(int column_id, std::set<float>&& values); \
|
||||
template CLASS<double>::CLASS(int column_id, std::set<double>&& values); \
|
||||
template CLASS<decimal12_t>::CLASS(int column_id, std::set<decimal12_t>&& values); \
|
||||
template CLASS<StringValue>::CLASS(int column_id, std::set<StringValue>&& values); \
|
||||
template CLASS<uint24_t>::CLASS(int column_id, std::set<uint24_t>&& values); \
|
||||
template CLASS<uint64_t>::CLASS(int column_id, std::set<uint64_t>&& values); \
|
||||
template CLASS<int8_t>::CLASS(uint32_t column_id, std::set<int8_t>&& values); \
|
||||
template CLASS<int16_t>::CLASS(uint32_t column_id, std::set<int16_t>&& values); \
|
||||
template CLASS<int32_t>::CLASS(uint32_t column_id, std::set<int32_t>&& values); \
|
||||
template CLASS<int64_t>::CLASS(uint32_t column_id, std::set<int64_t>&& values); \
|
||||
template CLASS<int128_t>::CLASS(uint32_t column_id, std::set<int128_t>&& values); \
|
||||
template CLASS<float>::CLASS(uint32_t column_id, std::set<float>&& values); \
|
||||
template CLASS<double>::CLASS(uint32_t column_id, std::set<double>&& values); \
|
||||
template CLASS<decimal12_t>::CLASS(uint32_t column_id, std::set<decimal12_t>&& values); \
|
||||
template CLASS<StringValue>::CLASS(uint32_t column_id, std::set<StringValue>&& values); \
|
||||
template CLASS<uint24_t>::CLASS(uint32_t column_id, std::set<uint24_t>&& values); \
|
||||
template CLASS<uint64_t>::CLASS(uint32_t column_id, std::set<uint64_t>&& values); \
|
||||
|
||||
IN_LIST_PRED_CONSTRUCTOR_DECLARATION(InListPredicate)
|
||||
IN_LIST_PRED_CONSTRUCTOR_DECLARATION(NotInListPredicate)
|
||||
@ -115,4 +140,21 @@ IN_LIST_PRED_CONSTRUCTOR_DECLARATION(NotInListPredicate)
|
||||
|
||||
IN_LIST_PRED_EVALUATE_DECLARATION(InListPredicate)
|
||||
IN_LIST_PRED_EVALUATE_DECLARATION(NotInListPredicate)
|
||||
|
||||
#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(CLASS) \
|
||||
template void CLASS<int8_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<int16_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<int32_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<int64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<int128_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<float>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<double>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<decimal12_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<StringValue>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<uint24_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
template void CLASS<uint64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
|
||||
|
||||
IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(InListPredicate)
|
||||
IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotInListPredicate)
|
||||
|
||||
} //namespace doris
|
||||
|
||||
@ -30,11 +30,11 @@ class VectorizedRowBatch;
|
||||
template <class type> \
|
||||
class CLASS : public ColumnPredicate { \
|
||||
public: \
|
||||
CLASS(int column_id, std::set<type>&& values); \
|
||||
CLASS(uint32_t column_id, std::set<type>&& values); \
|
||||
virtual ~CLASS() {} \
|
||||
virtual void evaluate(VectorizedRowBatch* batch) const override; \
|
||||
void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const override; \
|
||||
private: \
|
||||
int32_t _column_id; \
|
||||
std::set<type> _values; \
|
||||
}; \
|
||||
|
||||
|
||||
@ -27,6 +27,7 @@ class RowCursor;
|
||||
class RowBlockV2;
|
||||
class Schema;
|
||||
class Conditions;
|
||||
class ColumnPredicate;
|
||||
|
||||
class StorageReadOptions {
|
||||
public:
|
||||
@ -69,6 +70,11 @@ public:
|
||||
|
||||
// delete conditions used by column index to filter pages
|
||||
std::vector<const Conditions*> delete_conditions;
|
||||
// reader's column predicate, nullptr if not existed
|
||||
// used to fiter rows in row block
|
||||
// TODO(hkp): refactor the column predicate framework
|
||||
// to unify Conditions and ColumnPredicate
|
||||
const std::vector<ColumnPredicate*>* column_predicates = nullptr;
|
||||
};
|
||||
|
||||
// Used to read data in RowBlockV2 one by one
|
||||
|
||||
@ -22,8 +22,8 @@
|
||||
|
||||
namespace doris {
|
||||
|
||||
NullPredicate::NullPredicate(int32_t column_id, bool is_null)
|
||||
: _column_id(column_id), _is_null(is_null) {}
|
||||
NullPredicate::NullPredicate(uint32_t column_id, bool is_null)
|
||||
: ColumnPredicate(column_id), _is_null(is_null) {}
|
||||
|
||||
NullPredicate::~NullPredicate() {}
|
||||
|
||||
@ -60,4 +60,18 @@ void NullPredicate::evaluate(VectorizedRowBatch* batch) const {
|
||||
}
|
||||
}
|
||||
|
||||
void NullPredicate::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const {
|
||||
uint16_t new_size = 0;
|
||||
if (!block->is_nullable() && _is_null) {
|
||||
*size = 0;
|
||||
return;
|
||||
}
|
||||
for (uint16_t i = 0; i < *size; ++i) {
|
||||
uint16_t idx = sel[i];
|
||||
sel[new_size] = idx;
|
||||
new_size += (block->cell(idx).is_null() == _is_null);
|
||||
}
|
||||
*size = new_size;
|
||||
}
|
||||
|
||||
} //namespace doris
|
||||
|
||||
@ -18,7 +18,6 @@
|
||||
#ifndef DORIS_BE_SRC_OLAP_NULL_PREDICATE_H
|
||||
#define DORIS_BE_SRC_OLAP_NULL_PREDICATE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "olap/column_predicate.h"
|
||||
|
||||
namespace doris {
|
||||
@ -27,12 +26,14 @@ class VectorizedRowBatch;
|
||||
|
||||
class NullPredicate : public ColumnPredicate {
|
||||
public:
|
||||
NullPredicate(int32_t column_id, bool is_null);
|
||||
NullPredicate(uint32_t column_id, bool is_null);
|
||||
virtual ~NullPredicate();
|
||||
|
||||
virtual void evaluate(VectorizedRowBatch* batch) const override;
|
||||
|
||||
void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const override;
|
||||
|
||||
private:
|
||||
int32_t _column_id;
|
||||
bool _is_null; //true for null, false for not null
|
||||
};
|
||||
|
||||
|
||||
@ -30,7 +30,8 @@ RowBlockV2::RowBlockV2(const Schema& schema, uint16_t capacity)
|
||||
: _schema(schema),
|
||||
_capacity(capacity),
|
||||
_column_datas(_schema.num_columns(), nullptr),
|
||||
_column_null_bitmaps(_schema.num_columns(), nullptr) {
|
||||
_column_null_bitmaps(_schema.num_columns(), nullptr),
|
||||
_selection_vector(nullptr) {
|
||||
auto bitmap_size = BitmapSize(capacity);
|
||||
for (auto cid : _schema.column_ids()) {
|
||||
size_t data_size = _schema.column(cid)->type_info()->size() * _capacity;
|
||||
@ -40,6 +41,7 @@ RowBlockV2::RowBlockV2(const Schema& schema, uint16_t capacity)
|
||||
_column_null_bitmaps[cid] = new uint8_t[bitmap_size];;
|
||||
}
|
||||
}
|
||||
_selection_vector = new uint16_t[_capacity];
|
||||
clear();
|
||||
}
|
||||
|
||||
@ -50,6 +52,7 @@ RowBlockV2::~RowBlockV2() {
|
||||
for (auto null_bitmap : _column_null_bitmaps) {
|
||||
delete[] null_bitmap;
|
||||
}
|
||||
delete[] _selection_vector;
|
||||
}
|
||||
|
||||
Status RowBlockV2::copy_to_row_cursor(size_t row_idx, RowCursor* cursor) {
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
#include "olap/column_block.h"
|
||||
#include "olap/schema.h"
|
||||
#include "olap/types.h"
|
||||
#include "olap/selection_vector.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
@ -54,6 +55,10 @@ public:
|
||||
void clear() {
|
||||
_num_rows = 0;
|
||||
_arena.reset(new Arena);
|
||||
_selected_size = _capacity;
|
||||
for (int i = 0; i < _selected_size; ++i) {
|
||||
_selection_vector[i] = i;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the row_idx row's data into given row_cursor.
|
||||
@ -67,13 +72,25 @@ public:
|
||||
const TypeInfo* type_info = _schema.column(cid)->type_info();
|
||||
uint8_t* data = _column_datas[cid];
|
||||
uint8_t* null_bitmap = _column_null_bitmaps[cid];
|
||||
return ColumnBlock(type_info, data, null_bitmap, _arena.get());
|
||||
return ColumnBlock(type_info, data, null_bitmap, _capacity, _arena.get());
|
||||
}
|
||||
|
||||
RowBlockRow row(size_t row_idx) const;
|
||||
|
||||
const Schema* schema() const { return &_schema; }
|
||||
|
||||
uint16_t* selection_vector() const {
|
||||
return _selection_vector;
|
||||
}
|
||||
|
||||
uint16_t selected_size() const {
|
||||
return _selected_size;
|
||||
}
|
||||
|
||||
void set_selected_size(uint16_t selected_size) {
|
||||
_selected_size = selected_size;
|
||||
}
|
||||
|
||||
private:
|
||||
Schema _schema;
|
||||
size_t _capacity;
|
||||
@ -88,6 +105,11 @@ private:
|
||||
size_t _num_rows;
|
||||
// manages the memory for slice's data
|
||||
std::unique_ptr<Arena> _arena;
|
||||
|
||||
// index of selected rows for rows passed the predicate
|
||||
uint16_t* _selection_vector;
|
||||
// selected rows number
|
||||
uint16_t _selected_size;
|
||||
};
|
||||
|
||||
// Stands for a row in RowBlockV2. It is consisted of a RowBlockV2 reference
|
||||
|
||||
@ -52,6 +52,7 @@ OLAPStatus BetaRowsetReader::init(RowsetReaderContext* read_context) {
|
||||
read_context->delete_handler->get_delete_conditions_after_version(_rowset->end_version(),
|
||||
&read_options.delete_conditions);
|
||||
}
|
||||
read_options.column_predicates = read_context->predicates;
|
||||
|
||||
// create iterator for each segment
|
||||
std::vector<std::unique_ptr<RowwiseIterator>> seg_iterators;
|
||||
@ -117,7 +118,10 @@ OLAPStatus BetaRowsetReader::next_block(RowBlock** block) {
|
||||
}
|
||||
// convert to output block
|
||||
_output_block->clear();
|
||||
for (size_t row_idx = 0; row_idx < _input_block->num_rows(); ++row_idx) {
|
||||
size_t rows_read = 0;
|
||||
uint16_t* selection_vector = _input_block->selection_vector();
|
||||
for (size_t i = 0; i < _input_block->selected_size(); ++i) {
|
||||
uint16_t row_idx = selection_vector[i];
|
||||
// shallow copy row from input block to output block
|
||||
_output_block->get_row(row_idx, _row.get());
|
||||
// this copy function will copy return_columns' row to seek_columns's row_cursor
|
||||
@ -126,10 +130,11 @@ OLAPStatus BetaRowsetReader::next_block(RowBlock** block) {
|
||||
LOG(WARNING) << "failed to copy row: " << s.to_string();
|
||||
return OLAP_ERR_ROWSET_READ_FAILED;
|
||||
}
|
||||
++rows_read;
|
||||
}
|
||||
_output_block->set_pos(0);
|
||||
_output_block->set_limit(_input_block->num_rows());
|
||||
_output_block->finalize(_input_block->num_rows());
|
||||
_output_block->set_limit(rows_read);
|
||||
_output_block->finalize(rows_read);
|
||||
*block = _output_block.get();
|
||||
// update raw_rows_read counter
|
||||
_context->stats->raw_rows_read += _input_block->num_rows();
|
||||
|
||||
@ -206,7 +206,7 @@ Status BinaryDictPageDecoder::next_batch(size_t* n, ColumnBlockView* dst) {
|
||||
// And then copy the strings corresponding to the codewords to the destination buffer
|
||||
TypeInfo *type_info = get_type_info(OLAP_FIELD_TYPE_INT);
|
||||
// the data in page is not null
|
||||
ColumnBlock column_block(type_info, _code_buf.data(), nullptr, dst->column_block()->arena());
|
||||
ColumnBlock column_block(type_info, _code_buf.data(), nullptr, *n, dst->column_block()->arena());
|
||||
ColumnBlockView tmp_block_view(&column_block);
|
||||
RETURN_IF_ERROR(_data_page_decoder->next_batch(n, &tmp_block_view));
|
||||
for (int i = 0; i < *n; ++i) {
|
||||
|
||||
@ -21,6 +21,7 @@
|
||||
#include "olap/rowset/segment_v2/bitshuffle_page.h"
|
||||
#include "olap/rowset/segment_v2/rle_page.h"
|
||||
#include "olap/rowset/segment_v2/binary_dict_page.h"
|
||||
#include "gutil/strings/substitute.h"
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
@ -167,7 +168,9 @@ Status EncodingInfoResolver::get(
|
||||
auto key = std::make_pair(data_type, encoding_type);
|
||||
auto it = _encoding_map.find(key);
|
||||
if (it == std::end(_encoding_map)) {
|
||||
return Status::InternalError("fail to find valid type encoding");
|
||||
return Status::InternalError(
|
||||
strings::Substitute("fail to find valid type encoding, type:$0, encoding:$1",
|
||||
data_type, encoding_type));
|
||||
}
|
||||
*out = it->second;
|
||||
return Status::OK();
|
||||
|
||||
@ -26,6 +26,7 @@
|
||||
#include "olap/row_block2.h"
|
||||
#include "olap/row_cursor.h"
|
||||
#include "olap/short_key_index.h"
|
||||
#include "olap/column_predicate.h"
|
||||
|
||||
using strings::Substitute;
|
||||
|
||||
@ -310,34 +311,50 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
|
||||
block->set_num_rows(0);
|
||||
return Status::EndOfFile("no more data in segment");
|
||||
}
|
||||
size_t rows_to_read = block->capacity();
|
||||
while (rows_to_read > 0) {
|
||||
if (_cur_rowid >= _row_ranges.get_range_to(_cur_range_id)) {
|
||||
// current row range is read over, trying to read from next range
|
||||
if (_cur_range_id >= _row_ranges.range_size() - 1) {
|
||||
// there is no more row range
|
||||
break;
|
||||
}
|
||||
|
||||
// check whether need to seek
|
||||
if (_cur_rowid >= _row_ranges.get_range_to(_cur_range_id)) {
|
||||
while (true) {
|
||||
// step to next row range
|
||||
++_cur_range_id;
|
||||
_cur_rowid = _row_ranges.get_range_from(_cur_range_id);
|
||||
// current row range is read over, trying to read from next range
|
||||
if (_cur_range_id >= _row_ranges.range_size() - 1) {
|
||||
block->set_num_rows(0);
|
||||
return Status::EndOfFile("no more data in segment");
|
||||
}
|
||||
if (_row_ranges.get_range_count(_cur_range_id) == 0) {
|
||||
// current row range is empty, just skip seek
|
||||
continue;
|
||||
}
|
||||
_cur_rowid = _row_ranges.get_range_from(_cur_range_id);
|
||||
for (auto cid : block->schema()->column_ids()) {
|
||||
RETURN_IF_ERROR(_column_iterators[cid]->seek_to_ordinal(_cur_rowid));
|
||||
}
|
||||
break;
|
||||
}
|
||||
size_t to_read_in_range = std::min(rows_to_read, size_t(_row_ranges.get_range_to(_cur_range_id) - _cur_rowid));
|
||||
RETURN_IF_ERROR(_next_batch(block, &to_read_in_range));
|
||||
_cur_rowid += to_read_in_range;
|
||||
rows_to_read -= to_read_in_range;
|
||||
}
|
||||
block->set_num_rows(block->capacity() - rows_to_read);
|
||||
// next_batch just return the rows in current row range
|
||||
// it is easier to realize lazy materialization in the future
|
||||
size_t rows_to_read = std::min(block->capacity(), size_t(_row_ranges.get_range_to(_cur_range_id) - _cur_rowid));
|
||||
RETURN_IF_ERROR(_next_batch(block, &rows_to_read));
|
||||
_cur_rowid += rows_to_read;
|
||||
block->set_num_rows(rows_to_read);
|
||||
|
||||
if (block->num_rows() == 0) {
|
||||
return Status::EndOfFile("no more data in segment");
|
||||
}
|
||||
// column predicate vectorization execution
|
||||
// TODO(hkp): lazy materialization
|
||||
// TODO(hkp): optimize column predicate to check column block once for one column
|
||||
if (_opts.column_predicates != nullptr) {
|
||||
// init selection position index
|
||||
uint16_t selected_size = block->selected_size();
|
||||
for (auto column_predicate : *_opts.column_predicates) {
|
||||
auto column_block = block->column_block(column_predicate->column_id());
|
||||
column_predicate->evaluate(&column_block, block->selection_vector(), &selected_size);
|
||||
}
|
||||
block->set_selected_size(selected_size);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
172
be/src/olap/selection_vector.h
Normal file
172
be/src/olap/selection_vector.h
Normal file
@ -0,0 +1,172 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "util/bitmap.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Bit-vector representing the selection status of each row in a row block.
|
||||
//
|
||||
// It is used to scan data with column predicate.
|
||||
// the bit in the selection vector is used to indicate whether the given row is live.
|
||||
class SelectionVector {
|
||||
public:
|
||||
// Construct a new vector. The bits are initially in an indeterminate state.
|
||||
// Call set_all_true() if you require all rows to be initially selected.
|
||||
explicit SelectionVector(size_t row_capacity)
|
||||
: _n_rows(row_capacity),
|
||||
_n_bytes(BitmapSize(row_capacity)),
|
||||
_bitmap(new uint8_t[_n_bytes]) {
|
||||
CHECK_GT(_n_bytes, 0);
|
||||
set_all_false();
|
||||
}
|
||||
|
||||
// returen the number of selected rows.
|
||||
size_t count_selected() const {
|
||||
return Bits::Count(_bitmap.get(), _n_bytes);
|
||||
}
|
||||
|
||||
// Return true if any rows are selected, or false
|
||||
// This is equivalent to (count_selected() > 0), but faster.
|
||||
inline bool any_selected() const;
|
||||
|
||||
bool is_row_selected(size_t row) const {
|
||||
DCHECK_LT(row, _n_rows);
|
||||
return BitmapTest(_bitmap.get(), row);
|
||||
}
|
||||
|
||||
void set_row_selected(size_t row) {
|
||||
DCHECK_LT(row, _n_rows);
|
||||
BitmapSet(_bitmap.get(), row);
|
||||
}
|
||||
|
||||
void set_all_true() {
|
||||
memset(_bitmap.get(), 0xff, _n_bytes);
|
||||
pad_extra_bits_wit_zeroes();
|
||||
}
|
||||
|
||||
void set_all_false() {
|
||||
memset(_bitmap.get(), 0, _n_bytes);
|
||||
}
|
||||
|
||||
void clear_bit(size_t row) {
|
||||
DCHECK_LT(row, _n_rows);
|
||||
return BitmapClear(_bitmap.get(), row);
|
||||
}
|
||||
|
||||
uint8_t* mutable_bitmap() { return _bitmap.get(); }
|
||||
|
||||
const uint8_t* bitmap() const { return _bitmap.get(); }
|
||||
|
||||
size_t nrows() const { return _n_rows; }
|
||||
|
||||
std::string to_string() const {
|
||||
return BitmapToString(_bitmap.get(), _n_rows);
|
||||
}
|
||||
|
||||
private:
|
||||
// Pads any non-byte-aligned bits at the end of the SelectionVector with
|
||||
// zeroes.
|
||||
//
|
||||
// To improve performance, CountSelected() and AnySelected() evaluate the
|
||||
// SelectionVector's bitmap in terms of bytes. As such, they consider all of
|
||||
// the trailing bits, even if the bitmap's bit length is not byte-aligned and
|
||||
// some trailing bits aren't part of the bitmap.
|
||||
//
|
||||
// To address this without sacrificing performance, we need to zero out all
|
||||
// trailing bits at construction time, or after any operation that sets all
|
||||
// bytes in bulk.
|
||||
|
||||
void pad_extra_bits_wit_zeroes() {
|
||||
size_t bits_in_last_byte = _n_rows & 7;
|
||||
if (bits_in_last_byte > 0) {
|
||||
BitmapChangeBits(_bitmap.get(), _n_rows, 8 - bits_in_last_byte, false);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// row capacity
|
||||
size_t _n_rows;
|
||||
size_t _n_bytes;
|
||||
std::unique_ptr<uint8_t[]> _bitmap;
|
||||
DISALLOW_COPY_AND_ASSIGN(SelectionVector);
|
||||
};
|
||||
|
||||
inline bool SelectionVector::any_selected() const {
|
||||
size_t rem = _n_bytes;
|
||||
const uint32_t* p32 = reinterpret_cast<const uint32_t*>(_bitmap.get());
|
||||
while (rem >= 4) {
|
||||
if (*p32 != 0) {
|
||||
return true;
|
||||
}
|
||||
++p32;
|
||||
rem -= 4;
|
||||
}
|
||||
|
||||
const uint8_t* p8 = reinterpret_cast<const uint8_t*>(p32);
|
||||
while (rem > 0) {
|
||||
if (*p8 != 0) {
|
||||
return true;
|
||||
}
|
||||
++p8;
|
||||
--rem;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// A SelectionVectorView keeps track of where in the selection vector a given
|
||||
// batch will start from. After processing a batch, Advance() should be called
|
||||
// and the view will move forward by the appropriate amount. In this way, the
|
||||
// underlying selection vector can easily be updated batch-by-batch.
|
||||
class SelectionVectorView {
|
||||
public:
|
||||
// Constructs a new SelectionVectorView.
|
||||
//
|
||||
// The 'sel_vec' object must outlive this SelectionVectorView.
|
||||
explicit SelectionVectorView(SelectionVector* sel_vec)
|
||||
: _sel_vec(sel_vec), _row_offset(0) {}
|
||||
void advance(size_t skip) {
|
||||
DCHECK_LE(skip, _sel_vec->nrows() - _row_offset);
|
||||
_row_offset += skip;
|
||||
}
|
||||
void set_bit(size_t row_idx) {
|
||||
DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
|
||||
BitmapSet(_sel_vec->mutable_bitmap(), _row_offset + row_idx);
|
||||
}
|
||||
void clear_bit(size_t row_idx) {
|
||||
DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
|
||||
BitmapClear(_sel_vec->mutable_bitmap(), _row_offset + row_idx);
|
||||
}
|
||||
bool test_bit(size_t row_idx) {
|
||||
DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
|
||||
return BitmapTest(_sel_vec->bitmap(), _row_offset + row_idx);
|
||||
}
|
||||
void clear_bits(size_t nrows) {
|
||||
DCHECK_LE(nrows, _sel_vec->nrows() - _row_offset);
|
||||
BitmapChangeBits(_sel_vec->mutable_bitmap(), _row_offset, nrows, false);
|
||||
}
|
||||
|
||||
private:
|
||||
SelectionVector* _sel_vec;
|
||||
size_t _row_offset;
|
||||
};
|
||||
|
||||
}
|
||||
@ -19,6 +19,8 @@
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include "gutil/stringprintf.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
std::string Bitmap::DebugString(bool print_bits) const {
|
||||
@ -132,4 +134,21 @@ bool BitmapFindFirst(const uint8_t *bitmap, size_t offset, size_t bitmap_size,
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string BitmapToString(const uint8_t *bitmap, size_t num_bits) {
|
||||
std::string s;
|
||||
size_t index = 0;
|
||||
while (index < num_bits) {
|
||||
StringAppendF(&s, "%4zu: ", index);
|
||||
for (int i = 0; i < 8 && index < num_bits; ++i) {
|
||||
for (int j = 0; j < 8 && index < num_bits; ++j) {
|
||||
StringAppendF(&s, "%d", BitmapTest(bitmap, index));
|
||||
index++;
|
||||
}
|
||||
StringAppendF(&s, " ");
|
||||
}
|
||||
StringAppendF(&s, "\n");
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -113,6 +113,13 @@ inline bool BitmapEquals(const uint8_t* bm1, const uint8_t* bm2, size_t bitmap_s
|
||||
return (bm1[num_full_bytes] & mask) == (bm2[num_full_bytes] & mask);
|
||||
}
|
||||
|
||||
// This function will print the bitmap content in a format like the following:
|
||||
// eg: 0001110000100010110011001100001100110011
|
||||
// output:
|
||||
// 0000: 00011100 00100010 11001100 11000011
|
||||
// 0016: 00110011
|
||||
std::string BitmapToString(const uint8_t *bitmap, size_t num_bits);
|
||||
|
||||
// Iterator which yields ranges of set and unset bits.
|
||||
// Example usage:
|
||||
// bool value;
|
||||
|
||||
@ -73,3 +73,4 @@ ADD_BE_TEST(short_key_index_test)
|
||||
ADD_BE_TEST(page_cache_test)
|
||||
ADD_BE_TEST(hll_test)
|
||||
ADD_BE_TEST(memtable_flush_executor_test)
|
||||
ADD_BE_TEST(selection_vector_test)
|
||||
|
||||
@ -30,6 +30,7 @@
|
||||
#include "olap/storage_engine.h"
|
||||
#include "olap/tablet_schema.h"
|
||||
#include "olap/utils.h"
|
||||
#include "olap/comparison_predicate.h"
|
||||
#include "runtime/mem_tracker.h"
|
||||
#include "runtime/mem_pool.h"
|
||||
#include "util/slice.h"
|
||||
@ -184,37 +185,75 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) {
|
||||
reader_context.seek_columns = &return_columns;
|
||||
reader_context.stats = &_stats;
|
||||
|
||||
RowsetReaderSharedPtr rowset_reader;
|
||||
create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
|
||||
// without predicates
|
||||
{
|
||||
RowsetReaderSharedPtr rowset_reader;
|
||||
create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
|
||||
RowBlock* output_block;
|
||||
uint32_t num_rows_read = 0;
|
||||
while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) {
|
||||
ASSERT_TRUE(output_block != nullptr);
|
||||
ASSERT_GT(output_block->row_num(), 0);
|
||||
ASSERT_EQ(0, output_block->pos());
|
||||
ASSERT_EQ(output_block->row_num(), output_block->limit());
|
||||
ASSERT_EQ(return_columns, output_block->row_block_info().column_ids);
|
||||
// after sort merge segments, k1 will be 0, 1, 2, 10, 11, 12, 20, 21, 22, ..., 40950, 40951, 40952
|
||||
for (int i = 0; i < output_block->row_num(); ++i) {
|
||||
char* field1 = output_block->field_ptr(i, 0);
|
||||
char* field2 = output_block->field_ptr(i, 1);
|
||||
// test null bit
|
||||
ASSERT_FALSE(*reinterpret_cast<bool*>(field1));
|
||||
ASSERT_FALSE(*reinterpret_cast<bool*>(field2));
|
||||
uint32_t k1 = *reinterpret_cast<uint32_t*>(field1 + 1);
|
||||
uint32_t k2 = *reinterpret_cast<uint32_t*>(field2 + 1);
|
||||
ASSERT_EQ(k1 * 10, k2);
|
||||
|
||||
RowBlock* output_block;
|
||||
uint32_t num_rows_read = 0;
|
||||
while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) {
|
||||
ASSERT_TRUE(output_block != nullptr);
|
||||
ASSERT_GT(output_block->row_num(), 0);
|
||||
ASSERT_EQ(0, output_block->pos());
|
||||
ASSERT_EQ(output_block->row_num(), output_block->limit());
|
||||
ASSERT_EQ(return_columns, output_block->row_block_info().column_ids);
|
||||
// after sort merge segments, k1 will be 0, 1, 2, 10, 11, 12, 20, 21, 22, ..., 40950, 40951, 40952
|
||||
for (int i = 0; i < output_block->row_num(); ++i) {
|
||||
char* field1 = output_block->field_ptr(i, 0);
|
||||
char* field2 = output_block->field_ptr(i, 1);
|
||||
// test null bit
|
||||
ASSERT_FALSE(*reinterpret_cast<bool*>(field1));
|
||||
ASSERT_FALSE(*reinterpret_cast<bool*>(field2));
|
||||
uint32_t k1 = *reinterpret_cast<uint32_t*>(field1 + 1);
|
||||
uint32_t k2 = *reinterpret_cast<uint32_t*>(field2 + 1);
|
||||
ASSERT_EQ(k1 * 10, k2);
|
||||
|
||||
int rid = num_rows_read / 3;
|
||||
int seg_id = num_rows_read % 3;
|
||||
ASSERT_EQ(rid * 10 + seg_id, k1);
|
||||
num_rows_read++;
|
||||
int rid = num_rows_read / 3;
|
||||
int seg_id = num_rows_read % 3;
|
||||
ASSERT_EQ(rid * 10 + seg_id, k1);
|
||||
num_rows_read++;
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(OLAP_ERR_DATA_EOF, s);
|
||||
EXPECT_TRUE(output_block == nullptr);
|
||||
EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read);
|
||||
}
|
||||
|
||||
// merge segments with predicates
|
||||
{
|
||||
std::vector<ColumnPredicate*> column_predicates;
|
||||
// column predicate: k1 = 10
|
||||
std::unique_ptr<ColumnPredicate> predicate(new EqualPredicate<int32_t>(0, 10));
|
||||
column_predicates.emplace_back(predicate.get());
|
||||
reader_context.predicates = &column_predicates;
|
||||
RowsetReaderSharedPtr rowset_reader;
|
||||
create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
|
||||
RowBlock* output_block;
|
||||
uint32_t num_rows_read = 0;
|
||||
while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) {
|
||||
ASSERT_TRUE(output_block != nullptr);
|
||||
ASSERT_EQ(1, output_block->row_num());
|
||||
ASSERT_EQ(0, output_block->pos());
|
||||
ASSERT_EQ(output_block->row_num(), output_block->limit());
|
||||
ASSERT_EQ(return_columns, output_block->row_block_info().column_ids);
|
||||
// after sort merge segments, k1 will be 10
|
||||
for (int i = 0; i < output_block->row_num(); ++i) {
|
||||
char* field1 = output_block->field_ptr(i, 0);
|
||||
char* field2 = output_block->field_ptr(i, 1);
|
||||
// test null bit
|
||||
ASSERT_FALSE(*reinterpret_cast<bool*>(field1));
|
||||
ASSERT_FALSE(*reinterpret_cast<bool*>(field2));
|
||||
uint32_t k1 = *reinterpret_cast<uint32_t*>(field1 + 1);
|
||||
uint32_t k2 = *reinterpret_cast<uint32_t*>(field2 + 1);
|
||||
ASSERT_EQ(10, k1);
|
||||
ASSERT_EQ(k1 * 10, k2);
|
||||
num_rows_read++;
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(OLAP_ERR_DATA_EOF, s);
|
||||
EXPECT_TRUE(output_block == nullptr);
|
||||
EXPECT_EQ(1, num_rows_read);
|
||||
}
|
||||
EXPECT_EQ(OLAP_ERR_DATA_EOF, s);
|
||||
EXPECT_TRUE(output_block == nullptr);
|
||||
EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read);
|
||||
}
|
||||
|
||||
{ // test return unordered data and only k3
|
||||
@ -226,30 +265,66 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) {
|
||||
reader_context.seek_columns = &return_columns;
|
||||
reader_context.stats = &_stats;
|
||||
|
||||
RowsetReaderSharedPtr rowset_reader;
|
||||
create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
|
||||
// without predicate
|
||||
{
|
||||
RowsetReaderSharedPtr rowset_reader;
|
||||
create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
|
||||
|
||||
RowBlock* output_block;
|
||||
uint32_t num_rows_read = 0;
|
||||
while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) {
|
||||
ASSERT_TRUE(output_block != nullptr);
|
||||
ASSERT_GT(output_block->row_num(), 0);
|
||||
ASSERT_EQ(0, output_block->pos());
|
||||
ASSERT_EQ(output_block->row_num(), output_block->limit());
|
||||
ASSERT_EQ(return_columns, output_block->row_block_info().column_ids);
|
||||
// for unordered result, k3 will be 0, 1, 2, ..., 4096*3-1
|
||||
for (int i = 0; i < output_block->row_num(); ++i) {
|
||||
char* field3 = output_block->field_ptr(i, 2);
|
||||
// test null bit
|
||||
ASSERT_FALSE(*reinterpret_cast<bool*>(field3));
|
||||
uint32_t k3 = *reinterpret_cast<uint32_t*>(field3 + 1);
|
||||
ASSERT_EQ(num_rows_read, k3);
|
||||
num_rows_read++;
|
||||
RowBlock* output_block;
|
||||
uint32_t num_rows_read = 0;
|
||||
while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) {
|
||||
ASSERT_TRUE(output_block != nullptr);
|
||||
ASSERT_GT(output_block->row_num(), 0);
|
||||
ASSERT_EQ(0, output_block->pos());
|
||||
ASSERT_EQ(output_block->row_num(), output_block->limit());
|
||||
ASSERT_EQ(return_columns, output_block->row_block_info().column_ids);
|
||||
// for unordered result, k3 will be 0, 1, 2, ..., 4096*3-1
|
||||
for (int i = 0; i < output_block->row_num(); ++i) {
|
||||
char* field3 = output_block->field_ptr(i, 2);
|
||||
// test null bit
|
||||
ASSERT_FALSE(*reinterpret_cast<bool*>(field3));
|
||||
uint32_t k3 = *reinterpret_cast<uint32_t*>(field3 + 1);
|
||||
ASSERT_EQ(num_rows_read, k3);
|
||||
num_rows_read++;
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(OLAP_ERR_DATA_EOF, s);
|
||||
EXPECT_TRUE(output_block == nullptr);
|
||||
EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read);
|
||||
}
|
||||
|
||||
// with predicate
|
||||
{
|
||||
std::vector<ColumnPredicate*> column_predicates;
|
||||
// column predicate: k3 < 100
|
||||
ColumnPredicate* predicate = new LessPredicate<int32_t>(2, 100);
|
||||
column_predicates.emplace_back(predicate);
|
||||
reader_context.predicates = &column_predicates;
|
||||
RowsetReaderSharedPtr rowset_reader;
|
||||
create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
|
||||
|
||||
RowBlock* output_block;
|
||||
uint32_t num_rows_read = 0;
|
||||
while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) {
|
||||
ASSERT_TRUE(output_block != nullptr);
|
||||
ASSERT_LE(output_block->row_num(), 100);
|
||||
ASSERT_EQ(0, output_block->pos());
|
||||
ASSERT_EQ(output_block->row_num(), output_block->limit());
|
||||
ASSERT_EQ(return_columns, output_block->row_block_info().column_ids);
|
||||
// for unordered result, k3 will be 0, 1, 2, ..., 99
|
||||
for (int i = 0; i < output_block->row_num(); ++i) {
|
||||
char* field3 = output_block->field_ptr(i, 2);
|
||||
// test null bit
|
||||
ASSERT_FALSE(*reinterpret_cast<bool*>(field3));
|
||||
uint32_t k3 = *reinterpret_cast<uint32_t*>(field3 + 1);
|
||||
ASSERT_EQ(num_rows_read, k3);
|
||||
num_rows_read++;
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(OLAP_ERR_DATA_EOF, s);
|
||||
EXPECT_TRUE(output_block == nullptr);
|
||||
EXPECT_EQ(100, num_rows_read);
|
||||
}
|
||||
EXPECT_EQ(OLAP_ERR_DATA_EOF, s);
|
||||
EXPECT_TRUE(output_block == nullptr);
|
||||
EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -74,7 +74,7 @@ public:
|
||||
TypeInfo* type_info = get_type_info(OLAP_FIELD_TYPE_VARCHAR);
|
||||
size_t size = slices.size();
|
||||
Slice* values = reinterpret_cast<Slice*>(arena.Allocate(size * sizeof(Slice)));
|
||||
ColumnBlock column_block(type_info, (uint8_t*)values, nullptr, &arena);
|
||||
ColumnBlock column_block(type_info, (uint8_t*)values, nullptr, size, &arena);
|
||||
ColumnBlockView block_view(&column_block);
|
||||
|
||||
status = page_decoder.next_batch(&size, &block_view);
|
||||
@ -164,7 +164,7 @@ public:
|
||||
Arena arena;
|
||||
TypeInfo* type_info = get_type_info(OLAP_FIELD_TYPE_VARCHAR);
|
||||
Slice* values = reinterpret_cast<Slice*>(arena.Allocate(sizeof(Slice)));
|
||||
ColumnBlock column_block(type_info, (uint8_t*)values, nullptr, &arena);
|
||||
ColumnBlock column_block(type_info, (uint8_t*)values, nullptr, 1, &arena);
|
||||
ColumnBlockView block_view(&column_block);
|
||||
|
||||
size_t num = 1;
|
||||
|
||||
@ -65,7 +65,7 @@ public:
|
||||
|
||||
Slice* values = reinterpret_cast<Slice*>(arena.Allocate(size * sizeof(Slice)));
|
||||
uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
|
||||
ColumnBlock block(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values, null_bitmap, &arena);
|
||||
ColumnBlock block(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values, null_bitmap, size, &arena);
|
||||
ColumnBlockView column_block_view(&block);
|
||||
|
||||
status = page_decoder.next_batch(&size, &column_block_view);
|
||||
@ -78,7 +78,7 @@ public:
|
||||
ASSERT_EQ ("Doris", value[2].to_string());
|
||||
|
||||
Slice* values2 = reinterpret_cast<Slice*>(arena.Allocate(size * sizeof(Slice)));
|
||||
ColumnBlock block2(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values2, null_bitmap, &arena);
|
||||
ColumnBlock block2(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values2, null_bitmap, 1, &arena);
|
||||
ColumnBlockView column_block_view2(&block2);
|
||||
|
||||
size_t fetch_num = 1;
|
||||
|
||||
@ -37,7 +37,7 @@ public:
|
||||
void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
|
||||
Arena arena;
|
||||
uint8_t null_bitmap = 0;
|
||||
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena);
|
||||
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &arena);
|
||||
ColumnBlockView column_block_view(&block);
|
||||
|
||||
size_t n = 1;
|
||||
@ -68,7 +68,7 @@ public:
|
||||
|
||||
CppType* values = reinterpret_cast<CppType*>(arena.Allocate(size * sizeof(CppType)));
|
||||
uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
|
||||
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena);
|
||||
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &arena);
|
||||
ColumnBlockView column_block_view(&block);
|
||||
|
||||
status = page_decoder.next_batch(&size, &column_block_view);
|
||||
|
||||
@ -72,7 +72,7 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s
|
||||
std::unique_ptr<Field> field(FieldFactory::create(column));
|
||||
ColumnWriter writer(writer_opts, std::move(field), true, wfile.get());
|
||||
st = writer.init();
|
||||
ASSERT_TRUE(st.ok());
|
||||
ASSERT_TRUE(st.ok()) << st.to_string();
|
||||
|
||||
for (int i = 0; i < num_rows; ++i) {
|
||||
st = writer.append(BitmapTest(src_is_null, i), src + i);
|
||||
@ -117,13 +117,13 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s
|
||||
// sequence read
|
||||
{
|
||||
st = iter->seek_to_first();
|
||||
ASSERT_TRUE(st.ok());
|
||||
ASSERT_TRUE(st.ok()) << st.to_string();
|
||||
|
||||
Arena arena;
|
||||
Type vals[1024];
|
||||
Type* vals_ = vals;
|
||||
uint8_t is_null[1024];
|
||||
ColumnBlock col(type_info, (uint8_t*)vals, is_null, &arena);
|
||||
ColumnBlock col(type_info, (uint8_t*)vals, is_null, 1024, &arena);
|
||||
|
||||
int idx = 0;
|
||||
while (true) {
|
||||
@ -155,7 +155,7 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s
|
||||
Arena arena;
|
||||
Type vals[1024];
|
||||
uint8_t is_null[1024];
|
||||
ColumnBlock col(type_info, (uint8_t*)vals, is_null, &arena);
|
||||
ColumnBlock col(type_info, (uint8_t*)vals, is_null, 1024, &arena);
|
||||
|
||||
for (int rowid = 0; rowid < num_rows; rowid += 4025) {
|
||||
st = iter->seek_to_ordinal(rowid);
|
||||
|
||||
@ -35,7 +35,7 @@ public:
|
||||
void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
|
||||
Arena arena;
|
||||
uint8_t null_bitmap = 0;
|
||||
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena);
|
||||
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &arena);
|
||||
ColumnBlockView column_block_view(&block);
|
||||
|
||||
size_t n = 1;
|
||||
@ -66,7 +66,7 @@ public:
|
||||
Arena arena;
|
||||
CppType* values = reinterpret_cast<CppType*>(arena.Allocate(size * sizeof(CppType)));
|
||||
uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
|
||||
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena);
|
||||
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &arena);
|
||||
ColumnBlockView column_block_view(&block);
|
||||
size_t size_to_fetch = size;
|
||||
status = for_page_decoder.next_batch(&size_to_fetch, &column_block_view);
|
||||
|
||||
@ -47,7 +47,7 @@ public:
|
||||
void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
|
||||
Arena arena;
|
||||
uint8_t null_bitmap = 0;
|
||||
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena);
|
||||
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &arena);
|
||||
ColumnBlockView column_block_view(&block);
|
||||
|
||||
size_t n = 1;
|
||||
@ -78,7 +78,7 @@ public:
|
||||
|
||||
CppType* values = reinterpret_cast<CppType*>(arena.Allocate(size * sizeof(CppType)));
|
||||
uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
|
||||
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena);
|
||||
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &arena);
|
||||
ColumnBlockView column_block_view(&block);
|
||||
status = page_decoder.next_batch(&size, &column_block_view);
|
||||
ASSERT_TRUE(status.ok());
|
||||
|
||||
@ -38,7 +38,7 @@ public:
|
||||
void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
|
||||
Arena arena;
|
||||
uint8_t null_bitmap = 0;
|
||||
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena);
|
||||
ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &arena);
|
||||
ColumnBlockView column_block_view(&block);
|
||||
|
||||
size_t n = 1;
|
||||
@ -70,7 +70,7 @@ public:
|
||||
|
||||
CppType* values = reinterpret_cast<CppType*>(arena.Allocate(size * sizeof(CppType)));
|
||||
uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
|
||||
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena);
|
||||
ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &arena);
|
||||
ColumnBlockView column_block_view(&block);
|
||||
size_t size_to_fetch = size;
|
||||
status = rle_page_decoder.next_batch(&size_to_fetch, &column_block_view);
|
||||
|
||||
49
be/test/olap/selection_vector_test.cpp
Normal file
49
be/test/olap/selection_vector_test.cpp
Normal file
@ -0,0 +1,49 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "olap/selection_vector.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
class SelectionVectorTest : public testing::Test {
|
||||
|
||||
};
|
||||
|
||||
TEST_F(SelectionVectorTest, Normal) {
|
||||
SelectionVector sel_vel(10);
|
||||
ASSERT_EQ(10, sel_vel.nrows());
|
||||
sel_vel.set_all_true();
|
||||
ASSERT_EQ(" 0: 11111111 11 \n", sel_vel.to_string());
|
||||
sel_vel.set_all_false();
|
||||
ASSERT_EQ(" 0: 00000000 00 \n", sel_vel.to_string());
|
||||
sel_vel.set_row_selected(7);
|
||||
ASSERT_TRUE(sel_vel.is_row_selected(7));
|
||||
ASSERT_TRUE(sel_vel.any_selected());
|
||||
ASSERT_EQ(" 0: 00000001 00 \n", sel_vel.to_string());
|
||||
sel_vel.clear_bit(7);
|
||||
ASSERT_EQ(" 0: 00000000 00 \n", sel_vel.to_string());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
||||
@ -278,6 +278,7 @@ ${DORIS_TEST_BINARY_DIR}/olap/short_key_index_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/key_coder_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/page_cache_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/hll_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/selection_vector_test
|
||||
|
||||
# Running routine load test
|
||||
${DORIS_TEST_BINARY_DIR}/runtime/kafka_consumer_pipe_test
|
||||
|
||||
Reference in New Issue
Block a user