[Enhancement](bitmapfilter) Support bitmap filter to apply zone_map index to filter pages (#14635)
This commit is contained in:
@ -17,7 +17,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/object_pool.h"
|
||||
#include <algorithm>
|
||||
|
||||
#include "gutil/integral_types.h"
|
||||
#include "runtime/define_primitive_type.h"
|
||||
#include "runtime/primitive_type.h"
|
||||
@ -30,14 +31,15 @@ class BitmapFilterFuncBase {
|
||||
public:
|
||||
virtual void insert(const void* data) = 0;
|
||||
virtual void insert_many(const std::vector<const BitmapValue*> bitmaps) = 0;
|
||||
virtual bool find(uint64 data) const = 0;
|
||||
virtual bool is_empty() = 0;
|
||||
virtual bool empty() = 0;
|
||||
virtual Status assign(BitmapValue* bitmap_value) = 0;
|
||||
virtual void light_copy(BitmapFilterFuncBase* other) { _not_in = other->_not_in; };
|
||||
virtual uint16_t find_fixed_len_olap_engine(const char* data, const uint8* nullmap,
|
||||
uint16_t* offsets, int number) = 0;
|
||||
virtual void find_batch(const char* data, const uint8* nullmap, int number,
|
||||
uint8* results) const = 0;
|
||||
virtual size_t size() const = 0;
|
||||
bool is_not_in() const { return _not_in; }
|
||||
void set_not_in(bool not_in) { _not_in = not_in; }
|
||||
virtual ~BitmapFilterFuncBase() = default;
|
||||
|
||||
@ -51,7 +53,7 @@ class BitmapFilterFunc : public BitmapFilterFuncBase {
|
||||
public:
|
||||
using CppType = typename PrimitiveTypeTraits<type>::CppType;
|
||||
|
||||
BitmapFilterFunc() : _bitmap_value(std::make_shared<BitmapValue>()), _empty(true) {}
|
||||
BitmapFilterFunc() : _bitmap_value(std::make_shared<BitmapValue>()) {}
|
||||
|
||||
~BitmapFilterFunc() override = default;
|
||||
|
||||
@ -59,15 +61,13 @@ public:
|
||||
|
||||
void insert_many(const std::vector<const BitmapValue*> bitmaps) override;
|
||||
|
||||
bool find(uint64 data) const override { return _not_in ^ _bitmap_value->contains(data); }
|
||||
|
||||
uint16_t find_fixed_len_olap_engine(const char* data, const uint8* nullmap, uint16_t* offsets,
|
||||
int number) override;
|
||||
|
||||
void find_batch(const char* data, const uint8* nullmap, int number,
|
||||
uint8* results) const override;
|
||||
|
||||
bool is_empty() override { return _empty; }
|
||||
bool empty() override { return _bitmap_value->empty(); }
|
||||
|
||||
Status assign(BitmapValue* bitmap_value) override {
|
||||
*_bitmap_value = *bitmap_value;
|
||||
@ -76,9 +76,25 @@ public:
|
||||
|
||||
void light_copy(BitmapFilterFuncBase* bloomfilter_func) override;
|
||||
|
||||
size_t size() const override { return _bitmap_value->cardinality(); }
|
||||
|
||||
uint64_t max() { return _bitmap_value->max(nullptr); }
|
||||
|
||||
uint64_t min() { return _bitmap_value->min(nullptr); }
|
||||
|
||||
bool contains_any(CppType left, CppType right) {
|
||||
if (right < 0) {
|
||||
return false;
|
||||
}
|
||||
return _bitmap_value->contains_any(std::max(left, (CppType)0), right);
|
||||
}
|
||||
|
||||
std::shared_ptr<BitmapValue> get_inner_bitmap() { return _bitmap_value; }
|
||||
|
||||
private:
|
||||
std::shared_ptr<BitmapValue> _bitmap_value;
|
||||
bool _empty;
|
||||
|
||||
bool find(CppType data) const { return _not_in ^ (data >= 0 && _bitmap_value->contains(data)); }
|
||||
};
|
||||
|
||||
template <PrimitiveType type>
|
||||
@ -88,7 +104,6 @@ void BitmapFilterFunc<type>::insert(const void* data) {
|
||||
}
|
||||
|
||||
*_bitmap_value |= *reinterpret_cast<const BitmapValue*>(data);
|
||||
_empty = false;
|
||||
}
|
||||
|
||||
template <PrimitiveType type>
|
||||
@ -97,7 +112,6 @@ void BitmapFilterFunc<type>::insert_many(const std::vector<const BitmapValue*> b
|
||||
return;
|
||||
}
|
||||
_bitmap_value->fastunion(bitmaps);
|
||||
_empty = false;
|
||||
}
|
||||
|
||||
template <PrimitiveType type>
|
||||
@ -136,7 +150,6 @@ template <PrimitiveType type>
|
||||
void BitmapFilterFunc<type>::light_copy(BitmapFilterFuncBase* bitmapfilter_func) {
|
||||
BitmapFilterFuncBase::light_copy(bitmapfilter_func);
|
||||
auto other_func = reinterpret_cast<BitmapFilterFunc*>(bitmapfilter_func);
|
||||
_empty = other_func->_empty;
|
||||
_bitmap_value = other_func->_bitmap_value;
|
||||
}
|
||||
|
||||
|
||||
@ -17,6 +17,8 @@
|
||||
|
||||
#include "runtime_filter.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common/object_pool.h"
|
||||
#include "common/status.h"
|
||||
#include "exprs/binary_predicate.h"
|
||||
@ -457,8 +459,8 @@ public:
|
||||
return Status::OK();
|
||||
}
|
||||
case RuntimeFilterType::BITMAP_FILTER: {
|
||||
_context._bitmap_filter_func.reset(create_bitmap_filter(_column_return_type));
|
||||
_context._bitmap_filter_func->set_not_in(params->bitmap_filter_not_in);
|
||||
_context.bitmap_filter_func.reset(create_bitmap_filter(_column_return_type));
|
||||
_context.bitmap_filter_func->set_not_in(params->bitmap_filter_not_in);
|
||||
return Status::OK();
|
||||
}
|
||||
default:
|
||||
@ -524,7 +526,7 @@ public:
|
||||
break;
|
||||
}
|
||||
case RuntimeFilterType::BITMAP_FILTER: {
|
||||
_context._bitmap_filter_func->insert(data);
|
||||
_context.bitmap_filter_func->insert(data);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -602,7 +604,7 @@ public:
|
||||
for (int index : rows) {
|
||||
bitmaps.push_back(&(col->get_data()[index]));
|
||||
}
|
||||
_context._bitmap_filter_func->insert_many(bitmaps);
|
||||
_context.bitmap_filter_func->insert_many(bitmaps);
|
||||
}
|
||||
|
||||
RuntimeFilterType get_real_type() {
|
||||
@ -1086,6 +1088,10 @@ public:
|
||||
|
||||
size_t get_in_filter_size() const { return _context.hybrid_set->size(); }
|
||||
|
||||
std::shared_ptr<BitmapFilterFuncBase> get_bitmap_filter() const {
|
||||
return _context.bitmap_filter_func;
|
||||
}
|
||||
|
||||
friend class IRuntimeFilter;
|
||||
|
||||
private:
|
||||
@ -1260,6 +1266,11 @@ void IRuntimeFilter::signal() {
|
||||
if (_wrapper->get_real_type() == RuntimeFilterType::IN_FILTER) {
|
||||
_profile->add_info_string("InFilterSize", std::to_string(_wrapper->get_in_filter_size()));
|
||||
}
|
||||
if (_wrapper->get_real_type() == RuntimeFilterType::BITMAP_FILTER) {
|
||||
auto bitmap_filter = _wrapper->get_bitmap_filter();
|
||||
_profile->add_info_string("BitmapSize", std::to_string(bitmap_filter->size()));
|
||||
_profile->add_info_string("IsNotIn", bitmap_filter->is_not_in() ? "true" : "false");
|
||||
}
|
||||
}
|
||||
|
||||
BloomFilterFuncBase* IRuntimeFilter::get_bloomfilter() const {
|
||||
@ -1935,7 +1946,7 @@ Status RuntimePredicateWrapper::get_push_vexprs(std::vector<doris::vectorized::V
|
||||
node.__set_vector_opcode(to_in_opcode(_column_return_type));
|
||||
node.__set_is_nullable(false);
|
||||
auto bitmap_pred = _pool->add(new vectorized::VBitmapPredicate(node));
|
||||
bitmap_pred->set_filter(_context._bitmap_filter_func);
|
||||
bitmap_pred->set_filter(_context.bitmap_filter_func);
|
||||
auto cloned_vexpr = vprob_expr->root()->clone(_pool);
|
||||
bitmap_pred->add_child(cloned_vexpr);
|
||||
auto wrapper = _pool->add(new vectorized::VRuntimeFilterWrapper(node, bitmap_pred));
|
||||
|
||||
@ -43,6 +43,7 @@ class PMinMaxFilter;
|
||||
class HashJoinNode;
|
||||
class RuntimeProfile;
|
||||
class BloomFilterFuncBase;
|
||||
class BitmapFilterFuncBase;
|
||||
|
||||
namespace vectorized {
|
||||
class VExpr;
|
||||
@ -252,6 +253,8 @@ public:
|
||||
|
||||
void ready_for_publish();
|
||||
|
||||
std::shared_ptr<BitmapFilterFuncBase> get_bitmap_filter() const;
|
||||
|
||||
static bool enable_use_batch(int be_exec_version, PrimitiveType type) {
|
||||
return be_exec_version > 0 && (is_int_or_bool(type) || is_float_or_double(type));
|
||||
}
|
||||
|
||||
@ -17,9 +17,14 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "exprs/bitmapfilter_predicate.h"
|
||||
#include "exprs/cast_functions.h"
|
||||
#include "exprs/runtime_filter.h"
|
||||
#include "olap/column_predicate.h"
|
||||
#include "olap/wrapper_field.h"
|
||||
#include "util/bitmap_value.h"
|
||||
#include "vec/columns/column_dictionary.h"
|
||||
#include "vec/columns/column_nullable.h"
|
||||
#include "vec/columns/column_vector.h"
|
||||
@ -51,6 +56,26 @@ public:
|
||||
void evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size,
|
||||
bool* flags) const override {};
|
||||
|
||||
bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& statistic) const override {
|
||||
if (_specific_filter->is_not_in()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
CppType max_value;
|
||||
if (statistic.second->is_null()) {
|
||||
// no non-null values
|
||||
return false;
|
||||
} else {
|
||||
max_value = *reinterpret_cast<const CppType*>(statistic.second->cell_ptr());
|
||||
}
|
||||
|
||||
CppType min_value =
|
||||
statistic.first->is_null() /* contains null values */
|
||||
? 0
|
||||
: *reinterpret_cast<const CppType*>(statistic.first->cell_ptr());
|
||||
return _specific_filter->contains_any(min_value, max_value);
|
||||
}
|
||||
|
||||
Status evaluate(BitmapIndexIterator*, uint32_t, roaring::Roaring*) const override {
|
||||
return Status::OK();
|
||||
}
|
||||
@ -84,25 +109,7 @@ private:
|
||||
};
|
||||
|
||||
template <PrimitiveType T>
|
||||
void BitmapFilterColumnPredicate<T>::evaluate(ColumnBlock* block, uint16_t* sel,
|
||||
uint16_t* size) const {
|
||||
uint16_t new_size = 0;
|
||||
if (block->is_nullable()) {
|
||||
for (uint16_t i = 0; i < *size; ++i) {
|
||||
uint16_t idx = sel[i];
|
||||
sel[new_size] = idx;
|
||||
new_size += (!block->cell(idx).is_null() &&
|
||||
_specific_filter->find(*block->cell(idx).cell_ptr()));
|
||||
}
|
||||
} else {
|
||||
for (uint16_t i = 0; i < *size; ++i) {
|
||||
uint16_t idx = sel[i];
|
||||
sel[new_size] = idx;
|
||||
new_size += _specific_filter->find(*block->cell(idx).cell_ptr());
|
||||
}
|
||||
}
|
||||
*size = new_size;
|
||||
}
|
||||
void BitmapFilterColumnPredicate<T>::evaluate(ColumnBlock*, uint16_t*, uint16_t*) const {}
|
||||
|
||||
template <PrimitiveType T>
|
||||
uint16_t BitmapFilterColumnPredicate<T>::evaluate(const vectorized::IColumn& column, uint16_t* sel,
|
||||
|
||||
@ -286,6 +286,7 @@ struct OlapReaderStatistics {
|
||||
// block_load_ns
|
||||
// block_init_ns
|
||||
// block_init_seek_ns
|
||||
// block_conditions_filtered_ns
|
||||
// first_read_ns
|
||||
// block_first_read_seek_ns
|
||||
// lazy_read_ns
|
||||
@ -322,6 +323,7 @@ struct OlapReaderStatistics {
|
||||
int64_t rows_del_by_bitmap = 0;
|
||||
// the number of rows filtered by various column indexes.
|
||||
int64_t rows_conditions_filtered = 0;
|
||||
int64_t block_conditions_filtered_ns = 0;
|
||||
|
||||
int64_t index_load_ns = 0;
|
||||
|
||||
|
||||
@ -282,6 +282,7 @@ Status SegmentIterator::_prepare_seek(const StorageReadOptions::KeyRange& key_ra
|
||||
}
|
||||
|
||||
Status SegmentIterator::_get_row_ranges_by_column_conditions() {
|
||||
SCOPED_RAW_TIMER(&_opts.stats->block_conditions_filtered_ns);
|
||||
if (_row_bitmap.isEmpty()) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -21,6 +21,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdarg>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
@ -32,6 +33,7 @@
|
||||
#include <utility>
|
||||
|
||||
#include "common/logging.h"
|
||||
#include "gutil/integral_types.h"
|
||||
#include "udf/udf.h"
|
||||
#include "util/coding.h"
|
||||
#include "vec/common/pod_array.h"
|
||||
@ -1430,6 +1432,9 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
// true if contains a value that belongs to the range [left, right].
|
||||
bool contains_any(uint64_t left, uint64_t right) const;
|
||||
|
||||
uint64_t cardinality() const {
|
||||
switch (_type) {
|
||||
case EMPTY:
|
||||
@ -1677,6 +1682,16 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t max(bool* empty) const {
|
||||
return min_or_max(empty, [&]() { return _bitmap.maximum(); });
|
||||
}
|
||||
|
||||
uint64_t min(bool* empty) const {
|
||||
return min_or_max(empty, [&]() { return _bitmap.minimum(); });
|
||||
}
|
||||
|
||||
bool empty() const { return _type == EMPTY; }
|
||||
|
||||
/**
|
||||
* Return new set with specified range (not include the range_end)
|
||||
*/
|
||||
@ -1823,6 +1838,7 @@ public:
|
||||
|
||||
b_iterator begin() const;
|
||||
b_iterator end() const;
|
||||
b_iterator lower_bound(uint64_t val) const;
|
||||
|
||||
private:
|
||||
void _convert_to_smaller_type() {
|
||||
@ -1839,6 +1855,25 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t min_or_max(bool* empty, std::function<uint64_t()> func) const {
|
||||
bool is_empty = false;
|
||||
uint64_t result = 0;
|
||||
switch (_type) {
|
||||
case SINGLE:
|
||||
result = _sv;
|
||||
break;
|
||||
case BITMAP:
|
||||
result = func();
|
||||
break;
|
||||
default:
|
||||
is_empty = true;
|
||||
}
|
||||
if (empty) {
|
||||
*empty = is_empty;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
enum BitmapDataType {
|
||||
EMPTY = 0,
|
||||
SINGLE = 1, // single element
|
||||
@ -1876,7 +1911,9 @@ public:
|
||||
}
|
||||
|
||||
BitmapValueIterator(const BitmapValueIterator& other)
|
||||
: _bitmap(other._bitmap), _iter(other._iter), _sv(other._sv), _end(other._end) {}
|
||||
: _bitmap(other._bitmap), _sv(other._sv), _end(other._end) {
|
||||
_iter = other._iter ? new detail::Roaring64MapSetBitForwardIterator(*other._iter) : nullptr;
|
||||
}
|
||||
|
||||
~BitmapValueIterator() {
|
||||
if (_iter != nullptr) {
|
||||
@ -1930,7 +1967,9 @@ public:
|
||||
}
|
||||
|
||||
bool operator==(const BitmapValueIterator& other) const {
|
||||
if (_end && other._end) return true;
|
||||
if (_end && other._end) {
|
||||
return true;
|
||||
}
|
||||
|
||||
switch (_bitmap._type) {
|
||||
case BitmapValue::BitmapDataType::EMPTY:
|
||||
@ -1947,6 +1986,27 @@ public:
|
||||
|
||||
bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); }
|
||||
|
||||
/**
|
||||
* Move the iterator to the first value >= `val`.
|
||||
*/
|
||||
BitmapValueIterator& move(uint64_t val) {
|
||||
switch (_bitmap._type) {
|
||||
case BitmapValue::BitmapDataType::SINGLE:
|
||||
if (_sv < val) {
|
||||
_end = true;
|
||||
}
|
||||
break;
|
||||
case BitmapValue::BitmapDataType::BITMAP:
|
||||
if (!_iter->move(val)) {
|
||||
_end = true;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
private:
|
||||
const BitmapValue& _bitmap;
|
||||
detail::Roaring64MapSetBitForwardIterator* _iter = nullptr;
|
||||
@ -1962,4 +2022,16 @@ inline BitmapValueIterator BitmapValue::end() const {
|
||||
return BitmapValueIterator(*this, true);
|
||||
}
|
||||
|
||||
inline BitmapValueIterator BitmapValue::lower_bound(uint64_t val) const {
|
||||
return BitmapValueIterator(*this).move(val);
|
||||
}
|
||||
|
||||
inline bool BitmapValue::contains_any(uint64_t left, uint64_t right) const {
|
||||
if (left > right) {
|
||||
return false;
|
||||
}
|
||||
auto it = lower_bound(left);
|
||||
return it != end() && *it <= right;
|
||||
}
|
||||
|
||||
} // namespace doris
|
||||
|
||||
@ -66,6 +66,7 @@ Status NewOlapScanNode::_init_profile() {
|
||||
_block_init_timer = ADD_TIMER(_segment_profile, "BlockInitTime");
|
||||
_block_init_seek_timer = ADD_TIMER(_segment_profile, "BlockInitSeekTime");
|
||||
_block_init_seek_counter = ADD_COUNTER(_segment_profile, "BlockInitSeekCount", TUnit::UNIT);
|
||||
_block_conditions_filtered_timer = ADD_TIMER(_segment_profile, "BlockConditionsFilteredTime");
|
||||
|
||||
_rows_vec_cond_counter = ADD_COUNTER(_segment_profile, "RowsVectorPredFiltered", TUnit::UNIT);
|
||||
_vec_cond_timer = ADD_TIMER(_segment_profile, "VectorPredEvalTime");
|
||||
|
||||
@ -95,6 +95,7 @@ private:
|
||||
RuntimeProfile::Counter* _block_init_timer = nullptr;
|
||||
RuntimeProfile::Counter* _block_init_seek_timer = nullptr;
|
||||
RuntimeProfile::Counter* _block_init_seek_counter = nullptr;
|
||||
RuntimeProfile::Counter* _block_conditions_filtered_timer = nullptr;
|
||||
RuntimeProfile::Counter* _first_read_timer = nullptr;
|
||||
RuntimeProfile::Counter* _first_read_seek_timer = nullptr;
|
||||
RuntimeProfile::Counter* _first_read_seek_counter = nullptr;
|
||||
|
||||
@ -386,6 +386,8 @@ void NewOlapScanner::_update_counters_before_close() {
|
||||
COUNTER_UPDATE(olap_parent->_block_init_timer, stats.block_init_ns);
|
||||
COUNTER_UPDATE(olap_parent->_block_init_seek_timer, stats.block_init_seek_ns);
|
||||
COUNTER_UPDATE(olap_parent->_block_init_seek_counter, stats.block_init_seek_num);
|
||||
COUNTER_UPDATE(olap_parent->_block_conditions_filtered_timer,
|
||||
stats.block_conditions_filtered_ns);
|
||||
COUNTER_UPDATE(olap_parent->_first_read_timer, stats.first_read_ns);
|
||||
COUNTER_UPDATE(olap_parent->_first_read_seek_timer, stats.block_first_read_seek_ns);
|
||||
COUNTER_UPDATE(olap_parent->_first_read_seek_counter, stats.block_first_read_seek_num);
|
||||
|
||||
@ -42,7 +42,7 @@ struct SharedRuntimeFilterContext {
|
||||
std::shared_ptr<MinMaxFuncBase> minmax_func;
|
||||
std::shared_ptr<HybridSetBase> hybrid_set;
|
||||
std::shared_ptr<BloomFilterFuncBase> bloom_filter_func;
|
||||
std::shared_ptr<BitmapFilterFuncBase> _bitmap_filter_func;
|
||||
std::shared_ptr<BitmapFilterFuncBase> bitmap_filter_func;
|
||||
};
|
||||
|
||||
struct SharedHashTableContext {
|
||||
|
||||
Reference in New Issue
Block a user