From b537e06ecdd3bc8009ccdf6549376cd567723d3a Mon Sep 17 00:00:00 2001 From: ZenoYang Date: Thu, 17 Mar 2022 10:07:30 +0800 Subject: [PATCH] [improvement](vectorized) Make bloom filter predicate run short-circuit logic (#8484) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bloom filter predicate currently goes through the vectorized predicate evaluation path, but its `evaluate_vec` interface is not implemented, so a RuntimeFilter has no effect after it is pushed down to the storage layer. Since BF predicate computation cannot be automatically vectorized, this change makes the bloom filter predicate run the short-circuit logic instead. For SSB Q2.1 with `enable_storage_vectorization = true;`: ``` test before impl: - Total: 36s164ms - RowsVectorPredFiltered: 0 - RealRuntimeFilterType: bloomfilter - HasPushDownToEngine: true test after impl: - Total: 2s345ms - RowsVectorPredFiltered: 595.247102M (595247102) - RealRuntimeFilterType: bloomfilter - HasPushDownToEngine: true ``` --- be/src/olap/bloom_filter_predicate.h | 2 ++ be/src/olap/column_predicate.h | 2 ++ be/src/olap/rowset/segment_v2/segment_iterator.cpp | 4 +++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h index ff3201c692..c86e991020 100644 --- a/be/src/olap/bloom_filter_predicate.h +++ b/be/src/olap/bloom_filter_predicate.h @@ -65,6 +65,8 @@ public: void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override; + bool is_bloom_filter_predicate() override { return true; } + private: std::shared_ptr _filter; SpecificFilter* _specific_filter; // owned by _filter diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index 6b1aa232a1..b356a9b562 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -71,6 +71,8 @@ public: virtual bool is_in_predicate() { return false; } + virtual bool is_bloom_filter_predicate() { return false; } + protected: uint32_t _column_id; 
bool _opposite; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index d428dfd8fd..fb06911557 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -613,7 +613,9 @@ void SegmentIterator::_vec_init_lazy_materialization() { _is_pred_column[cid] = true; pred_column_ids.insert(cid); - if (type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_STRING || predicate->is_in_predicate()) { + if (type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR + || type == OLAP_FIELD_TYPE_STRING || predicate->is_in_predicate() + || predicate->is_bloom_filter_predicate()) { short_cir_pred_col_id_set.insert(cid); _short_cir_eval_predicate.push_back(predicate); _is_all_column_basic_type = false;