From 1e47d115609f75c3c427ba1d85818e2dcfd4784d Mon Sep 17 00:00:00 2001 From: Pxl Date: Thu, 22 Aug 2024 00:37:25 +0800 Subject: [PATCH] [Improvement](runtime-filter) send RUNTIME_BLOOM_FILTER_MAX_SIZE to backends (#39686) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Improvement](runtime-filter) send RUNTIME_BLOOM_FILTER_MAX_SIZE to backends (#38972) ## Proposed changes Pick from #38972 --- be/src/exprs/bloom_filter_func.h | 31 +++++++++++-------- be/src/exprs/runtime_filter.cpp | 3 ++ be/src/exprs/runtime_filter.h | 1 + .../org/apache/doris/qe/SessionVariable.java | 1 + gensrc/thrift/PaloInternalService.thrift | 2 ++ 5 files changed, 25 insertions(+), 13 deletions(-) diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h index 95d5064244..6d452bbe99 100644 --- a/be/src/exprs/bloom_filter_func.h +++ b/be/src/exprs/bloom_filter_func.h @@ -100,14 +100,14 @@ public: virtual ~BloomFilterFuncBase() = default; void init_params(const RuntimeFilterParams* params) { - _bloom_filter_length = - params->runtime_bloom_filter_min_size > 0 - ? std::max(params->bloom_filter_size, params->runtime_bloom_filter_min_size) - : params->bloom_filter_size; + _bloom_filter_length = params->bloom_filter_size; + _build_bf_exactly = params->build_bf_exactly; _runtime_bloom_filter_min_size = params->runtime_bloom_filter_min_size; + _runtime_bloom_filter_max_size = params->runtime_bloom_filter_max_size; _null_aware = params->null_aware; _bloom_filter_size_calculated_by_ndv = params->bloom_filter_size_calculated_by_ndv; + _limit_length(); } Status init_with_fixed_length() { return init_with_fixed_length(_bloom_filter_length); } @@ -128,17 +128,11 @@ public: // if FE do use ndv stat to predict the bf size, BE only use the row count. FE have more // exactly row count stat. which one is min is more correctly. if (_bloom_filter_size_calculated_by_ndv) { - _bloom_filter_length = - _runtime_bloom_filter_min_size > 0 - ? 
std::max(_runtime_bloom_filter_min_size, - std::min(be_calculate_size, _bloom_filter_length)) - : std::min(be_calculate_size, _bloom_filter_length); + _bloom_filter_length = std::min(be_calculate_size, _bloom_filter_length); } else { - _bloom_filter_length = - _runtime_bloom_filter_min_size > 0 - ? std::max(_runtime_bloom_filter_min_size, be_calculate_size) - : be_calculate_size; + _bloom_filter_length = be_calculate_size; } + _limit_length(); } return init_with_fixed_length(_bloom_filter_length); } @@ -229,6 +223,16 @@ public: uint16_t* offsets, int number, bool is_parse_column) = 0; +private: + void _limit_length() { + if (_runtime_bloom_filter_min_size > 0) { + _bloom_filter_length = std::max(_bloom_filter_length, _runtime_bloom_filter_min_size); + } + if (_runtime_bloom_filter_max_size > 0) { + _bloom_filter_length = std::min(_bloom_filter_length, _runtime_bloom_filter_max_size); + } + } + protected: // bloom filter size int32_t _bloom_filter_alloced; @@ -236,6 +240,7 @@ protected: bool _inited = false; int64_t _bloom_filter_length; int64_t _runtime_bloom_filter_min_size; + int64_t _runtime_bloom_filter_max_size; bool _build_bf_exactly = false; bool _bloom_filter_size_calculated_by_ndv = false; }; diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index b03676d43c..102846cbcf 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -1396,6 +1396,9 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue params.runtime_bloom_filter_min_size = options->__isset.runtime_bloom_filter_min_size ? options->runtime_bloom_filter_min_size : 0; + params.runtime_bloom_filter_max_size = options->__isset.runtime_bloom_filter_max_size + ? options->runtime_bloom_filter_max_size + : 0; // We build runtime filter by exact distinct count iff three conditions are met: // 1. Only 1 join key // 2. Do not have remote target (e.g. 
do not need to merge), or broadcast join diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h index 3acca8cd4e..b71bbd0648 100644 --- a/be/src/exprs/runtime_filter.h +++ b/be/src/exprs/runtime_filter.h @@ -129,6 +129,7 @@ struct RuntimeFilterParams { int64_t bloom_filter_size; int32_t max_in_num; int64_t runtime_bloom_filter_min_size; + int64_t runtime_bloom_filter_max_size; int32_t filter_id; bool bitmap_filter_not_in; bool build_bf_exactly; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 5ca5bf1d36..eca92687a8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -3408,6 +3408,7 @@ public class SessionVariable implements Serializable, Writable { tResult.setRuntimeFilterWaitTimeMs(runtimeFilterWaitTimeMs); tResult.setRuntimeFilterMaxInNum(runtimeFilterMaxInNum); tResult.setRuntimeBloomFilterMinSize(runtimeBloomFilterMinSize); + tResult.setRuntimeBloomFilterMaxSize(runtimeBloomFilterMaxSize); tResult.setRuntimeFilterWaitInfinitely(runtimeFilterWaitInfinitely); if (cpuResourceLimit > 0) { diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 9c80041f2a..41b8fb8cf0 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -320,6 +320,8 @@ struct TQueryOptions { 125: optional bool enable_match_without_inverted_index = true; 126: optional bool enable_fallback_on_missing_inverted_index = true; + 127: optional i32 runtime_bloom_filter_max_size = 16777216; + // For cloud, to control if the content would be written into file cache 1000: optional bool disable_file_cache = false }