From d5629a2b368a7ede4666543c7efa35a52b77925d Mon Sep 17 00:00:00 2001 From: XIAO-HOU <372060054@qq.com> Date: Tue, 18 Jun 2024 01:26:00 +0000 Subject: [PATCH] [FEAT MERGE] Support monotonic filter to accelerate queries. Co-authored-by: jingtaoye35 <1255153887@qq.com> Co-authored-by: qingzhu521 --- deps/oblib/src/lib/charset/ob_charset.cpp | 15 + deps/oblib/src/lib/charset/ob_charset.h | 3 + deps/oblib/src/lib/charset/ob_ctype.h | 1 + .../oblib/src/lib/charset/ob_ctype_gb18030.cc | 8 +- deps/oblib/src/lib/charset/ob_ctype_gbk.cc | 2 +- deps/oblib/src/lib/charset/ob_ctype_latin1.cc | 2 +- deps/oblib/src/lib/charset/ob_ctype_uca.cc | 4 +- deps/oblib/src/lib/charset/ob_ctype_utf16.cc | 2 +- deps/oblib/src/lib/charset/ob_ctype_utf8.cc | 2 +- src/sql/code_generator/ob_tsc_cg_service.cpp | 4 +- src/sql/engine/basic/ob_pushdown_filter.cpp | 120 +++++++- src/sql/engine/basic/ob_pushdown_filter.h | 41 ++- src/sql/engine/expr/ob_expr_lrpad.cpp | 2 +- src/sql/optimizer/ob_log_table_scan.cpp | 199 ++++++++++++- src/sql/optimizer/ob_log_table_scan.h | 32 +- src/sql/optimizer/ob_optimizer_util.cpp | 274 +++++++++++++++++- src/sql/optimizer/ob_optimizer_util.h | 25 ++ src/sql/plan_cache/ob_plan_match_helper.cpp | 1 - .../ob_transform_where_subquery_pullup.cpp | 1 + .../access/ob_index_tree_prefetcher.cpp | 5 +- .../access/ob_sstable_index_filter.cpp | 55 ++-- src/storage/access/ob_sstable_index_filter.h | 18 +- .../cs_encoding/ob_dict_column_decoder.cpp | 134 ++++++--- .../cs_encoding/ob_dict_column_decoder.h | 9 + .../blocksstable/encoding/ob_dict_decoder.cpp | 38 ++- .../blocksstable/encoding/ob_dict_decoder.h | 8 + .../ob_skip_index_filter_executor.cpp | 113 +++++++- .../ob_skip_index_filter_executor.h | 26 +- .../ob_micro_block_row_scanner.cpp | 2 +- src/storage/column_store/ob_cg_prefetcher.cpp | 9 +- src/storage/ob_storage_util.cpp | 85 ++++++ src/storage/ob_storage_util.h | 16 + .../blocksstable/test_skip_index_filter.cpp | 4 +- .../test_sstable_index_filter.cpp | 17 +- 34 
files changed, 1105 insertions(+), 172 deletions(-) diff --git a/deps/oblib/src/lib/charset/ob_charset.cpp b/deps/oblib/src/lib/charset/ob_charset.cpp index bdd5f0e06..c92a3ef88 100644 --- a/deps/oblib/src/lib/charset/ob_charset.cpp +++ b/deps/oblib/src/lib/charset/ob_charset.cpp @@ -2094,6 +2094,21 @@ bool ObCharset::is_bin_sort(ObCollationType collation_type) return ret; } +bool ObCharset::is_ci_collate(ObCollationType collation_type) +{ + bool ret = false; + if (OB_UNLIKELY(collation_type <= CS_TYPE_INVALID || + collation_type >= CS_TYPE_MAX) || + OB_ISNULL(ObCharset::charset_arr[collation_type])) { + LOG_WARN("unexpected error. invalid argument(s)", + K(ret), K(collation_type), K(lbt())); + } else { + ObCharsetInfo *cs = static_cast(ObCharset::charset_arr[collation_type]); + ret = (0 != (cs->state & OB_CS_CI)); + } + return ret; +} + ObCharsetType ObCharset::default_charset_type_ = CHARSET_UTF8MB4; ObCollationType ObCharset::default_collation_type_ = CS_TYPE_UTF8MB4_GENERAL_CI; diff --git a/deps/oblib/src/lib/charset/ob_charset.h b/deps/oblib/src/lib/charset/ob_charset.h index e5efb0216..4cfb012d1 100644 --- a/deps/oblib/src/lib/charset/ob_charset.h +++ b/deps/oblib/src/lib/charset/ob_charset.h @@ -429,6 +429,9 @@ public: ObCollationLevel &res_level, ObCollationType &res_type); static bool is_bin_sort(ObCollationType collation_type); + + static bool is_ci_collate(ObCollationType collation_type); + static ObCollationType get_bin_collation(const ObCharsetType charset_type); static int first_valid_char(const ObCollationType collation_type, const char *buf, diff --git a/deps/oblib/src/lib/charset/ob_ctype.h b/deps/oblib/src/lib/charset/ob_ctype.h index 8368c497d..3fef65bf2 100644 --- a/deps/oblib/src/lib/charset/ob_ctype.h +++ b/deps/oblib/src/lib/charset/ob_ctype.h @@ -68,6 +68,7 @@ #define OB_CS_NONASCII 8192 #define OB_CS_UNICODE_SUPPLEMENT 16384 #define OB_CS_LOWER_SORT 32768 +#define OB_CS_CI 65536 #define OB_CHARSET_UNDEFINED 0 /* Character repertoire flags 
*/ diff --git a/deps/oblib/src/lib/charset/ob_ctype_gb18030.cc b/deps/oblib/src/lib/charset/ob_ctype_gb18030.cc index 086d9f778..87c08e14b 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_gb18030.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_gb18030.cc @@ -994,7 +994,7 @@ ObCharsetInfo ob_charset_gb18030_chinese_ci = { oceanbase::common::CS_TYPE_GB18030_CHINESE_CI, 0, 0, /* number */ - OB_CS_COMPILED | OB_CS_PRIMARY | OB_CS_STRNXFRM, /* state */ + OB_CS_COMPILED | OB_CS_PRIMARY | OB_CS_STRNXFRM | OB_CS_CI, /* state */ "gb18030", /* cs name */ "gb18030_chinese_ci", /* name */ "", /* comment */ @@ -1662,7 +1662,7 @@ ObCharsetInfo ob_charset_gb18030_2022_pinyin_ci = oceanbase::common::CS_TYPE_GB18030_2022_PINYIN_CI, 0, 0, - OB_CS_COMPILED | OB_CS_PRIMARY | OB_CS_STRNXFRM, /* state */ + OB_CS_COMPILED | OB_CS_PRIMARY | OB_CS_STRNXFRM | OB_CS_CI, /* state */ "gb18030_2022", /* cs name */ "gb18030_2022_chinese_ci", /* name */ "", /* comment */ @@ -1734,7 +1734,7 @@ ObCharsetInfo ob_charset_gb18030_2022_radical_ci = oceanbase::common::CS_TYPE_GB18030_2022_RADICAL_CI, 0, 0, - OB_CS_COMPILED | OB_CS_STRNXFRM, /* state */ + OB_CS_COMPILED | OB_CS_STRNXFRM | OB_CS_CI, /* state */ "gb18030_2022", /* cs name */ "gb18030_2022_radical_ci", /* name */ "", /* comment */ @@ -1806,7 +1806,7 @@ ObCharsetInfo ob_charset_gb18030_2022_stroke_ci = oceanbase::common::CS_TYPE_GB18030_2022_STROKE_CI, 0, 0, - OB_CS_COMPILED | OB_CS_STRNXFRM, /* state */ + OB_CS_COMPILED | OB_CS_STRNXFRM | OB_CS_CI, /* state */ "gb18030_2022", /* cs name */ "gb18030_2022_stroke_ci", /* name */ "", /* comment */ diff --git a/deps/oblib/src/lib/charset/ob_ctype_gbk.cc b/deps/oblib/src/lib/charset/ob_ctype_gbk.cc index 3eee1e476..51f46efdc 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_gbk.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_gbk.cc @@ -374,7 +374,7 @@ static ObCharsetHandler ob_charset_gbk_handler= ObCharsetInfo ob_charset_gbk_chinese_ci= { 28,0,0, - OB_CS_COMPILED|OB_CS_PRIMARY|OB_CS_STRNXFRM, + 
OB_CS_COMPILED|OB_CS_PRIMARY|OB_CS_STRNXFRM|OB_CS_CI, "gbk", "gbk_chinese_ci", "", diff --git a/deps/oblib/src/lib/charset/ob_ctype_latin1.cc b/deps/oblib/src/lib/charset/ob_ctype_latin1.cc index 7a892bf98..50e122c4c 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_latin1.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_latin1.cc @@ -72,7 +72,7 @@ static ObCharsetHandler ob_charset_latin1_handler= ObCharsetInfo ob_charset_latin1 = { 8,0,0, - OB_CS_COMPILED | OB_CS_PRIMARY, + OB_CS_COMPILED | OB_CS_PRIMARY | OB_CS_CI, OB_LATIN1, OB_LATIN1_SWEDISH_CI, "cp1252 West European", diff --git a/deps/oblib/src/lib/charset/ob_ctype_uca.cc b/deps/oblib/src/lib/charset/ob_ctype_uca.cc index d3283082d..c9a41217f 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_uca.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_uca.cc @@ -2895,7 +2895,7 @@ static ObCollationHandler ob_collation_utf16_uca_handler = ObCharsetInfo ob_charset_utf8mb4_unicode_ci= { 224,0,0, - OB_CS_UTF8MB4_UCA_FLAGS, + OB_CS_UTF8MB4_UCA_FLAGS | OB_CS_CI, OB_UTF8MB4, OB_UTF8MB4_UNICODE_CI, "", @@ -2930,7 +2930,7 @@ ObCharsetInfo ob_charset_utf8mb4_unicode_ci= ObCharsetInfo ob_charset_utf16_unicode_ci= { 101,0,0, - OB_CS_UTF16_UCA_FLAGS, + OB_CS_UTF16_UCA_FLAGS | OB_CS_CI, OB_UTF16, OB_UTF16_UNICODE_CI, "", diff --git a/deps/oblib/src/lib/charset/ob_ctype_utf16.cc b/deps/oblib/src/lib/charset/ob_ctype_utf16.cc index 089d8bd64..575366acf 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_utf16.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_utf16.cc @@ -1222,7 +1222,7 @@ ObCharsetInfo ob_charset_utf16_bin= ObCharsetInfo ob_charset_utf16_general_ci= { 54,0,0, - OB_CS_COMPILED|OB_CS_PRIMARY|OB_CS_STRNXFRM|OB_CS_UNICODE|OB_CS_NONASCII, + OB_CS_COMPILED|OB_CS_PRIMARY|OB_CS_STRNXFRM|OB_CS_UNICODE|OB_CS_NONASCII|OB_CS_CI, OB_UTF16, OB_UTF16_GENERAL_CI, "UTF-16 Unicode", diff --git a/deps/oblib/src/lib/charset/ob_ctype_utf8.cc b/deps/oblib/src/lib/charset/ob_ctype_utf8.cc index 935274f40..e7b2e844b 100644 --- 
a/deps/oblib/src/lib/charset/ob_ctype_utf8.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_utf8.cc @@ -1004,7 +1004,7 @@ static ObCollationHandler ob_collation_utf8mb4_bin_handler = ObCharsetInfo ob_charset_utf8mb4_general_ci= { 45,0,0, - OB_CS_COMPILED|OB_CS_PRIMARY|OB_CS_STRNXFRM|OB_CS_UNICODE|OB_CS_UNICODE_SUPPLEMENT, + OB_CS_COMPILED|OB_CS_PRIMARY|OB_CS_STRNXFRM|OB_CS_UNICODE|OB_CS_UNICODE_SUPPLEMENT|OB_CS_CI, OB_UTF8MB4, OB_UTF8MB4_GENERAL_CI, "UTF-8 Unicode", diff --git a/src/sql/code_generator/ob_tsc_cg_service.cpp b/src/sql/code_generator/ob_tsc_cg_service.cpp index d1f98fa4a..3933f8b43 100644 --- a/src/sql/code_generator/ob_tsc_cg_service.cpp +++ b/src/sql/code_generator/ob_tsc_cg_service.cpp @@ -456,7 +456,7 @@ int ObTscCgService::generate_tsc_filter(const ObLogTableScan &op, ObTableScanSpe LOG_WARN("generate scan ctdef pushdown filter"); } else if (pd_filter) { ObPushdownFilterConstructor filter_constructor( - &cg_.phy_plan_->get_allocator(), cg_, + &cg_.phy_plan_->get_allocator(), cg_, &op, scan_ctdef.pd_expr_spec_.pd_storage_flag_.is_use_column_store()); if (OB_FAIL(filter_constructor.apply( scan_pushdown_filters, scan_ctdef.pd_expr_spec_.pd_storage_filters_.get_pushdown_filter()))) { @@ -470,7 +470,7 @@ int ObTscCgService::generate_tsc_filter(const ObLogTableScan &op, ObTableScanSpe LOG_WARN("generate lookup ctdef pushdown filter failed", K(ret)); } else if (pd_filter) { ObPushdownFilterConstructor filter_constructor( - &cg_.phy_plan_->get_allocator(), cg_, + &cg_.phy_plan_->get_allocator(), cg_, &op, lookup_ctdef->pd_expr_spec_.pd_storage_flag_.is_use_column_store()); if (OB_FAIL(filter_constructor.apply( lookup_pushdown_filters, lookup_ctdef->pd_expr_spec_.pd_storage_filters_.get_pushdown_filter()))) { diff --git a/src/sql/engine/basic/ob_pushdown_filter.cpp b/src/sql/engine/basic/ob_pushdown_filter.cpp index 191b53dcc..bd5c184b9 100644 --- a/src/sql/engine/basic/ob_pushdown_filter.cpp +++ b/src/sql/engine/basic/ob_pushdown_filter.cpp @@ -60,7 +60,7 @@ 
OB_SERIALIZE_MEMBER(ObPushdownFilterNode, type_, n_child_, col_ids_); OB_SERIALIZE_MEMBER((ObPushdownAndFilterNode,ObPushdownFilterNode), is_runtime_filter_root_node_); OB_SERIALIZE_MEMBER((ObPushdownOrFilterNode,ObPushdownFilterNode)); OB_SERIALIZE_MEMBER((ObPushdownBlackFilterNode,ObPushdownFilterNode), - column_exprs_, filter_exprs_); + column_exprs_, filter_exprs_, assist_exprs_, mono_); OB_DEF_SERIALIZE(ObPushdownWhiteFilterNode) { int ret = OB_SUCCESS; @@ -373,6 +373,8 @@ int ObPushdownFilterConstructor::create_black_filter_node( ret = OB_ERR_UNEXPECTED; LOG_WARN("black filter node is null", K(ret)); } else if (FALSE_IT(black_filter_node = static_cast(filter_node))) { + } else if (OB_FAIL(get_black_filter_monotonicity(raw_expr, column_exprs, black_filter_node))) { + LOG_WARN("failed to get black filter monotonicity", K(ret)); } else if (0 < column_exprs.count()) { if (OB_FAIL(black_filter_node->col_ids_.init(column_exprs.count()))) { LOG_WARN("failed to init col ids", K(ret)); @@ -401,6 +403,39 @@ int ObPushdownFilterConstructor::create_black_filter_node( return ret; } +int ObPushdownFilterConstructor::get_black_filter_monotonicity( + const ObRawExpr *raw_expr, + common::ObIArray &column_exprs, + ObPushdownBlackFilterNode *black_filter_node) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(raw_expr) || OB_ISNULL(black_filter_node) || OB_ISNULL(op_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Unexpected status", K(ret), K(raw_expr), K(black_filter_node), K(op_)); + } else if (1 == column_exprs.count()) { + ObSEArray tmp_exprs; + PushdownFilterMonotonicity mono; + if (OB_FAIL(op_->get_filter_monotonicity(raw_expr, static_cast(column_exprs.at(0)), + mono, tmp_exprs))) { + LOG_WARN("Failed to get filter monotonicity", K(ret), KPC(raw_expr), K(column_exprs)); + } else if (OB_UNLIKELY(mono < MON_NON || mono > MON_EQ_DESC)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected filter monotonicity", K(mono)); + } else if (FALSE_IT(black_filter_node->mono_ = mono)) { + } else 
if (tmp_exprs.count() == 0) { + } else if (tmp_exprs.count() != 2) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected expr count", K(ret), K(tmp_exprs)); + } else if (OB_FAIL(black_filter_node->assist_exprs_.init(2))) { + LOG_WARN("Failed to init assist exprs", K(ret)); + } else if (OB_FAIL(static_cg_.generate_rt_exprs(tmp_exprs, black_filter_node->assist_exprs_))) { + LOG_WARN("Failed to generate rt exprs", K(ret), K(tmp_exprs)); + } + LOG_TRACE("[PUSHDOWN] check black filter monotonicity", K(ret), KPC(raw_expr), K(column_exprs), KPC(black_filter_node)); + } + return ret; +} + // For ObPushdownWhiteFilterNode, always has col_idx = 0 and column_exprs.count() = 1. // For ObPushdownDynamicFilterNode, possibly column_exprs.count() is greater than 1, // and col_idx ranges from 0 to column_exprs.count() - 1 @@ -481,7 +516,9 @@ int ObPushdownFilterConstructor::merge_filter_node( if (OB_ISNULL(other) || OB_ISNULL(dst) || dst == other) { } else if (dst->get_type() == other->get_type()) { if (dst->get_type() == PushdownFilterType::BLACK_FILTER - && is_array_equal(dst->get_col_ids(), other->get_col_ids())) + && is_array_equal(dst->get_col_ids(), other->get_col_ids()) + && !static_cast(dst)->is_monotonic() + && !static_cast(other)->is_monotonic()) { if (OB_FAIL(merged_node.push_back(other))) { LOG_WARN("failed to push back", K(ret)); @@ -1765,10 +1802,16 @@ int ObPhysicalFilterExecutor::init_eval_param(const int32_t cur_eval_info_cnt, c return ret; } -void ObPhysicalFilterExecutor::clear_evaluated_datums() +void ObPhysicalFilterExecutor::clear_evaluated_flags() { - for (int i = 0; i < n_datum_eval_flags_; i++) { - datum_eval_flags_[i]->unset(op_.get_eval_ctx().get_batch_idx()); + if (op_.is_vectorized()) { + for (int i = 0; i < n_datum_eval_flags_; i++) { + datum_eval_flags_[i]->unset(op_.get_eval_ctx().get_batch_idx()); + } + } else { + for (int i = 0; i < n_eval_infos_; i++) { + eval_infos_[i]->clear_evaluated_flag(); + } } } @@ -2000,12 +2043,7 @@ int 
ObWhiteFilterExecutor::filter(ObEvalCtx &eval_ctx, const sql::ObBitVector &s filtered = !res->is_true(batch_idx); } } - - if (op_.is_vectorized()) { - clear_evaluated_datums(); - } else { - clear_evaluated_infos(); - } + clear_evaluated_flags(); return ret; } @@ -2043,11 +2081,65 @@ int ObBlackFilterExecutor::filter(ObEvalCtx &eval_ctx, const sql::ObBitVector &s } } } + clear_evaluated_flags(); + return ret; +} - if (op_.is_vectorized()) { - clear_evaluated_datums(); +int ObBlackFilterExecutor::filter(blocksstable::ObStorageDatum &datum, const sql::ObBitVector &skip_bit, bool &ret_val) +{ + return ObPhysicalFilterExecutor::filter(&datum, 1, skip_bit, ret_val); +} + +int ObBlackFilterExecutor::judge_greater_or_less( + blocksstable::ObStorageDatum &datum, + const sql::ObBitVector &skip_bit, + const bool is_greater, + bool &ret_val) +{ + int ret = OB_SUCCESS; + ret_val = false; + sql::ObExpr *assist_expr = nullptr; + sql::ObExpr *column_expr = nullptr; + ObEvalCtx &eval_ctx = op_.get_eval_ctx(); + const common::ObIArray *column_exprs = get_cg_col_exprs(); + if (OB_UNLIKELY(nullptr == column_exprs || column_exprs->count() != 1 || + filter_.assist_exprs_.count() != 2 || + filter_.mono_ < MON_NON || filter_.mono_ > MON_EQ_DESC)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected filter status", K(ret), KPC(column_exprs), K_(filter)); + } else if (FALSE_IT(assist_expr = is_greater ? 
filter_.assist_exprs_.at(0) : filter_.assist_exprs_.at(1))) { + } else if (FALSE_IT(column_expr = column_exprs->at(0))) { + } else if (OB_ISNULL(assist_expr) || OB_ISNULL(column_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpect null expr", K(ret), K(filter_.assist_exprs_), KPC(column_exprs)); } else { - clear_evaluated_infos(); + ObDatum &expr_datum = column_expr->locate_datum_for_write(eval_ctx); + if (OB_FAIL(expr_datum.from_storage_datum(datum, column_expr->obj_datum_map_))) { + LOG_WARN("Failed to convert from datum", K(ret), K(datum)); + } else if (!op_.enable_rich_format_) { + ObDatum *cmp_res = nullptr; + if (OB_FAIL(assist_expr->eval(eval_ctx, cmp_res))) { + LOG_WARN("failed to filter child", K(ret)); + } else { + ret_val = !is_row_filtered(*cmp_res); + } + } else { + const int64_t batch_idx = eval_ctx.get_batch_idx(); + EvalBound eval_bound(eval_ctx.get_batch_size(), batch_idx, batch_idx + 1, true); + if (OB_FAIL(assist_expr->eval_vector(eval_ctx, skip_bit, eval_bound))) { + LOG_WARN("Failed to evaluate vector", K(ret)); + } else { + ObIVector *res = assist_expr->get_vector(eval_ctx); + ret_val = res->is_true(batch_idx); + } + } + clear_evaluated_flags(); + if (op_.is_vectorized() && assist_expr->is_batch_result()) { + assist_expr->get_evaluated_flags(eval_ctx).unset(eval_ctx.get_batch_idx()); + } else { + assist_expr->get_eval_info(eval_ctx).clear_evaluated_flag(); + } + LOG_DEBUG("check judge greater or less status", K(expr_datum), K(datum), KPC(column_expr), KPC(assist_expr), K(is_greater)); } return ret; } diff --git a/src/sql/engine/basic/ob_pushdown_filter.h b/src/sql/engine/basic/ob_pushdown_filter.h index 2a098beeb..cf4d9df5f 100644 --- a/src/sql/engine/basic/ob_pushdown_filter.h +++ b/src/sql/engine/basic/ob_pushdown_filter.h @@ -287,6 +287,17 @@ public: {} }; +// Suppose f(x) is a black filter, the monotonicity of f(x) can be utilized to filter rows and accelerate query. 
+// The rules for different monotonicities are as follows: +enum PushdownFilterMonotonicity +{ + MON_NON = 0, // no monotonicity + MON_ASC = 1, // If f(min) is true, all f(x) is true if x >= min. If f(max) is false, all f(x) is false if x <= max. + MON_DESC = 2, // If f(max) is true, all f(x) is true if x <= max. If f(min) is false, all f(x) is false if x >= min. + MON_EQ_ASC = 3, // f(x) = const and f(x) is monotonically ascending. If f(min) > const || f(max) < const, f(x) is false if x in [min, max]. + MON_EQ_DESC = 4 // f(x) = const and f(x) is monotonically descending. If f(min) < const || f(max) > const, f(x) is false if x in [min, max]. +}; + class ObPushdownBlackFilterNode : public ObPushdownFilterNode { OB_UNIS_VERSION_V(1); @@ -295,14 +306,17 @@ public: : ObPushdownFilterNode(alloc, PushdownFilterType::BLACK_FILTER), column_exprs_(alloc), filter_exprs_(alloc), - tmp_expr_(nullptr) + tmp_expr_(nullptr), + assist_exprs_(alloc), + mono_(MON_NON) {} ~ObPushdownBlackFilterNode() {} int merge(common::ObIArray &merged_node) override; virtual int postprocess() override; + OB_INLINE bool is_monotonic() const { return MON_NON != mono_; } INHERIT_TO_STRING_KV("ObPushdownBlackFilterNode", ObPushdownFilterNode, - K_(column_exprs), K_(filter_exprs)); + K_(column_exprs), K_(filter_exprs), K_(assist_exprs), K_(mono)); int64_t get_filter_expr_count() { return filter_exprs_.empty() ? 1 : filter_exprs_.count(); } @@ -312,6 +326,10 @@ public: // 下压临时保存的filter,如果发生merge,则所有的filter放入filter_exprs_ // 如果没有发生merge,则将自己的tmp_expr_放入filter_exprs_中 ObExpr *tmp_expr_; + // The exprs to judge greater or less when mono_ is MON_EQ_ASC/MON_EQ_DESC. + // assist_exprs_[0] is greater expr, assist_exprs_[1] is less expr.
+ ExprFixedArray assist_exprs_; + PushdownFilterMonotonicity mono_; }; enum ObWhiteFilterOperatorType @@ -458,15 +476,20 @@ private: class ObPushdownFilterConstructor { public: - ObPushdownFilterConstructor(common::ObIAllocator *alloc, ObStaticEngineCG &static_cg, + ObPushdownFilterConstructor(common::ObIAllocator *alloc, + ObStaticEngineCG &static_cg, + const ObLogTableScan *op, bool use_column_store) - : alloc_(alloc), factory_(alloc), static_cg_(static_cg), use_column_store_(use_column_store) + : alloc_(alloc), factory_(alloc), static_cg_(static_cg), op_(op), use_column_store_(use_column_store) {} int apply(common::ObIArray &exprs, ObPushdownFilterNode *&filter_tree); private: int is_white_mode(const ObRawExpr* raw_expr, bool &is_white); int create_black_filter_node(ObRawExpr *raw_expr, ObPushdownFilterNode *&filter_tree); + int get_black_filter_monotonicity(const ObRawExpr *raw_expr, + common::ObIArray &column_exprs, + ObPushdownBlackFilterNode *black_filter_node); template int create_white_or_dynamic_filter_node(ObRawExpr *raw_expr, ObPushdownFilterNode *&filter_tree, int64_t col_idx = 0); @@ -489,6 +512,7 @@ private: common::ObIAllocator *alloc_; ObPushdownFilterFactory factory_; ObStaticEngineCG &static_cg_; + const ObLogTableScan *op_; bool use_column_store_; }; @@ -729,7 +753,7 @@ public: K_(n_eval_infos), KP_(eval_infos)); protected: int init_eval_param(const int32_t cur_eval_info_cnt, const int64_t eval_expr_cnt); - void clear_evaluated_datums(); + void clear_evaluated_flags(); void clear_evaluated_infos(); protected: int32_t n_eval_infos_; @@ -762,6 +786,13 @@ public: INHERIT_TO_STRING_KV("ObPushdownBlackFilterExecutor", ObPhysicalFilterExecutor, K_(filter), KP_(skip_bit)); virtual int filter(ObEvalCtx &eval_ctx, const sql::ObBitVector &skip_bit, bool &filtered) override; + virtual int filter(blocksstable::ObStorageDatum &datum, const sql::ObBitVector &skip_bit, bool &ret_val); + virtual int judge_greater_or_less(blocksstable::ObStorageDatum &datum, + 
const sql::ObBitVector &skip_bit, + const bool is_greater, + bool &ret_val); + OB_INLINE bool is_monotonic() const { return filter_.is_monotonic(); } + OB_INLINE PushdownFilterMonotonicity get_monotonicity() const { return filter_.mono_; } private: int eval_exprs_batch(ObBitVector &skip, const int64_t bsize); diff --git a/src/sql/engine/expr/ob_expr_lrpad.cpp b/src/sql/engine/expr/ob_expr_lrpad.cpp index 008b2d066..0c0fc2785 100644 --- a/src/sql/engine/expr/ob_expr_lrpad.cpp +++ b/src/sql/engine/expr/ob_expr_lrpad.cpp @@ -478,7 +478,7 @@ int ObExprBaseLRpad::get_padding_info_mysql(const ObCollationType &cs, || OB_UNLIKELY(pad_size <= 0)) { // this should been resolve outside ret = OB_ERR_UNEXPECTED; - LOG_WARN("wrong len", K(ret), K(len), K(text_len)); + LOG_WARN("wrong len", K(ret), K(len), K(text_len), K(pad_len), K(pad_size)); } else { repeat_count = std::min((len - text_len) / pad_len, (max_result_size - text_size) / pad_size); int64_t remain_len = len - (text_len + pad_len * repeat_count); diff --git a/src/sql/optimizer/ob_log_table_scan.cpp b/src/sql/optimizer/ob_log_table_scan.cpp index e43d27985..ba63a0fbd 100644 --- a/src/sql/optimizer/ob_log_table_scan.cpp +++ b/src/sql/optimizer/ob_log_table_scan.cpp @@ -186,6 +186,10 @@ int ObLogTableScan::get_op_exprs(ObIArray &all_exprs) LOG_WARN("failed to append exprs", K(ret)); } else if (OB_FAIL(append(all_exprs, pushdown_aggr_exprs_))) { LOG_WARN("failed to append exprs", K(ret)); + } else if (OB_FAIL(generate_filter_monotonicity())) { + LOG_WARN("failed to analyze filter monotonicity", K(ret)); + } else if (OB_FAIL(get_filter_assist_exprs(all_exprs))) { + LOG_WARN("failed to get filter assist expr", K(ret)); } else if (OB_FAIL(ObLogicalOperator::get_op_exprs(all_exprs))) { LOG_WARN("failed to get exprs", K(ret)); } else { /*do nothing*/ } @@ -1081,7 +1085,7 @@ int ObLogTableScan::set_table_scan_filters(const common::ObIArray & LOG_WARN("failed to pick out query range exprs", K(ret)); } else if 
(OB_FAIL(pick_out_startup_filters())) { LOG_WARN("failed to pick out startup filters", K(ret)); - } else { /*do nothing*/ } + } return ret; } @@ -2539,3 +2543,196 @@ int ObLogTableScan::get_card_without_filter(double &card) card = NULL != est_cost_info_ ? est_cost_info_->phy_query_range_row_count_ : 1.0; return ret; } + +int ObLogTableScan::generate_filter_monotonicity() +{ + int ret = OB_SUCCESS; + ObExecContext *exec_ctx = NULL; + const ParamStore *param_store = NULL; + ObRawExpr * filter_expr = NULL; + ObSEArray col_exprs; + if (OB_ISNULL(get_plan()) || OB_ISNULL(get_stmt()) || OB_ISNULL(get_stmt()->get_query_ctx()) || + OB_ISNULL(exec_ctx = get_plan()->get_optimizer_context().get_exec_ctx()) || + OB_ISNULL(param_store = get_plan()->get_optimizer_context().get_params())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("got unexpected NULL ptr", K(ret)); + } else if (get_stmt()->get_query_ctx()->optimizer_features_enable_version_ < COMPAT_VERSION_4_3_2) { + filter_monotonicity_.reset(); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < get_filter_exprs().count(); ++i) { + col_exprs.reuse(); + if (OB_ISNULL(filter_expr = get_filter_exprs().at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("got unexpected NULL ptr", K(ret)); + } else if (T_OP_GT != filter_expr->get_expr_type() && + T_OP_GE != filter_expr->get_expr_type() && + T_OP_LT != filter_expr->get_expr_type() && + T_OP_LE != filter_expr->get_expr_type() && + T_OP_EQ != filter_expr->get_expr_type()) { + /* do nothing */ + } else if (OB_UNLIKELY(2 != filter_expr->get_param_count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("got unexpected param", K(ret), K(*filter_expr)); + } else if (OB_FAIL(ObRawExprUtils::extract_column_exprs(filter_expr, col_exprs))) { + LOG_WARN("failed to extract column exprs", K(ret)); + } else if (1 == col_exprs.count()) { + Monotonicity mono = Monotonicity::NONE_MONO; + Monotonicity left_mono = Monotonicity::NONE_MONO; + Monotonicity right_mono = Monotonicity::NONE_MONO; + bool left_dummy_bool 
= true; + bool right_dummy_bool = true; + ObPCConstParamInfo left_const_param_info; + ObPCConstParamInfo right_const_param_info; + ObRawFilterMonotonicity *filter_mono = NULL; + ObOpRawExpr *assist_expr = NULL; + ObRawExpr *func_expr = NULL; + ObRawExpr *const_expr = NULL; + if (OB_FAIL(ObOptimizerUtil::get_expr_monotonicity(filter_expr->get_param_expr(0), col_exprs.at(0), + *exec_ctx, left_mono, left_dummy_bool, + *param_store, left_const_param_info))) { + LOG_WARN("failed to get expr monotonicity", K(ret)); + } else if (OB_FAIL(ObOptimizerUtil::get_expr_monotonicity(filter_expr->get_param_expr(1), + col_exprs.at(0), *exec_ctx, right_mono, + right_dummy_bool, *param_store, + right_const_param_info))) { + LOG_WARN("failed to get expr monotonicity", K(ret)); + } else { + if (Monotonicity::NONE_MONO == left_mono) { + /* do nothing */ + } else if (Monotonicity::CONST == left_mono) { + const_expr = filter_expr->get_param_expr(0); + } else if (Monotonicity::ASC == left_mono || Monotonicity::DESC == left_mono) { + func_expr = filter_expr->get_param_expr(0); + mono = left_mono; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("got unknow monotonicity type", K(ret), K(left_mono)); + } + if (OB_FAIL(ret)) { + } else if (Monotonicity::NONE_MONO == right_mono) { + /* do nothing */ + } else if (Monotonicity::CONST == right_mono) { + const_expr = filter_expr->get_param_expr(1); + } else if (Monotonicity::ASC == right_mono || Monotonicity::DESC == right_mono) { + func_expr = filter_expr->get_param_expr(1); + mono = right_mono; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("got unknow monotonicity type", K(ret), K(right_mono)); + } + } + if (OB_SUCC(ret)) { + if (NULL == func_expr || NULL == const_expr || + !(Monotonicity::ASC == mono || Monotonicity::DESC == mono)) { + /* do nothing */ + } else if (OB_ISNULL(filter_mono = filter_monotonicity_.alloc_place_holder())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("alloc failed", K(ret)); + } else if 
(!left_const_param_info.const_idx_.empty() && + OB_FAIL(const_param_constraints_.push_back(left_const_param_info))) { + LOG_WARN("failed to push back", K(ret)); + } else if (!right_const_param_info.const_idx_.empty() && + OB_FAIL(const_param_constraints_.push_back(right_const_param_info))) { + LOG_WARN("failed to push back", K(ret)); + } else { + filter_mono->filter_expr_ = filter_expr; + filter_mono->col_expr_ = static_cast(col_exprs.at(0)); + if (T_OP_EQ != filter_expr->get_expr_type()) { + /* asc && f(x) > const --> mon_asc + * asc && f(x) < const --> mon_desc + * desc && f(x) > const --> mon_desc + * desc && f(x) < const --> mon_asc + */ + if (Monotonicity::ASC == mono) { + if (T_OP_GT == filter_expr->get_expr_type() || + T_OP_GE == filter_expr->get_expr_type()) { + filter_mono->mono_ = PushdownFilterMonotonicity::MON_ASC; + } else { + filter_mono->mono_ = PushdownFilterMonotonicity::MON_DESC; + } + } else { + if (T_OP_GT == filter_expr->get_expr_type() || + T_OP_GE == filter_expr->get_expr_type()) { + filter_mono->mono_ = PushdownFilterMonotonicity::MON_DESC; + } else { + filter_mono->mono_ = PushdownFilterMonotonicity::MON_ASC; + } + } + } else { + /* asc && f(x) = const --> mon_eq_asc + f(x) > const + f(x) < const + * desc && f(x) = const --> mon_eq_desc + f(x) > const + f(x) < const + */ + ObRawExprFactory &expr_factory = get_plan()->get_optimizer_context().get_expr_factory(); + ObIAllocator &allocator = get_plan()->get_allocator(); + filter_mono->mono_ = Monotonicity::ASC == mono ? 
PushdownFilterMonotonicity::MON_EQ_ASC : + PushdownFilterMonotonicity::MON_EQ_DESC; + filter_mono->assist_exprs_.set_allocator(&allocator); + filter_mono->assist_exprs_.set_capacity(2); + if (OB_FAIL(expr_factory.create_raw_expr(T_OP_GT, assist_expr))) { + LOG_WARN("failed to create gt raw expr", K(ret)); + } else if (OB_ISNULL(assist_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("alloc failed", K(ret)); + } else if (OB_FAIL(assist_expr->set_param_exprs(func_expr, const_expr))) { + LOG_WARN("failed to set param exprs", K(ret)); + } else if (OB_FAIL(assist_expr->formalize(get_plan()->get_optimizer_context().get_session_info()))) { + LOG_WARN("failed to get formalize expr", K(ret)); + } else if (OB_FAIL(filter_mono->assist_exprs_.push_back(assist_expr))) { + LOG_WARN("failed to push back", K(ret)); + } else if (OB_FAIL(expr_factory.create_raw_expr(T_OP_LT, assist_expr))) { + LOG_WARN("failed to create gt raw expr", K(ret)); + } else if (OB_ISNULL(assist_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("alloc failed", K(ret)); + } else if (OB_FAIL(assist_expr->set_param_exprs(func_expr, const_expr))) { + LOG_WARN("failed to set param exprs", K(ret)); + } else if (OB_FAIL(assist_expr->formalize(get_plan()->get_optimizer_context().get_session_info()))) { + LOG_WARN("failed to get formalize expr", K(ret)); + } else if (OB_FAIL(filter_mono->assist_exprs_.push_back(assist_expr))) { + LOG_WARN("failed to push back", K(ret)); + } + } + } + } + } + } // end for + } + return ret; +} + +int ObLogTableScan::get_filter_monotonicity(const ObRawExpr *filter, + const ObColumnRefRawExpr *col_expr, + PushdownFilterMonotonicity &mono, + ObIArray &assist_exprs) const +{ + int ret = OB_SUCCESS; + mono = PushdownFilterMonotonicity::MON_NON; + assist_exprs.reuse(); + if (OB_ISNULL(filter) || OB_ISNULL(col_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("got unexpected NULL ptr", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < filter_monotonicity_.count(); ++i) { + if (filter == 
filter_monotonicity_.at(i).filter_expr_ && + col_expr == filter_monotonicity_.at(i).col_expr_ && + PushdownFilterMonotonicity::MON_NON != filter_monotonicity_.at(i).mono_) { + mono = filter_monotonicity_.at(i).mono_; + if (OB_FAIL(append(assist_exprs, filter_monotonicity_.at(i).assist_exprs_))) { + LOG_WARN("failed to append"); + } + break; + } + } + return ret; +} + +int ObLogTableScan::get_filter_assist_exprs(ObIArray &assist_exprs) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < filter_monotonicity_.count(); ++i) { + if (OB_FAIL(append(assist_exprs, filter_monotonicity_.at(i).assist_exprs_))) { + LOG_WARN("failed to append"); + } + } + return ret; +} \ No newline at end of file diff --git a/src/sql/optimizer/ob_log_table_scan.h b/src/sql/optimizer/ob_log_table_scan.h index 22137ebc2..b714e04c4 100644 --- a/src/sql/optimizer/ob_log_table_scan.h +++ b/src/sql/optimizer/ob_log_table_scan.h @@ -77,6 +77,25 @@ struct ObTextRetrievalInfo ObRawExpr *relevance_expr_; // BM25 }; +struct ObRawFilterMonotonicity +{ + ObRawFilterMonotonicity() : filter_expr_(NULL), + col_expr_(NULL), + mono_(PushdownFilterMonotonicity::MON_NON), + assist_exprs_() {} + + + + ObRawExpr *filter_expr_; + ObColumnRefRawExpr *col_expr_; + PushdownFilterMonotonicity mono_; + common::ObFixedArray assist_exprs_; + TO_STRING_KV(K_(filter_expr), + K_(col_expr), + K_(mono), + K_(assist_exprs)); +}; + class ObLogTableScan : public ObLogicalOperator { public: @@ -135,7 +154,8 @@ public: table_type_(share::schema::MAX_TABLE_TYPE), use_column_store_(false), doc_id_table_id_(common::OB_INVALID_ID), - text_retrieval_info_() + text_retrieval_info_(), + filter_monotonicity_() { } @@ -558,6 +578,12 @@ public: inline ObRawExpr *get_identify_seq_expr() { return identify_seq_expr_; } void set_identify_seq_expr(ObRawExpr *expr) { identify_seq_expr_ = expr; } + const ObIArray& get_filter_monotonicity() const + { return filter_monotonicity_; } + int get_filter_monotonicity(const ObRawExpr 
*filter, + const ObColumnRefRawExpr *col_expr, + PushdownFilterMonotonicity &mono, + ObIArray &assist_exprs) const; private: // member functions //called when index_back_ set int pick_out_query_range_exprs(); @@ -578,6 +604,8 @@ private: // member functions int get_text_retrieval_calc_exprs(ObIArray &all_exprs); int print_text_retrieval_annotation(char *buf, int64_t buf_len, int64_t &pos, ExplainType type); int find_nearest_rcte_op(ObLogSet *&rcte_op); + int generate_filter_monotonicity(); + int get_filter_assist_exprs(ObIArray &assist_exprs); protected: // memeber variables // basic info uint64_t table_id_; //table id or alias table id @@ -693,6 +721,8 @@ protected: // memeber variables ObTextRetrievalInfo text_retrieval_info_; ObPxRFStaticInfo px_rf_info_; + typedef common::ObSEArray FilterMonotonicity; + FilterMonotonicity filter_monotonicity_; // disallow copy and assign DISALLOW_COPY_AND_ASSIGN(ObLogTableScan); }; diff --git a/src/sql/optimizer/ob_optimizer_util.cpp b/src/sql/optimizer/ob_optimizer_util.cpp index a228932cc..b2dd7f863 100644 --- a/src/sql/optimizer/ob_optimizer_util.cpp +++ b/src/sql/optimizer/ob_optimizer_util.cpp @@ -953,6 +953,254 @@ int ObOptimizerUtil::compute_const_exprs(ObRawExpr *cur_expr, return ret; } +static inline Monotonicity get_opposite_of(Monotonicity mono) { + Monotonicity ret = Monotonicity::CONST; + if (mono == Monotonicity::ASC) { + ret = Monotonicity::DESC; + } else if (mono == Monotonicity::DESC) { + ret = Monotonicity::ASC; + } else if (mono == Monotonicity::CONST) { + ret = Monotonicity::CONST; + } else if (mono == Monotonicity::NONE_MONO) { + ret = Monotonicity::NONE_MONO; + } + return ret; +} + +int ObOptimizerUtil::get_expr_monotonicity(const ObRawExpr *expr, + const ObRawExpr *var, + ObExecContext &ctx, + Monotonicity &monotonicity, + bool &is_strict, + const ParamStore ¶m_store, + ObPCConstParamInfo& const_param_info) +{ + int ret = OB_SUCCESS; + bool is_not_null = false; + const ObColumnRefRawExpr *col = NULL; + if 
(OB_ISNULL(expr) || OB_ISNULL(var)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expression input is null error", K(ret)); + } else if (!var->is_column_ref_expr()) { + monotonicity = Monotonicity::NONE_MONO; + } else if (OB_FALSE_IT(col = static_cast(var))) { + // never in + } else if (OB_FAIL(get_expr_monotonicity_recursively(expr, col, ctx, + monotonicity, is_strict, + param_store, const_param_info))) { + LOG_WARN("Failed to get expr monotonicity recursiviely ", K(ret)); + } + return ret; +} + +// requires the function type to be a null-propagating expr +int ObOptimizerUtil::get_expr_monotonicity_recursively(const ObRawExpr* expr, + const ObColumnRefRawExpr *var, + ObExecContext& ctx, + Monotonicity &monotonicity, + bool &is_strict, + const ParamStore &param_store, + ObPCConstParamInfo& const_param_info) +{ + int ret = OB_SUCCESS; + monotonicity = Monotonicity::NONE_MONO; + is_strict = false; + if (OB_ISNULL(expr) || OB_ISNULL(var)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expression input is null error", K(ret)); + } else if (expr->is_const_raw_expr()) { + // the check below cannot be merged into the condition above + // e.g. select x + 10(:?): judging by the result, both "+ int" and "+ null" are parsed as a const raw expr, so only the result type can tell them apart + if (!expr->get_result_type().get_param().is_null_oracle()) { + monotonicity = Monotonicity::CONST; + } + } else if (expr->is_column_ref_expr()) { + // expr may be another column that is not var; only var itself is ASC + if (expr == var) { + monotonicity = Monotonicity::ASC; + is_strict = true; + } + } else { + // The following is a classification discussion for composite cases. + // Only one branch will be chosen for entry. Before entering, the monotonicity is none.
+ Monotonicity mono = Monotonicity::NONE_MONO; + bool is_strict_inner = false; + if (expr->get_expr_type() == T_FUN_SYS_UPPER || expr->get_expr_type() == T_FUN_SYS_LOWER) { + const ObRawExpr *param_expr = expr->get_param_expr(0); + if (OB_ISNULL(param_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expression input is null error", K(ret)); + } else if (ObCharset::is_ci_collate(param_expr->get_collation_type())) { + if (OB_FAIL(SMART_CALL(get_expr_monotonicity_recursively(param_expr, var, ctx, + mono, is_strict_inner, + param_store, const_param_info)))) { + LOG_WARN("get string param monotonicity failed", K(ret)); + } else { + monotonicity = mono; + is_strict = is_strict_inner; + } + } + } else if (expr->get_expr_type() == T_FUN_SYS_CAST) { + const ObRawExpr *param_expr = expr->get_param_expr(0); + bool is_consistent = false; + if (OB_ISNULL(param_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected exprssion input is null error", K(ret)); + } else if (OB_FAIL(ObObjCaster::is_order_consistent(param_expr->get_result_type(), + expr->get_result_type(), + is_consistent))) { + if (OB_ERR_UNEXPECTED == ret) { + LOG_WARN("failed to check is order consistent", K(ret)); + } else { + ret = OB_SUCCESS; + is_consistent = false; + } + } else if (is_consistent) { + if (OB_FAIL(SMART_CALL(get_expr_monotonicity_recursively(param_expr, var, ctx, + mono, is_strict_inner, + param_store, const_param_info)))) { + LOG_WARN("get data time param", K(ret)); + } else { + monotonicity = mono; + is_strict = false; + } + } else if (!is_oracle_mode() + && param_expr->get_result_type().is_datetime() + && expr->get_result_type().is_string_type()) { + if (OB_FAIL(SMART_CALL(get_expr_monotonicity_recursively(param_expr, var, ctx, + mono, is_strict_inner, + param_store, const_param_info)))) { + LOG_WARN("get data time param", K(ret)); + } else { + monotonicity = mono; + is_strict = is_strict_inner; + } + } + } else if (expr->get_expr_type() == T_FUN_SYS_LEFT) { + const ObRawExpr 
*param_expr_str = expr->get_param_expr(0); + const ObRawExpr *param_expr_num = expr->get_param_expr(1); + if (OB_ISNULL(param_expr_str) || OB_ISNULL(param_expr_num)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected exprssion input is null error", K(ret)); + } else if ((ObCharset::is_bin_sort(param_expr_str->get_result_type().get_collation_type()) || + CS_TYPE_UTF8MB4_GENERAL_CI == param_expr_str->get_result_type().get_collation_type()) && + param_expr_num->is_const_raw_expr() && + !param_expr_num->get_result_type().is_null()) { + if (OB_FAIL(SMART_CALL(get_expr_monotonicity_recursively(param_expr_str, var, ctx, + mono, is_strict_inner, + param_store, const_param_info)))) { + LOG_WARN("get string param monotonicity failed", K(ret)); + } else { + monotonicity = mono; + is_strict = false; + } + } + } else if (expr->get_expr_type() == T_FUN_SYS_FLOOR || expr->get_expr_type() == T_FUN_SYS_CEIL) { + const ObRawExpr *param_expr = expr->get_param_expr(0); + if (OB_ISNULL(param_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected exprssion input is null error", K(ret)); + } else if (OB_FAIL(SMART_CALL(get_expr_monotonicity_recursively(param_expr, var, ctx, + mono, is_strict_inner, + param_store, const_param_info)))) { + LOG_WARN("get string param monotonicity failed", K(ret)); + } else { + monotonicity = mono; + is_strict = false; + } + } else if (expr->get_expr_type() == T_FUN_SYS_SUBSTR) { + const ObRawExpr *param_expr_str = expr->get_param_expr(0); + const ObRawExpr *param_expr_pos = expr->get_param_expr(1); + const ObRawExpr *param_expr_len = expr->get_param_count() == 3 ? 
expr->get_param_expr(2) : NULL; + ObSEArray params; + ObArenaAllocator local_allocator; + int64_t value = 0; + bool is_null_value = true; + bool is_one = false; + ObRawExpr* param_expr = NULL; + const ObConstRawExpr *const_expr = NULL; + ObObj target_value; + target_value.set_int(ObIntType, 1); + if (OB_ISNULL(param_expr_str) || OB_ISNULL(param_expr_pos)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected exprssion input is null error", K(ret)); + } else if ((ObCharset::is_bin_sort(param_expr_str->get_result_type().get_collation_type()) || + CS_TYPE_UTF8MB4_GENERAL_CI == param_expr_str->get_result_type().get_collation_type()) && + (param_expr_pos->is_const_raw_expr() && !param_expr_pos->get_result_type().is_null()) && + (param_expr_len == NULL || (param_expr_len != NULL && param_expr_len->is_const_raw_expr() && !param_expr_len->get_result_type().is_null()))) { + if (OB_FAIL(ObTransformUtils::get_expr_int_value(const_cast(param_expr_pos), ¶m_store, + &ctx, &local_allocator, value, is_null_value))) { + LOG_WARN("failed to check limit value", K(ret)); + } else if (is_null_value || value != 1) { + /* monotonicity = Monotonicity::NONE_MONO; is_strict = false; */ + } else if (OB_FAIL(SMART_CALL(get_expr_monotonicity_recursively(param_expr_str, var, ctx, + mono,is_strict_inner, + param_store, const_param_info)))) { + LOG_WARN("get string param monotonicity failed", K(ret)); + } else if (OB_FALSE_IT(const_expr = static_cast(param_expr_pos))) { + } else if (OB_FAIL(const_param_info.const_idx_.push_back(const_expr->get_value().get_unknown()))) { + LOG_WARN("failed to push back param idx", K(ret)); + } else if (OB_FAIL(const_param_info.const_params_.push_back(target_value))) { + LOG_WARN("failed to push back value", K(ret)); + } else { + monotonicity = mono; + is_strict = false; + } + } + } else if (expr->get_expr_type() == T_OP_MINUS || expr->get_expr_type() == T_OP_ADD) { + if (expr->get_param_count() != 2) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get param 
count", K(ret)); + } else { + Monotonicity left_mono = Monotonicity::NONE_MONO; + Monotonicity right_mono = Monotonicity::NONE_MONO; + const ObRawExpr *l_expr = expr->get_param_expr(0); + const ObRawExpr *r_expr = expr->get_param_expr(1); + bool is_strict_l = false; + bool is_strict_r = false; + if (OB_ISNULL(l_expr) || OB_ISNULL(r_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("op expr one side is null", K(l_expr), K(r_expr)); + } else if ((!l_expr->get_result_type().is_numeric_type() || !r_expr->get_result_type().is_numeric_type()) + || (l_expr->get_result_type().is_float() || r_expr->get_result_type().is_float()) + || (l_expr->get_result_type().is_double()|| r_expr->get_result_type().is_double())) { + // String types are not valid here: abc < abcd but abcz > abcdz, and strings have no subtraction + // explain select distinct(t0.c2) from t0 where upper(t0.c2) + "ZHU" = "QINGZHU"; + // Date types: date addition/subtraction is only sometimes correct. + monotonicity = Monotonicity::NONE_MONO; + } else if (OB_FAIL(SMART_CALL(get_expr_monotonicity_recursively(l_expr, var, ctx, + left_mono, is_strict_l, + param_store, const_param_info))) + || OB_FAIL(SMART_CALL(get_expr_monotonicity_recursively(r_expr, var, ctx, + right_mono, is_strict_r, + param_store, const_param_info)))) { + LOG_WARN("failed to get expr monotonicity for expr", K(ret)); + } else { + if (expr->get_expr_type() == T_OP_MINUS) { + right_mono = get_opposite_of(right_mono); + } + if (left_mono == Monotonicity::CONST) { + monotonicity = right_mono; + is_strict = is_strict_r; + } else if (right_mono == Monotonicity::CONST) { + monotonicity = left_mono; + is_strict = is_strict_l; + } else if (left_mono == right_mono && (left_mono == Monotonicity::ASC || left_mono == Monotonicity::DESC)) { + monotonicity = left_mono; + is_strict = is_strict_l || is_strict_r; + } else { + monotonicity = Monotonicity::NONE_MONO; + } + } + } + } else { + monotonicity = Monotonicity::NONE_MONO; + } + } + + return ret; +} + bool ObOptimizerUtil::overlap_exprs(const ObIArray &exprs1, const ObIArray &exprs2) { @@ -1578,7
+1826,7 @@ int ObOptimizerUtil::extract_equal_exec_params(const ObIArray &expr */ } else if (OB_FAIL(left_key.push_back(exec_param->get_ref_expr()))) { LOG_WARN("push back error", K(ret)); - } else if (OB_FAIL(right_key.push_back(cur_expr->get_param_expr(1)))){ + } else if (OB_FAIL(right_key.push_back(cur_expr->get_param_expr(1)))) { LOG_WARN("push back error", K(ret)); } else if (OB_FAIL(null_safe_info.push_back(is_null_safe))) { LOG_WARN("push back error", K(ret)); @@ -1589,7 +1837,7 @@ int ObOptimizerUtil::extract_equal_exec_params(const ObIArray &expr // not my exec param } else if (OB_FAIL(left_key.push_back(exec_param->get_ref_expr()))) { LOG_WARN("push back error", K(ret)); - } else if (OB_FAIL(right_key.push_back(cur_expr->get_param_expr(0)))){ + } else if (OB_FAIL(right_key.push_back(cur_expr->get_param_expr(0)))) { LOG_WARN("push back error", K(ret)); } else if (OB_FAIL(null_safe_info.push_back(is_null_safe))) { LOG_WARN("push back error", K(ret)); @@ -3114,7 +3362,7 @@ int ObOptimizerUtil::is_exprs_unique(const ObIArray &exprs, LOG_WARN("failed to get fd set parent exprs ", K(ret)); } else if (OB_FAIL(remove_item(fd_set_parent_exprs, extend_exprs))) { LOG_WARN("failed to get fd set parent exprs ", K(ret)); - }else { + } else { ObRelIds remain_tables = all_tables; ObSqlBitSet<> skip_fd; int64_t exprs_count = -1; @@ -3204,7 +3452,7 @@ int ObOptimizerUtil::is_exprs_unique(const ObIArray &exprs, if (OB_ISNULL(fd_item_set.at(i))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); - } else if(!fd_item_set.at(i)->is_unique()) { + } else if (!fd_item_set.at(i)->is_unique()) { // do nothing } else if (OB_FAIL(is_exprs_contain_fd_parent(exprs, *fd_item_set.at(i), equal_sets, const_exprs, is_unique))) { @@ -3799,9 +4047,9 @@ int ObOptimizerUtil::check_need_sort(const ObIArray &expected_order_i int ret = OB_SUCCESS; ObSEArray expected_order_exprs; ObSEArray expected_order_directions; - if(OB_FAIL(split_expr_direction(expected_order_items, - 
expected_order_exprs, - expected_order_directions))) { + if (OB_FAIL(split_expr_direction(expected_order_items, + expected_order_exprs, + expected_order_directions))) { LOG_WARN("failed to split expr and expected_order_directions", K(ret)); } else if (OB_FAIL(check_need_sort(expected_order_exprs, &expected_order_directions, @@ -4026,7 +4274,7 @@ int ObOptimizerUtil::check_need_sort(const ObIArray &expected_order_ while(left_set.has_member(l_idx)) { ++l_idx; }; - while(right_set.has_member(r_idx)){ + while(right_set.has_member(r_idx)) { ++r_idx; }; } @@ -4524,7 +4772,7 @@ int ObOptimizerUtil::get_type_safe_join_exprs(const ObIArray &join_ ObRawExpr *second_expr = NULL; ObRawExpr *left_expr = NULL; ObRawExpr *right_expr = NULL; - for (int64_t i = 0 ; OB_SUCC(ret) && i < join_quals.count(); ++i){ + for (int64_t i = 0 ; OB_SUCC(ret) && i < join_quals.count(); ++i) { cur_expr = join_quals.at(i); if (OB_ISNULL(cur_expr)) { ret = OB_ERR_UNEXPECTED; @@ -4640,7 +4888,7 @@ int ObOptimizerUtil::check_push_down_expr(const ObRelIds &table_ids, } else { /* do nothing */ } } if (OB_FAIL(ret)) { - } else if (sub_exprs.at(i).empty()){ + } else if (sub_exprs.at(i).empty()) { all_contain = false; } } else if (cur_expr->has_flag(CNT_SUB_QUERY)) { @@ -4849,7 +5097,7 @@ int ObOptimizerUtil::simplify_exprs(const ObFdItemSet &fd_item_set, } } for (int64_t i = 0; OB_SUCC(ret) && i < root_exprs.count(); ++i) { - if(OB_UNLIKELY(!find_item(candi_exprs, root_exprs.at(i), &expr_idx))) { + if (OB_UNLIKELY(!find_item(candi_exprs, root_exprs.at(i), &expr_idx))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed to find item", K(ret)); } else if (OB_FAIL(root_exprs_set.add_member(expr_idx))) { @@ -9258,7 +9506,7 @@ int ObOptimizerUtil::pushdown_and_rename_filter_into_subquery(const ObDMLStmt &p push_filters))) { LOG_WARN("failed to rename pushdown filter", K(ret)); } - for (int64_t i = 0 ; OB_SUCC(ret) && i < remain_filters.count(); i ++){ + for (int64_t i = 0 ; OB_SUCC(ret) && i < remain_filters.count(); 
i ++) { ObRawExpr *part_push_filter = NULL; bool can_pushdown_all = false; if (OB_FAIL(split_or_filter_into_subquery(parent_stmt, @@ -9407,7 +9655,7 @@ int ObOptimizerUtil::split_or_filter_into_subquery(ObIArray & can_push_to_where, check_match_index))) { LOG_WARN("failed to pushdown filter", K(ret)); - } else if (push_filters.empty()){ + } else if (push_filters.empty()) { // AND pred can not be pushed have_push_filter = false; can_pushdown_all = false; diff --git a/src/sql/optimizer/ob_optimizer_util.h b/src/sql/optimizer/ob_optimizer_util.h index e4bb0526a..10c14809f 100644 --- a/src/sql/optimizer/ob_optimizer_util.h +++ b/src/sql/optimizer/ob_optimizer_util.h @@ -32,6 +32,14 @@ enum PartitionRelation COMPATIBLE_COMMON }; +enum Monotonicity +{ + NONE_MONO, + ASC, + DESC, + CONST +}; + struct MergeKeyInfo { MergeKeyInfo(common::ObIAllocator &allocator, int64_t size) @@ -187,6 +195,23 @@ public: const ObIArray &exec_ref_exprs, int64_t &number); + + static int get_expr_monotonicity(const ObRawExpr *expr, + const ObRawExpr *var, + ObExecContext &ctx, + Monotonicity &monotonicity, + bool &is_strict, + const ParamStore ¶m_store, + ObPCConstParamInfo& const_param_info); + + static int get_expr_monotonicity_recursively(const ObRawExpr *expr, + const ObColumnRefRawExpr *var, + ObExecContext &ctx, + Monotonicity &monotonicity, + bool &is_strict, + const ParamStore ¶m_store, + ObPCConstParamInfo &const_param_info); + static bool is_sub_expr(const ObRawExpr *sub_expr, const ObRawExpr *expr); static bool is_sub_expr(const ObRawExpr *sub_expr, const ObIArray &exprs); static bool is_sub_expr(const ObRawExpr *sub_expr, ObRawExpr *&expr, ObRawExpr **&addr_matched_expr); diff --git a/src/sql/plan_cache/ob_plan_match_helper.cpp b/src/sql/plan_cache/ob_plan_match_helper.cpp index a91ed1578..4bab7d9f3 100644 --- a/src/sql/plan_cache/ob_plan_match_helper.cpp +++ b/src/sql/plan_cache/ob_plan_match_helper.cpp @@ -39,7 +39,6 @@ int ObPlanMatchHelper::match_plan(const ObPlanCacheCtx 
&pc_ctx, const ObIArray &plan_tbl_locs = plan->get_table_locations(); PWJTabletIdMap pwj_map; bool use_pwj_map = false; - if (0 == base_cons.count()) { // match all is_matched = true; diff --git a/src/sql/rewrite/ob_transform_where_subquery_pullup.cpp b/src/sql/rewrite/ob_transform_where_subquery_pullup.cpp index ce759cbf3..c1cb65cda 100644 --- a/src/sql/rewrite/ob_transform_where_subquery_pullup.cpp +++ b/src/sql/rewrite/ob_transform_where_subquery_pullup.cpp @@ -1249,6 +1249,7 @@ int ObWhereSubQueryPullup::check_subquery_validity(ObDMLStmt &stmt, LOG_WARN("failed to push back const column", K(ret)); } } + //1.检查是否是single set query if (OB_SUCC(ret) && is_valid) { if (OB_FAIL(ObTransformUtils::check_stmt_unique(subquery, ctx_->session_info_, diff --git a/src/storage/access/ob_index_tree_prefetcher.cpp b/src/storage/access/ob_index_tree_prefetcher.cpp index dd332d1e4..0ac718d71 100644 --- a/src/storage/access/ob_index_tree_prefetcher.cpp +++ b/src/storage/access/ob_index_tree_prefetcher.cpp @@ -1268,7 +1268,7 @@ int ObIndexTreeMultiPassPrefetcher::p } else if (nullptr != sstable_index_filter && can_index_filter_skip(block_info, sample_executor) && OB_FAIL(sstable_index_filter->check_range(iter_param_->read_info_, - block_info, *(access_ctx_->allocator_)))) { + block_info, *(access_ctx_->allocator_), iter_param_->vectorized_enabled_))) { LOG_WARN("Fail to check if can skip prefetch", K(ret), K(block_info)); } else if (block_info.is_filter_always_false()) { continue; @@ -1731,7 +1731,8 @@ int ObIndexTreeMultiPassPrefetcher::O } else if (nullptr != sstable_index_filter && prefetcher.can_index_filter_skip(index_info, sample_executor) && OB_FAIL(sstable_index_filter->check_range(prefetcher.iter_param_->read_info_, index_info, - *(prefetcher.access_ctx_->allocator_)))) { + *(prefetcher.access_ctx_->allocator_), + prefetcher.iter_param_->vectorized_enabled_))) { LOG_WARN("Fail to check if can skip prefetch", K(ret), K(index_info)); } else if 
(index_info.is_filter_always_false()) { } else if (nullptr != prefetcher.agg_row_store_ && prefetcher.agg_row_store_->can_agg_index_info(index_info)) { diff --git a/src/storage/access/ob_sstable_index_filter.cpp b/src/storage/access/ob_sstable_index_filter.cpp index 9fa46c459..911d62b8e 100644 --- a/src/storage/access/ob_sstable_index_filter.cpp +++ b/src/storage/access/ob_sstable_index_filter.cpp @@ -39,6 +39,8 @@ int ObSSTableIndexFilter::init( LOG_WARN("Unexpected nullptr read_info", K(ret), KP(read_info), K(is_cg)); } else if (OB_FAIL(build_skipping_filter_nodes(read_info, pushdown_filter))) { LOG_WARN("Fail to build skipping filter node", K(ret)); + } else if (OB_FAIL(skip_filter_executor_.init(MAX(1, pushdown_filter.get_op().get_batch_size()), allocator))) { + LOG_WARN("Failed to init skip filter executor", K(ret)); } else { pushdown_filter_ = &pushdown_filter; allocator_ = allocator; @@ -52,7 +54,8 @@ int ObSSTableIndexFilter::init( int ObSSTableIndexFilter::check_range( const ObITableReadInfo *read_info, blocksstable::ObMicroIndexInfo &index_info, - common::ObIAllocator &allocator) + common::ObIAllocator &allocator, + const bool use_vectorize) { int ret = OB_SUCCESS; if (IS_NOT_INIT) { @@ -69,7 +72,7 @@ int ObSSTableIndexFilter::check_range( bool can_use_skipping_index_filter = false; for (int64_t i = 0; OB_SUCC(ret) && i < skipping_filter_nodes_.count(); ++i) { ObSkippingFilterNode &node = skipping_filter_nodes_[i]; - if (OB_FAIL(is_filtered_by_skipping_index(read_info, index_info, node, allocator))) { + if (OB_FAIL(is_filtered_by_skipping_index(read_info, index_info, node, allocator, use_vectorize))) { LOG_WARN("Fail to do filter by skipping index", K(ret), K(index_info)); } else { can_use_skipping_index_filter = @@ -102,7 +105,8 @@ int ObSSTableIndexFilter::is_filtered_by_skipping_index( const ObITableReadInfo *read_info, blocksstable::ObMicroIndexInfo &index_info, ObSkippingFilterNode &node, - common::ObIAllocator &allocator) + common::ObIAllocator 
&allocator, + const bool use_vectorize) { int ret = OB_SUCCESS; node.is_already_determinate_ = false; @@ -113,17 +117,17 @@ int ObSSTableIndexFilter::is_filtered_by_skipping_index( // There is no need to check skipping index because filter result is contant already. node.is_already_determinate_ = true; } else { - auto *white_filter = static_cast(node.filter_); - const uint32_t col_offset = white_filter->get_col_offsets(is_cg_).at(0); + const uint32_t col_offset = node.filter_->get_col_offsets(is_cg_).at(0); const uint32_t col_idx = static_cast(read_info->get_columns_index().at(col_offset)); const ObObjMeta obj_meta = read_info->get_columns_desc().at(col_offset).col_type_; if (OB_FAIL(skip_filter_executor_.falsifiable_pushdown_filter(col_idx, obj_meta, node.skip_index_type_, index_info, - *white_filter, - allocator))) { - LOG_WARN("Fail to falsifiable pushdown filter", K(ret), K(white_filter)); + *node.filter_, + allocator, + use_vectorize))) { + LOG_WARN("Fail to falsifiable pushdown filter", K(ret), K(node.filter_)); } } return ret; @@ -155,12 +159,12 @@ int ObSSTableIndexFilter::extract_skipping_filter_from_tree( sql::ObPushdownFilterExecutor &filter) { int ret = OB_SUCCESS; - if (filter.is_filter_white_node()) { - auto &white_filter = static_cast(filter); + sql::ObPhysicalFilterExecutor &physical_filter = static_cast(filter); + if (physical_filter.is_filter_white_node() || static_cast(physical_filter).is_monotonic()) { IndexList index_list; - if (OB_FAIL(find_skipping_index(read_info, filter, index_list))) { + if (OB_FAIL(find_skipping_index(read_info, physical_filter, index_list))) { LOG_WARN("Fail to find useful skipping index", K(ret)); - } else if (OB_FAIL(find_useful_skipping_filter(index_list, filter))) { + } else if (OB_FAIL(find_useful_skipping_filter(index_list, physical_filter))) { LOG_WARN("Fail to find useful skipping filter", K(ret)); } } @@ -169,7 +173,7 @@ int ObSSTableIndexFilter::extract_skipping_filter_from_tree( int 
ObSSTableIndexFilter::find_skipping_index( const ObITableReadInfo* read_info, - sql::ObPushdownFilterExecutor &filter, + sql::ObPhysicalFilterExecutor &filter, IndexList &index_list) const { int ret = OB_SUCCESS; @@ -210,7 +214,7 @@ int ObSSTableIndexFilter::find_skipping_index( int ObSSTableIndexFilter::find_useful_skipping_filter( const IndexList &index_list, - sql::ObPushdownFilterExecutor &filter) + sql::ObPhysicalFilterExecutor &filter) { int ret = OB_SUCCESS; for (int64_t i = 0; OB_SUCC(ret) && i < index_list.count(); ++i) { @@ -273,16 +277,14 @@ void ObSSTableIndexFilterFactory::destroy_sstable_index_filter(ObSSTableIndexFil //////////////////////////////////////// ObSSTableIndexFilterExtracter ////////////////////////////////////////////// int ObSSTableIndexFilterExtracter::extract_skipping_filter( - const sql::ObPushdownFilterExecutor &filter, + const sql::ObPhysicalFilterExecutor &filter, const blocksstable::ObSkipIndexType skip_index_type, ObSkippingFilterNode &node) { int ret = OB_SUCCESS; switch (skip_index_type) { case blocksstable::ObSkipIndexType::MIN_MAX: - if (OB_FAIL(ObSSTableIndexFilterExtracter::extract_min_max_skipping_filter(filter, node))) { - LOG_WARN("Fail to extract min max index skipping filter", K(ret), K(skip_index_type)); - } + node.skip_index_type_ = blocksstable::ObSkipIndexType::MIN_MAX; break; default: // There are more skipping index types in the future. 
@@ -292,22 +294,5 @@ int ObSSTableIndexFilterExtracter::extract_skipping_filter( return ret; } -int ObSSTableIndexFilterExtracter::extract_min_max_skipping_filter( - const sql::ObPushdownFilterExecutor &filter, - ObSkippingFilterNode &node) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!filter.is_filter_node())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected not physical filter node", K(ret), K(filter.get_type())); - } else if (filter.is_filter_black_node()) { - node.set_useless(); - } else { - // min_max skipping index support all types of white filter now, - node.skip_index_type_ = blocksstable::ObSkipIndexType::MIN_MAX; - } - return ret; -} - } // namespace storage } // namespace oceanbase diff --git a/src/storage/access/ob_sstable_index_filter.h b/src/storage/access/ob_sstable_index_filter.h index 6ce797289..e43fd8b51 100644 --- a/src/storage/access/ob_sstable_index_filter.h +++ b/src/storage/access/ob_sstable_index_filter.h @@ -45,7 +45,7 @@ struct ObSkippingFilterNode bool is_already_determinate_; blocksstable::ObSkipIndexType skip_index_type_; - sql::ObPushdownFilterExecutor *filter_; + sql::ObPhysicalFilterExecutor *filter_; }; class ObSSTableIndexFilter @@ -73,7 +73,8 @@ public: int check_range( const ObITableReadInfo *read_info, blocksstable::ObMicroIndexInfo &index_info, - common::ObIAllocator &allocator); + common::ObIAllocator &allocator, + const bool use_vectorize); /// Check whether we can use skipping index. 
bool can_use_skipping_index() const { @@ -91,7 +92,8 @@ private: const ObITableReadInfo *read_info, blocksstable::ObMicroIndexInfo &index_info, ObSkippingFilterNode &node, - common::ObIAllocator &allocator); + common::ObIAllocator &allocator, + const bool use_vectorize); int build_skipping_filter_nodes( const ObITableReadInfo* read_info, sql::ObPushdownFilterExecutor &filter); @@ -100,11 +102,11 @@ private: sql::ObPushdownFilterExecutor &filter); int find_skipping_index( const ObITableReadInfo* read_info, - sql::ObPushdownFilterExecutor &filter, + sql::ObPhysicalFilterExecutor &filter, IndexList &index_list) const; int find_useful_skipping_filter( const IndexList &index_list, - sql::ObPushdownFilterExecutor &filter); + sql::ObPhysicalFilterExecutor &filter); private: bool is_inited_; bool is_cg_; @@ -130,13 +132,9 @@ struct ObSSTableIndexFilterExtracter { public: static int extract_skipping_filter( - const sql::ObPushdownFilterExecutor &filter, + const sql::ObPhysicalFilterExecutor &filter, const blocksstable::ObSkipIndexType skip_index_type, ObSkippingFilterNode &node); -private: - static int extract_min_max_skipping_filter( - const sql::ObPushdownFilterExecutor &filter, - ObSkippingFilterNode &node); }; } // namespace storage } // namespace oceanbase diff --git a/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.cpp b/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.cpp index 865793330..d5e860bdf 100644 --- a/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.cpp +++ b/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.cpp @@ -386,60 +386,68 @@ int ObDictColumnDecoder::pushdown_operator( } LOG_TRACE("dict black filter pushdown", K(ret), K(ctx), K(filter_applied), K(pd_filter_info)); } else { - for (int64_t index = 0; OB_SUCC(ret) && (index < distinct_ref_cnt); ) { - int64_t upper_bound = MIN(index + pd_filter_info.batch_size_, distinct_ref_cnt); - const int64_t cur_ref_cnt = upper_bound - index; - // use uniform base 
currently, support new format later - //if (enable_rich_format && OB_FAIL(storage::init_exprs_vector_header(filter.get_filter_node().column_exprs_, filter.get_op().get_eval_ctx(), cur_ref_cnt))) { - // LOG_WARN("Failed to init exprs vector header", K(ret)); - // break; - //} - for (int64_t dict_ref = index; dict_ref < upper_bound; dict_ref++) { - datums[dict_ref - index].pack_ = dict_ref; - } - if (col_ctx.is_int_dict_type()) { - ConvertUnitToDatumFunc convert_func = convert_uint_to_datum_funcs - [ctx.int_ctx_->meta_.width_/*val_store_width_V*/] - [ObRefStoreWidthV::REF_IN_DATUMS/*ref_store_width_V*/] - [get_width_tag_map()[ctx.datum_len_]/*datum_width_V*/] - [ctx.null_flag_] - [ctx.int_ctx_->meta_.is_decimal_int()]; - convert_func(ctx, ctx.int_data_, *ctx.int_ctx_, ctx.ref_data_, nullptr, cur_ref_cnt, datums); - } else { - const uint32_t offset_width = ctx.str_ctx_->meta_.is_fixed_len_string() ? - FIX_STRING_OFFSET_WIDTH_V : ctx.offset_ctx_->meta_.width_; - ConvertStringToDatumFunc convert_func = convert_string_to_datum_funcs - [offset_width] - [ObRefStoreWidthV::REF_IN_DATUMS] - [ctx.null_flag_] - [ctx.need_copy_]; - convert_func(ctx, ctx.str_data_, *ctx.str_ctx_, - ctx.offset_data_, ctx.ref_data_, nullptr, cur_ref_cnt, datums); - } - if (ctx.obj_meta_.is_fixed_len_char_type() && (nullptr != ctx.col_param_)) { - if (OB_FAIL(storage::pad_on_datums(ctx.col_param_->get_accuracy(), - ctx.obj_meta_.get_collation_type(), *ctx.allocator_, cur_ref_cnt, datums))) { - LOG_WARN("fail to pad on datums", KR(ret), K(ctx), K(index), K(upper_bound)); + sql::ObBoolMask bool_mask; + if (OB_FAIL(check_skip_block(ctx, filter, pd_filter_info, result_bitmap, bool_mask))) { + LOG_WARN("Failed to check whether hit shortcut", KR(ret), K(ctx), K(filter), K(pd_filter_info)); + } else if (!bool_mask.is_uncertain()) { + filter_applied = true; + LOG_DEBUG("skip block in dict black filter pushdown", K(result_bitmap.popcnt())); + } else { + for (int64_t index = 0; OB_SUCC(ret) && (index < 
distinct_ref_cnt); ) { + int64_t upper_bound = MIN(index + pd_filter_info.batch_size_, distinct_ref_cnt); + const int64_t cur_ref_cnt = upper_bound - index; + // use uniform base currently, support new format later + //if (enable_rich_format && OB_FAIL(storage::init_exprs_vector_header(filter.get_filter_node().column_exprs_, filter.get_op().get_eval_ctx(), cur_ref_cnt))) { + // LOG_WARN("Failed to init exprs vector header", K(ret)); + // break; + //} + for (int64_t dict_ref = index; dict_ref < upper_bound; dict_ref++) { + datums[dict_ref - index].pack_ = dict_ref; + } + if (col_ctx.is_int_dict_type()) { + ConvertUnitToDatumFunc convert_func = convert_uint_to_datum_funcs + [ctx.int_ctx_->meta_.width_/*val_store_width_V*/] + [ObRefStoreWidthV::REF_IN_DATUMS/*ref_store_width_V*/] + [get_width_tag_map()[ctx.datum_len_]/*datum_width_V*/] + [ctx.null_flag_] + [ctx.int_ctx_->meta_.is_decimal_int()]; + convert_func(ctx, ctx.int_data_, *ctx.int_ctx_, ctx.ref_data_, nullptr, cur_ref_cnt, datums); + } else { + const uint32_t offset_width = ctx.str_ctx_->meta_.is_fixed_len_string() ? 
+ FIX_STRING_OFFSET_WIDTH_V : ctx.offset_ctx_->meta_.width_; + ConvertStringToDatumFunc convert_func = convert_string_to_datum_funcs + [offset_width] + [ObRefStoreWidthV::REF_IN_DATUMS] + [ctx.null_flag_] + [ctx.need_copy_]; + convert_func(ctx, ctx.str_data_, *ctx.str_ctx_, + ctx.offset_data_, ctx.ref_data_, nullptr, cur_ref_cnt, datums); + } + if (ctx.obj_meta_.is_fixed_len_char_type() && (nullptr != ctx.col_param_)) { + if (OB_FAIL(storage::pad_on_datums(ctx.col_param_->get_accuracy(), + ctx.obj_meta_.get_collation_type(), *ctx.allocator_, cur_ref_cnt, datums))) { + LOG_WARN("fail to pad on datums", KR(ret), K(ctx), K(index), K(upper_bound)); + } + } + if (FAILEDx(filter.filter_batch(nullptr, index, upper_bound, *ref_bitmap))) { + LOG_WARN("fail to filter batch", KR(ret), K(index), K(upper_bound)); + } else { + index = upper_bound; } } - if (FAILEDx(filter.filter_batch(nullptr, index, upper_bound, *ref_bitmap))) { - LOG_WARN("fail to filter batch", KR(ret), K(index), K(upper_bound)); - } else { - index = upper_bound; - } - } - if (OB_SUCC(ret)) { - const uint32_t ref_width_size = ctx.ref_ctx_->meta_.get_uint_width_size(); - if (OB_FAIL(set_res_with_bitmap(*ctx.dict_meta_, ctx.ref_data_, - ref_width_size, ref_bitmap, pd_filter_info, datums, parent, result_bitmap))) { - LOG_WARN("fail to set result with bitmap", KR(ret), K(ref_width_size), K(pd_filter_info)); - } else { - filter_applied = true; + if (OB_SUCC(ret)) { + const uint32_t ref_width_size = ctx.ref_ctx_->meta_.get_uint_width_size(); + if (OB_FAIL(set_res_with_bitmap(*ctx.dict_meta_, ctx.ref_data_, + ref_width_size, ref_bitmap, pd_filter_info, datums, parent, result_bitmap))) { + LOG_WARN("fail to set result with bitmap", KR(ret), K(ref_width_size), K(pd_filter_info)); + } else { + filter_applied = true; + } } } LOG_TRACE("dict black filter pushdown", K(ret), K(ctx), - K(filter_applied), K(pd_filter_info), K(result_bitmap.popcnt())); + K(filter_applied), K(pd_filter_info), K(result_bitmap.popcnt())); } } } 
@@ -448,6 +456,36 @@ int ObDictColumnDecoder::pushdown_operator( return ret; } +int ObDictColumnDecoder::check_skip_block( + const ObDictColumnDecoderCtx &ctx, + sql::ObBlackFilterExecutor &filter, + sql::PushdownFilterInfo &pd_filter_info, + ObBitmap &result_bitmap, + sql::ObBoolMask &bool_mask) +{ + int ret = OB_SUCCESS; + const uint64_t dict_val_cnt = ctx.dict_meta_->distinct_val_cnt_; + const bool has_null = ctx.dict_meta_->has_null(); + if (!filter.is_monotonic()) { + } else if (dict_val_cnt <= CS_DICT_SKIP_THRESHOLD) { + // Do not skip block when dict count is small. + // Otherwise, if can not skip block by monotonicity, the performance will decrease. + } else if (ctx.dict_meta_->is_sorted()) { + ObStorageDatum min_datum = *ObDictValueIterator(&ctx, 0); + ObStorageDatum max_datum = *(ObDictValueIterator(&ctx, dict_val_cnt) - 1); + if (OB_FAIL(check_skip_by_monotonicity(filter, + min_datum, + max_datum, + *pd_filter_info.skip_bit_, + has_null, + &result_bitmap, + bool_mask))) { + LOG_WARN("Failed to check can skip by monotonicity", K(ret), K(min_datum), K(max_datum), K(filter)); + } + } + return ret; +} + int ObDictColumnDecoder::pushdown_operator( const sql::ObPushdownFilterExecutor *parent, const ObColumnCSDecoderCtx &col_ctx, diff --git a/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.h b/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.h index 020c13520..f5361dc33 100644 --- a/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.h +++ b/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.h @@ -98,6 +98,7 @@ public: protected: const static int64_t MAX_STACK_BUF_SIZE = 4 << 10; // 4K + const static int64_t CS_DICT_SKIP_THRESHOLD = 32; virtual int decode_and_aggregate( const ObColumnCSDecoderCtx &ctx, const int64_t row_id, @@ -107,6 +108,14 @@ protected: UNUSEDx(ctx, row_id, datum, agg_cell); return OB_NOT_SUPPORTED; } + + static int check_skip_block( + const ObDictColumnDecoderCtx &ctx, + sql::ObBlackFilterExecutor 
&filter, + sql::PushdownFilterInfo &pd_filter_info, + ObBitmap &result_bitmap, + sql::ObBoolMask &bool_mask); + static int extract_ref_and_null_count_( const ObConstEncodingRefDesc &ref_desc, const int64_t dict_count, diff --git a/src/storage/blocksstable/encoding/ob_dict_decoder.cpp b/src/storage/blocksstable/encoding/ob_dict_decoder.cpp index fb517dead..5285066f8 100644 --- a/src/storage/blocksstable/encoding/ob_dict_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_dict_decoder.cpp @@ -1504,7 +1504,6 @@ int ObDictDecoder::pushdown_operator( } else if (meta_header_->count_ > pd_filter_info.count_ || meta_header_->count_ > col_ctx.micro_block_header_->row_count_ * 0.8) { } else { - void *buf = nullptr; common::ObBitmap *ref_bitmap = nullptr; common::ObDatum *datums = nullptr; ObSEArray datum_infos; @@ -1538,7 +1537,7 @@ int ObDictDecoder::pushdown_operator( datums))) { LOG_WARN("Failed to batch decode referenes from dict", K(ret), K(col_ctx)); } else if (col_ctx.obj_meta_.is_fixed_len_char_type() && nullptr != col_ctx.col_param_ && - OB_FAIL(storage::pad_on_datums(col_ctx.col_param_->get_accuracy(), + OB_FAIL(storage::pad_on_datums(col_ctx.col_param_->get_accuracy(), col_ctx.obj_meta_.get_collation_type(), *col_ctx.allocator_, upper_bound - index, @@ -1563,6 +1562,41 @@ int ObDictDecoder::pushdown_operator( return ret; } +// Not used for now because if the block cannot be skipped by monotonicity, the performance will decrease. +// There are two reasons for the decline in performance. +// 1. check_has_null() needs to traverse all refs. +// 2. check_skip_by_monotonicity() brings redundant comparisons. 
+int ObDictDecoder::check_skip_block( + const ObColumnDecoderCtx &col_ctx, + sql::ObBlackFilterExecutor &filter, + sql::PushdownFilterInfo &pd_filter_info, + ObBitmap &result_bitmap, + sql::ObBoolMask &bool_mask) const +{ + int ret = OB_SUCCESS; + bool has_null = false; + if (!filter.is_monotonic()) { + } else if (meta_header_->count_ <= DICT_SKIP_THRESHOLD) { + // Do not skip block when dict count is small. + // Otherwise, if can not skip block by monotonicity, the performance will decrease. + } else if (OB_FAIL(check_has_null(col_ctx, col_ctx.col_header_->length_, has_null))) { + LOG_WARN("Failed to check has null", K(ret)); + } else if (meta_header_->is_sorted_dict()) { + ObStorageDatum min_datum = *begin(&col_ctx, col_ctx.col_header_->length_); + ObStorageDatum max_datum = *(end(&col_ctx, col_ctx.col_header_->length_) - 1); + if (OB_FAIL(check_skip_by_monotonicity(filter, + min_datum, + max_datum, + *pd_filter_info.skip_bit_, + has_null, + &result_bitmap, + bool_mask))) { + LOG_WARN("Failed to check can skip by monotonicity", K(ret), K(min_datum), K(max_datum), K(filter)); + } + } + return ret; +} + int ObDictDecoder::set_res_with_bitmap( const sql::ObPushdownFilterExecutor *parent, const ObColumnDecoderCtx &col_ctx, diff --git a/src/storage/blocksstable/encoding/ob_dict_decoder.h b/src/storage/blocksstable/encoding/ob_dict_decoder.h index b56f91bf4..e868a0db0 100644 --- a/src/storage/blocksstable/encoding/ob_dict_decoder.h +++ b/src/storage/blocksstable/encoding/ob_dict_decoder.h @@ -155,6 +155,7 @@ public: ObDictDecoderIterator end(const ObColumnDecoderCtx *ctx, int64_t meta_length) const; private: + static const int DICT_SKIP_THRESHOLD = 32; bool fast_eq_ne_operator_valid( const int64_t dict_ref_cnt, const ObColumnDecoderCtx &col_ctx) const; @@ -162,6 +163,13 @@ private: const ObColumnDecoderCtx &col_ctx, const ObDatum &ref_datum) const; + int check_skip_block( + const ObColumnDecoderCtx &col_ctx, + sql::ObBlackFilterExecutor &filter, + 
sql::PushdownFilterInfo &pd_filter_info, + ObBitmap &result_bitmap, + sql::ObBoolMask &bool_mask) const; + // unpacked refs should be stores in datums.pack_ int batch_get_bitpacked_refs( const int64_t *row_ids, diff --git a/src/storage/blocksstable/index_block/ob_skip_index_filter_executor.cpp b/src/storage/blocksstable/index_block/ob_skip_index_filter_executor.cpp index b65e5d828..ef68ebb57 100644 --- a/src/storage/blocksstable/index_block/ob_skip_index_filter_executor.cpp +++ b/src/storage/blocksstable/index_block/ob_skip_index_filter_executor.cpp @@ -17,6 +17,26 @@ namespace oceanbase namespace blocksstable { +int ObSkipIndexFilterExecutor::init(const int64_t batch_size, common::ObIAllocator *allocator) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("ObSkipIndexFilterExecutor has been inited", K(ret)); + } else if (OB_UNLIKELY(batch_size <= 0 || nullptr == allocator)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid batch_size or allocator", K(ret), K(batch_size), KP(allocator)); + } else if (OB_ISNULL(skip_bit_ = sql::to_bit_vector(allocator->alloc(sql::ObBitVector::memory_size(batch_size))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("Failed to alloc memory for skip bit", K(ret), K(batch_size)); + } else { + skip_bit_->init(batch_size); + allocator_ = allocator; + is_inited_ = true; + } + return ret; +} + int ObSkipIndexFilterExecutor::read_aggregate_data(const uint32_t col_idx, common::ObIAllocator &allocator, const share::schema::ObColumnParam *col_param, @@ -52,14 +72,18 @@ int ObSkipIndexFilterExecutor::falsifiable_pushdown_filter( const ObObjMeta &obj_meta, const ObSkipIndexType index_type, const ObMicroIndexInfo &index_info, - sql::ObWhiteFilterExecutor &filter, - common::ObIAllocator &allocator) + sql::ObPhysicalFilterExecutor &filter, + common::ObIAllocator &allocator, + const bool use_vectorize) { int ret = OB_SUCCESS; - reset(); - if (OB_UNLIKELY(!index_info.has_agg_data())) { + if (IS_NOT_INIT) { + ret = 
OB_NOT_INIT; + LOG_WARN("ObSkipIndexFilterExecutor has not been inited", K(ret)); + } else if (OB_UNLIKELY(!index_info.has_agg_data())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("Invalid argument", K(ret), K(index_info)); + } else if (FALSE_IT(agg_row_reader_.reset())) { } else if (OB_FAIL(agg_row_reader_.init(index_info.agg_row_buf_, index_info.agg_buf_size_))) { LOG_WARN("failed to init agg row reader", K(ret)); } else { @@ -67,20 +91,28 @@ int ObSkipIndexFilterExecutor::falsifiable_pushdown_filter( case ObSkipIndexType::MIN_MAX: { if (filter.is_filter_dynamic_node()) { sql::ObDynamicFilterExecutor &dynamic_filter = - static_cast(filter); + static_cast(filter); if (!dynamic_filter.is_data_prepared()) { filter.get_filter_bool_mask().set_uncertain(); } else if (dynamic_filter.is_filter_all_data()) { filter.get_filter_bool_mask().set_always_false(); } else if (dynamic_filter.is_pass_all_data()) { filter.get_filter_bool_mask().set_always_true(); - } else if (OB_FAIL(filter_on_min_max(col_idx, index_info.get_row_count(), - obj_meta, filter, allocator))) { - LOG_WARN("Fail to filter on min_max for dynamic filter", K(ret), K(col_idx)); } - } else if (OB_FAIL(filter_on_min_max(col_idx, index_info.get_row_count(), - obj_meta, filter, allocator))) { - LOG_WARN("Fail to filter on min_max", K(ret), K(col_idx)); + } else if (filter.is_filter_white_node()) { + sql::ObWhiteFilterExecutor &white_filter = + static_cast(filter); + if (OB_FAIL(filter_on_min_max(col_idx, index_info.get_row_count(), + obj_meta, white_filter, allocator))) { + LOG_WARN("Failed to filter on min_max for white filter", K(ret), K(col_idx)); + } + } else if (filter.is_filter_black_node()) { + sql::ObBlackFilterExecutor &black_filter = + static_cast(filter); + if (OB_FAIL(black_filter_on_min_max(col_idx, index_info.get_row_count(), + obj_meta, black_filter, allocator, use_vectorize))) { + LOG_WARN("Failed to filter on min_max for black filter", K(ret), K(col_idx)); + } } break; } @@ -114,8 +146,8 @@ int 
ObSkipIndexFilterExecutor::filter_on_min_max( } else if (null_count.is_null() && min_datum.is_null() && max_datum.is_null()) { // min max null_count all null, expect uncertain cause by progressive merge fal_desc.set_uncertain(); - } else if (null_count.is_null() || null_count.get_int() < 0 || null_count.get_int() > row_count || - min_datum.is_null() != max_datum.is_null()) { + } else if (OB_UNLIKELY(null_count.is_null() || null_count.get_int() < 0 || null_count.get_int() > row_count || + min_datum.is_null() != max_datum.is_null())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("not correct min_max agg info", K(ret), K(col_idx), K(row_count), K(null_count), K(min_datum), K(max_datum)); @@ -540,5 +572,60 @@ int ObSkipIndexFilterExecutor::bt_operator(const sql::ObWhiteFilterExecutor &fil } return ret; } + +int ObSkipIndexFilterExecutor::black_filter_on_min_max( + const uint32_t col_idx, + const uint64_t row_count, + const ObObjMeta &obj_meta, + sql::ObBlackFilterExecutor &filter, + common::ObIAllocator &allocator, + const bool use_vectorize) +{ + int ret = OB_SUCCESS; + sql::ObBoolMask &fal_desc = filter.get_filter_bool_mask(); + const share::schema::ObColumnParam *col_param = filter.get_col_params().at(0); + ObStorageDatum null_count; + ObStorageDatum min_datum; + ObStorageDatum max_datum; + if (OB_UNLIKELY(!filter.is_monotonic())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid black filter, filter is not monotonic", K(ret), K(filter)); + } else if (OB_FAIL(read_aggregate_data(col_idx, allocator, col_param, + obj_meta, null_count, min_datum, max_datum))) { + LOG_WARN("Failed to read min and max", K(ret), K(col_idx)); + } else if (null_count.is_null() && min_datum.is_null() && max_datum.is_null()) { + // min max null_count all null, expect uncertain cause by progressive merge + fal_desc.set_uncertain(); + } else if (OB_UNLIKELY(null_count.is_null() || null_count.get_int() < 0 || null_count.get_int() > row_count || + min_datum.is_null() != max_datum.is_null())) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("Not correct min_max agg info", K(ret), K(col_idx), K(row_count), + K(null_count), K(min_datum), K(max_datum)); + } else if (use_vectorize && + filter.get_op().enable_rich_format_ && + OB_FAIL(init_exprs_uniform_header(filter.get_cg_col_exprs(), + filter.get_op().get_eval_ctx(), + filter.get_op().get_eval_ctx().max_batch_size_))) { + LOG_WARN("Failed to init exprs vector header", K(ret)); + } else { + const bool is_all_null = null_count.get_int() == row_count; + const bool has_null = null_count.get_int() > 0 && null_count.get_int() < row_count; + if (is_all_null) { + fal_desc.set_always_false(); + } else if (OB_FAIL(check_skip_by_monotonicity(filter, + min_datum, + max_datum, + *skip_bit_, + has_null, + nullptr, /*result_bitmap*/ + fal_desc))) { + LOG_WARN("Failed to check can skip by monotonicity", K(ret), K(min_datum), K(max_datum), K(has_null), K(filter)); + } + } + LOG_DEBUG("Utilize skip index judge black filter", K(ret), K(fal_desc), K(min_datum), K(max_datum), + K(null_count), K(row_count), K(filter)); + return ret; +} + } // end namespace blocksstable } // end namespace oceanbase diff --git a/src/storage/blocksstable/index_block/ob_skip_index_filter_executor.h b/src/storage/blocksstable/index_block/ob_skip_index_filter_executor.h index 163e70482..a7bbd1c0e 100644 --- a/src/storage/blocksstable/index_block/ob_skip_index_filter_executor.h +++ b/src/storage/blocksstable/index_block/ob_skip_index_filter_executor.h @@ -28,18 +28,28 @@ class ObSkipIndexFilterExecutor final { public: ObSkipIndexFilterExecutor() - : agg_row_reader_(), meta_() {} + : agg_row_reader_(), meta_(), skip_bit_(nullptr), allocator_(nullptr), is_inited_(false) {} ~ObSkipIndexFilterExecutor() { reset(); } void reset() { agg_row_reader_.reset(); + if (OB_NOT_NULL(allocator_)) { + if (OB_NOT_NULL(skip_bit_)) { + allocator_->free(skip_bit_); + skip_bit_ = nullptr; + } + } + allocator_ = nullptr; + is_inited_ = false; } + int init(const int64_t batch_size, 
common::ObIAllocator *allocator); int falsifiable_pushdown_filter(const uint32_t col_idx, const ObObjMeta &obj_meta, const ObSkipIndexType index_type, const ObMicroIndexInfo &index_info, - sql::ObWhiteFilterExecutor &filter, - common::ObIAllocator &allocator); + sql::ObPhysicalFilterExecutor &filter, + common::ObIAllocator &allocator, + const bool use_vectorize); private: int filter_on_min_max(const uint32_t col_idx, @@ -99,9 +109,19 @@ private: const common::ObDatum &min_datum, const common::ObDatum &max_datum, sql::ObBoolMask &fal_desc); + + int black_filter_on_min_max(const uint32_t col_idx, + const uint64_t row_count, + const ObObjMeta &obj_meta, + sql::ObBlackFilterExecutor &filter, + common::ObIAllocator &allocator, + const bool use_vectorize); private: ObAggRowReader agg_row_reader_; ObSkipIndexColMeta meta_; + sql::ObBitVector *skip_bit_; // to be compatible with the black filter filter() method + common::ObIAllocator *allocator_; + bool is_inited_; DISALLOW_COPY_AND_ASSIGN(ObSkipIndexFilterExecutor); }; diff --git a/src/storage/blocksstable/ob_micro_block_row_scanner.cpp b/src/storage/blocksstable/ob_micro_block_row_scanner.cpp index 89bf433db..108d25fbe 100644 --- a/src/storage/blocksstable/ob_micro_block_row_scanner.cpp +++ b/src/storage/blocksstable/ob_micro_block_row_scanner.cpp @@ -886,7 +886,7 @@ int ObIMicroBlockRowScanner::filter_micro_block_in_cg( } } if (OB_FAIL(ret)) { - } else if (OB_FAIL(pd_filter_info.filter_->execute(parent, pd_filter_info, this, true))) { + } else if (OB_FAIL(pd_filter_info.filter_->execute(parent, pd_filter_info, this, param_->vectorized_enabled_))) { LOG_WARN("Fail to filter", K(ret), KPC(pd_filter_info.filter_), K(pd_filter_info.start_), K(pd_filter_info.count_)); } else { access_count = row_count; diff --git a/src/storage/column_store/ob_cg_prefetcher.cpp b/src/storage/column_store/ob_cg_prefetcher.cpp index 268d2bcce..fe3e18b48 100644 --- a/src/storage/column_store/ob_cg_prefetcher.cpp +++ 
b/src/storage/column_store/ob_cg_prefetcher.cpp @@ -399,7 +399,8 @@ int ObCGPrefetcher::prefetch_micro_data() } else if (nullptr != sstable_index_filter_ && can_index_filter_skip(block_info) && OB_FAIL(sstable_index_filter_->check_range( - iter_param_->read_info_, block_info, *(access_ctx_->allocator_)))) { + iter_param_->read_info_, block_info, + *(access_ctx_->allocator_), iter_param_->vectorized_enabled_))) { LOG_WARN("Fail to check if can skip prefetch", K(ret), K(block_info)); } else if (nullptr != sstable_index_filter_ && (block_info.is_filter_always_false() || block_info.is_filter_always_true())) { @@ -526,7 +527,8 @@ int ObCGPrefetcher::ObCSIndexTreeLevelHandle::prefetch( && OB_FAIL(prefetcher.sstable_index_filter_->check_range( prefetcher.iter_param_->read_info_, index_info, - *(prefetcher.access_ctx_->allocator_)))) { + *(prefetcher.access_ctx_->allocator_), + prefetcher.iter_param_->vectorized_enabled_))) { LOG_WARN("Fail to check if can skip prefetch", K(ret), K(index_info)); // TODO: skip data block which is always_false/always_true and record the result in filter bitmap } else if (OB_FAIL(prefetcher.can_agg_micro_index(index_info, can_agg))) { @@ -732,7 +734,8 @@ int ObCGPrefetcher::prewarm() && block_info.has_agg_data() && block_info.is_filter_uncertain() && OB_FAIL(sstable_index_filter_->check_range( - iter_param_->read_info_, block_info, *(access_ctx_->allocator_)))) { + iter_param_->read_info_, block_info, + *(access_ctx_->allocator_), iter_param_->vectorized_enabled_))) { LOG_WARN("Fail to check if can skip prefetch", K(ret), K(block_info)); } else if (nullptr != sstable_index_filter_ && (block_info.is_filter_always_false() || block_info.is_filter_always_true())) { diff --git a/src/storage/ob_storage_util.cpp b/src/storage/ob_storage_util.cpp index 44a338d23..f966b2f1b 100644 --- a/src/storage/ob_storage_util.cpp +++ b/src/storage/ob_storage_util.cpp @@ -15,6 +15,7 @@ #include "share/datum/ob_datum.h" #include "share/object/ob_obj_cast.h" 
#include "share/vector/ob_discrete_format.h" +#include "sql/engine/basic/ob_pushdown_filter.h" #include "sql/engine/ob_exec_context.h" #include "storage/blocksstable/ob_datum_row.h" @@ -502,6 +503,90 @@ int fill_exprs_lob_locator( return ret; } +// Monotonic black filters only support five comparison types: ">", ">=", "<", "<=" and "=". +// All of these monotonic black filters return false if the input is null. +// When has_null is true, we cannot call set_always_true() on bool_mask, but we can still conclude always false. +int check_skip_by_monotonicity( + sql::ObBlackFilterExecutor &filter, + blocksstable::ObStorageDatum &min_datum, + blocksstable::ObStorageDatum &max_datum, + const sql::ObBitVector &skip_bit, + const bool has_null, + ObBitmap *result_bitmap, + sql::ObBoolMask &bool_mask) +{ + int ret = OB_SUCCESS; + bool_mask.set_uncertain(); + if (min_datum.is_null() || max_datum.is_null()) { + // uncertain + } else { + const sql::PushdownFilterMonotonicity mono = filter.get_monotonicity(); + bool is_asc = false; + switch (mono) { + case sql::PushdownFilterMonotonicity::MON_ASC: { + is_asc = true; + } + case sql::PushdownFilterMonotonicity::MON_DESC: { + bool filtered = false; + ObStorageDatum &false_datum = is_asc ? max_datum : min_datum; + ObStorageDatum &true_datum = is_asc ? 
min_datum : max_datum; + if (OB_FAIL(filter.filter(false_datum, skip_bit, filtered))) { + STORAGE_LOG(WARN, "Failed to compare with false_datum", K(ret), K(false_datum), K(is_asc)); + } else if (filtered) { + bool_mask.set_always_false(); + } else if (!has_null) { + if (OB_FAIL(filter.filter(true_datum, skip_bit, filtered))) { + STORAGE_LOG(WARN, "Failed to compare with true_datum", K(ret), K(true_datum), K(is_asc)); + } else if (!filtered) { + bool_mask.set_always_true(); + } + } + break; + } + case sql::PushdownFilterMonotonicity::MON_EQ_ASC: { + is_asc = true; + } + case sql::PushdownFilterMonotonicity::MON_EQ_DESC: { + bool min_cmp_res = false; + bool max_cmp_res = false; + if (OB_FAIL(filter.judge_greater_or_less(min_datum, skip_bit, is_asc, min_cmp_res))) { + STORAGE_LOG(WARN, "Failed to judge min_datum", K(ret), K(min_datum)); + } else if (min_cmp_res) { + bool_mask.set_always_false(); + } else if (OB_FAIL(filter.judge_greater_or_less(max_datum, skip_bit, !is_asc, max_cmp_res))) { + STORAGE_LOG(WARN, "Failed to judge max_datum", K(ret), K(max_datum)); + } else if (max_cmp_res) { + bool_mask.set_always_false(); + } else if (!has_null) { + if (OB_FAIL(filter.filter(min_datum, skip_bit, min_cmp_res))) { + STORAGE_LOG(WARN, "Failed to compare with min_datum", K(ret), K(min_datum)); + } else if (min_cmp_res) { + // min datum is filtered + } else if (OB_FAIL(filter.filter(max_datum, skip_bit, max_cmp_res))) { + STORAGE_LOG(WARN, "Failed to compare with max_datum", K(ret), K(max_datum)); + } else if (!max_cmp_res) { + // min datum and max datum are both not filtered + bool_mask.set_always_true(); + } + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "Unexpected monotonicity", K(ret), K(mono)); + } + } + } + if (OB_SUCC(ret) && nullptr != result_bitmap){ + if (bool_mask.is_always_false()) { + result_bitmap->reuse(false); + } else if (bool_mask.is_always_true()) { + result_bitmap->reuse(true); + } + } + return ret; +} + } } diff --git 
a/src/storage/ob_storage_util.h b/src/storage/ob_storage_util.h index 61c26fcc7..8598af41e 100644 --- a/src/storage/ob_storage_util.h +++ b/src/storage/ob_storage_util.h @@ -26,6 +26,15 @@ namespace schema class ObColumnParam; } } +namespace common +{ +class ObBitmap; +} +namespace sql +{ +struct ObBoolMask; +class ObBlackFilterExecutor; +} namespace blocksstable { struct ObStorageDatum; @@ -77,6 +86,13 @@ int fill_exprs_lob_locator(const ObTableIterParam &iter_param, const int64_t vec_offset, const int64_t row_cap); +int check_skip_by_monotonicity(sql::ObBlackFilterExecutor &filter, + blocksstable::ObStorageDatum &min_datum, + blocksstable::ObStorageDatum &max_datum, + const sql::ObBitVector &skip_bit, + const bool has_null, + ObBitmap *result_bitmap, + sql::ObBoolMask &bool_mask); int cast_obj(const common::ObObjMeta &src_meta, common::ObIAllocator &cast_allocator, common::ObObj &obj); diff --git a/unittest/storage/blocksstable/test_skip_index_filter.cpp b/unittest/storage/blocksstable/test_skip_index_filter.cpp index 6fc51b4f7..990a92df5 100644 --- a/unittest/storage/blocksstable/test_skip_index_filter.cpp +++ b/unittest/storage/blocksstable/test_skip_index_filter.cpp @@ -317,6 +317,7 @@ int TestSkipIndexFilter::test_skip_index_filter_pushdown ( sql::ObPushdownExprSpec expr_spec(allocator_); sql::ObPushdownOperator op(eval_ctx, expr_spec); sql::ObWhiteFilterExecutor filter(allocator_, filter_node, op); + eval_ctx.batch_size_ = 256; filter.col_offsets_.init(COLUMN_CNT); filter.col_params_.init(COLUMN_CNT); const ObColumnParam *col_param = nullptr; @@ -381,9 +382,10 @@ int TestSkipIndexFilter::test_skip_index_filter_pushdown ( index_info.agg_row_buf_ = buf; index_info.agg_buf_size_ = buf_size; index_info.row_header_ = &row_header; + EXPECT_EQ(OB_SUCCESS, skip_index_filter.init(op.get_eval_ctx().get_batch_size(), &allocator_)); ret = skip_index_filter.falsifiable_pushdown_filter(col_idx, filter.filter_.expr_->args_[0]->obj_meta_, - ObSkipIndexType::MIN_MAX, 
index_info, filter, allocator_); + ObSkipIndexType::MIN_MAX, index_info, filter, allocator_, true); fal_desc = filter.get_filter_bool_mask(); diff --git a/unittest/storage/blocksstable/test_sstable_index_filter.cpp b/unittest/storage/blocksstable/test_sstable_index_filter.cpp index a1e207ecc..ee6da2b2c 100644 --- a/unittest/storage/blocksstable/test_sstable_index_filter.cpp +++ b/unittest/storage/blocksstable/test_sstable_index_filter.cpp @@ -81,10 +81,15 @@ void TestSSTableIndexFilter::TearDown() {} void TestSSTableIndexFilter::init() { ObIAllocator* allocator_ptr = &allocator_; + exec_ctx_ = OB_NEWx(ObExecContext, allocator_ptr, allocator_); + eval_ctx_ = OB_NEWx(ObEvalCtx, allocator_ptr, *exec_ctx_); expr_spec_ = OB_NEWx(ObPushdownExprSpec, allocator_ptr, allocator_); pushdown_operator_ = OB_NEWx(ObPushdownOperator, allocator_ptr, *eval_ctx_, *expr_spec_); + ASSERT_NE(nullptr, exec_ctx_); + ASSERT_NE(nullptr, eval_ctx_); ASSERT_NE(nullptr, expr_spec_); ASSERT_NE(nullptr, pushdown_operator_); + eval_ctx_->batch_size_ = 256; row_header_.row_count_ = TEST_ROW_CNT; read_info_.cols_index_.array_.init(1, allocator_); read_info_.cols_index_.array_.push_back(TEST_COLUMN_INDEX); @@ -187,7 +192,7 @@ void TestSSTableIndexFilter::test_sstable_index_filter_check_range_1() ObObj min_obj; min_obj.set_uint64(100); init_micro_index_info(max_obj, min_obj, index_info); - OK(index_filter.check_range(&read_info_, index_info, allocator_)); + OK(index_filter.check_range(&read_info_, index_info, allocator_, true)); ASSERT_TRUE(index_info.is_filter_always_false()); ObMicroIndexInfo index_info2; @@ -196,7 +201,7 @@ void TestSSTableIndexFilter::test_sstable_index_filter_check_range_1() ObObj min_obj2; min_obj2.set_uint64(40); init_micro_index_info(max_obj2, min_obj2, index_info2); - OK(index_filter.check_range(&read_info_, index_info2, allocator_)); + OK(index_filter.check_range(&read_info_, index_info2, allocator_, true)); ASSERT_TRUE(index_info2.is_filter_uncertain()); childs[2] = 
create_lt_white_filter(100); @@ -210,7 +215,7 @@ void TestSSTableIndexFilter::test_sstable_index_filter_check_range_1() ObObj min_obj3; min_obj3.set_uint64(0); init_micro_index_info(max_obj3, min_obj3, index_info3); - OK(index_filter2.check_range(&read_info_, index_info3, allocator_)); + OK(index_filter2.check_range(&read_info_, index_info3, allocator_, true)); ASSERT_TRUE(index_info3.is_filter_always_true()); } @@ -382,8 +387,8 @@ TEST_F(TestSSTableIndexFilter, test_bool_mask) TEST_F(TestSSTableIndexFilter, test_sstable_index_filter_extracter) { - ObPushdownFilterExecutor *white_filter = create_physical_filter(true); - ObPushdownFilterExecutor *black_filter = create_physical_filter(false); + ObPhysicalFilterExecutor *white_filter = static_cast(create_physical_filter(true)); + ObPhysicalFilterExecutor *black_filter = static_cast(create_physical_filter(false)); ASSERT_TRUE(nullptr != white_filter); ASSERT_TRUE(nullptr != black_filter); ObSkippingFilterNode node1; @@ -392,7 +397,7 @@ TEST_F(TestSSTableIndexFilter, test_sstable_index_filter_extracter) OK(ObSSTableIndexFilterExtracter::extract_skipping_filter(*white_filter, skip_index_type, node1)); OK(ObSSTableIndexFilterExtracter::extract_skipping_filter(*black_filter, skip_index_type, node2)); ASSERT_TRUE(node1.is_useful()); - ASSERT_FALSE(node2.is_useful()); + ASSERT_TRUE(node2.is_useful()); } TEST_F(TestSSTableIndexFilter, test_skipping_filter_nodes_builder)