[CP] Fix calc redundant filter selectivity bug

This commit is contained in:
xianyu-w 2024-10-12 04:14:05 +00:00 committed by ob-robot
parent 3b1cb4ce42
commit f375b89c4b
7 changed files with 155 additions and 64 deletions

View File

@ -797,7 +797,7 @@ int ObOptSelectivity::calculate_conditional_selectivity(const OptTableMetas &tab
double new_sel = 1.0;
if (OB_FAIL(append(total_filters, append_filters))) {
LOG_WARN("failed to append filters", K(ret));
} else if (total_sel > OB_DOUBLE_EPSINON && !ctx.get_correlation_model().is_independent()) {
} else if (total_sel > OB_DOUBLE_EPSINON) {
if (OB_FAIL(calculate_selectivity(table_metas,
ctx,
total_filters,
@ -805,6 +805,7 @@ int ObOptSelectivity::calculate_conditional_selectivity(const OptTableMetas &tab
all_predicate_sel))) {
LOG_WARN("failed to calculate selectivity", K(total_filters), K(ret));
} else {
new_sel = std::min(new_sel, total_sel);
conditional_sel = new_sel / total_sel;
total_sel = new_sel;
}
@ -823,28 +824,55 @@ int ObOptSelectivity::calculate_conditional_selectivity(const OptTableMetas &tab
int ObOptSelectivity::calculate_selectivity(const OptTableMetas &table_metas,
const OptSelectivityCtx &ctx,
ObIArray<ObSelEstimator *> &sel_estimators,
double &selectivity)
double &selectivity,
common::ObIArray<ObExprSelPair> &all_predicate_sel,
bool record_range_sel)
{
int ret = OB_SUCCESS;
selectivity = 1.0;
ObSEArray<double, 4> selectivities;
ObSEArray<ObExprSelPair, 1> dummy;
ObSEArray<const ObRawExpr *, 4> eigen_exprs;
for (int64_t i = 0; OB_SUCC(ret) && i < sel_estimators.count(); ++i) {
ObSelEstimator *estimator = sel_estimators.at(i);
double tmp_selectivity = 0.0;
if (OB_ISNULL(sel_estimators.at(i))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("estimator is null", K(ret), K(sel_estimators));
} else if (OB_FAIL(estimator->get_sel(table_metas, ctx, tmp_selectivity, dummy))) {
} else if (OB_FAIL(estimator->get_sel(table_metas, ctx, tmp_selectivity, all_predicate_sel))) {
LOG_WARN("failed to get sel", K(ret), KPC(estimator));
} else if (OB_FAIL(selectivities.push_back(revise_between_0_1(tmp_selectivity)))) {
LOG_WARN("failed to push back", K(ret));
} else {
tmp_selectivity = revise_between_0_1(tmp_selectivity);
}
if (OB_SUCC(ret) && record_range_sel &&
ObSelEstType::COLUMN_RANGE == estimator->get_type()) {
ObRangeSelEstimator *range_estimator = static_cast<ObRangeSelEstimator *>(estimator);
if (OB_FAIL(add_var_to_array_no_dup(all_predicate_sel,
ObExprSelPair(range_estimator->get_column_expr(), tmp_selectivity, true)))) {
LOG_WARN("failed to add selectivity to plan", K(ret), KPC(range_estimator), K(tmp_selectivity));
}
}
// Use the minimum selectivity from estimators with the same eigen expression
if (OB_SUCC(ret)) {
int64_t idx = -1;
const ObRawExpr *eigen_expr = estimator->get_eigen_expr();
if (NULL == eigen_expr || !ObOptimizerUtil::find_equal_expr(eigen_exprs, eigen_expr, idx)) {
if (OB_FAIL(eigen_exprs.push_back(eigen_expr)) ||
OB_FAIL(selectivities.push_back(tmp_selectivity))) {
LOG_WARN("failed to push back", K(ret));
}
} else if (OB_UNLIKELY(idx < 0 || idx >= selectivities.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected idx", K(idx), K(selectivities), K(eigen_exprs));
} else {
selectivities.at(idx) = std::min(tmp_selectivity, selectivities.at(idx));
}
}
}
if (OB_SUCC(ret)) {
selectivity = ctx.get_correlation_model().combine_filters_selectivity(selectivities);
}
LOG_DEBUG("calculate predicates selectivity", K(selectivity), K(selectivities), K(sel_estimators));
LOG_DEBUG("calculate predicates selectivity", K(selectivity), K(selectivities), K(eigen_exprs), K(sel_estimators));
return ret;
}
@ -887,31 +915,8 @@ int ObOptSelectivity::calculate_selectivity(const OptTableMetas &table_metas,
LOG_PRINT_EXPR(TRACE, "calculate one qual selectivity", *qual, K(single_sel));
}
}
if (OB_SUCC(ret) && OB_FAIL(selectivities.prepare_allocate(sel_estimators.count()))) {
LOG_WARN("failed to prepare allocate", K(ret), K(selectivities), K(sel_estimators));
}
for (int64_t i = 0; OB_SUCC(ret) && i < sel_estimators.count(); ++i) {
ObSelEstimator *estimator = sel_estimators.at(i);
double tmp_selectivity = 0.0;
if (OB_ISNULL(sel_estimators.at(i))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("estimator is null", K(ret), K(sel_estimators));
} else if (OB_FAIL(estimator->get_sel(table_metas, ctx, tmp_selectivity, all_predicate_sel))) {
LOG_WARN("failed to get sel", K(ret), KPC(estimator));
} else {
selectivities.at(i) = revise_between_0_1(tmp_selectivity);
if (ObSelEstType::COLUMN_RANGE == estimator->get_type()) {
ObRangeSelEstimator *range_estimator = static_cast<ObRangeSelEstimator *>(estimator);
if (OB_FAIL(add_var_to_array_no_dup(all_predicate_sel,
ObExprSelPair(range_estimator->get_column_expr(), tmp_selectivity, true)))) {
LOG_WARN("failed to add selectivity to plan", K(ret), KPC(range_estimator), K(tmp_selectivity));
}
}
}
}
if (OB_SUCC(ret)) {
selectivity = ctx.get_correlation_model().combine_filters_selectivity(selectivities);
LOG_DEBUG("calculate predicates selectivity", K(selectivity), K(selectivities), K(sel_estimators));
if (FAILEDx(calculate_selectivity(table_metas, ctx, sel_estimators, selectivity, all_predicate_sel, true))) {
LOG_WARN("failed to calculate estimator selectivity", K(ret), K(selectivities), K(sel_estimators));
}
return ret;
}
@ -3745,7 +3750,7 @@ int ObOptSelectivity::classify_quals(const OptTableMetas &table_metas,
obj_max.set_max_value();
if (OB_FAIL(factory.create_estimators(ctx, sel_info.quals_, estimators))) {
LOG_WARN("failed to create estimators", K(ret));
} else if (OB_FAIL(calculate_selectivity(table_metas, ctx, estimators, sel_info.selectivity_))) {
} else if (OB_FAIL(calculate_selectivity(table_metas, ctx, estimators, sel_info.selectivity_, all_predicate_sel))) {
LOG_WARN("failed to calc sel", K(ret));
}
for (int64_t j = 0; OB_SUCC(ret) && NULL == range_estimator && j < estimators.count(); j ++) {

View File

@ -645,9 +645,10 @@ public:
static int calculate_selectivity(const OptTableMetas &table_metas,
const OptSelectivityCtx &ctx,
ObIArray<ObSelEstimator *> &sel_estimators,
double &selectivity);
double &selectivity,
common::ObIArray<ObExprSelPair> &all_predicate_sel,
bool record_range_sel = false);
// @brief Compute the selectivity of a group of predicates that are ANDed together, based on the independence assumption
static int calculate_selectivity(const OptTableMetas &table_metas,
const OptSelectivityCtx &ctx,
const common::ObIArray<ObRawExpr*> &quals,

View File

@ -1570,6 +1570,22 @@ bool ObOptimizerUtil::find_equal_expr(const ObIArray<ObRawExpr *> &exprs,
return found;
}
bool ObOptimizerUtil::find_equal_expr(const ObIArray<const ObRawExpr *> &exprs,
const ObRawExpr *expr,
int64_t &idx)
{
bool found = false;
int64_t N = exprs.count();
for (int64_t i = 0; !found && i < N; ++i) {
if (is_expr_equivalent(exprs.at(i), expr)) {
found = true;
idx = i;
}
}
return found;
}
int ObOptimizerUtil::find_stmt_expr_direction(const ObDMLStmt &stmt,
const common::ObIArray<ObRawExpr*> &exprs,
const EqualSets &equal_sets,

View File

@ -326,6 +326,10 @@ public:
const ObRawExpr *expr,
int64_t &idx);
static bool find_equal_expr(const common::ObIArray<const ObRawExpr*> &exprs,
const ObRawExpr *expr,
int64_t &idx);
static bool find_equal_expr(const common::ObIArray<ObRawExpr*> &exprs,
const ObRawExpr *expr,
const EqualSets &equal_sets)

View File

@ -268,6 +268,41 @@ int ObSelEstimator::append_estimators(ObIArray<ObSelEstimator *> &sel_estimators
return ret;
}
void ObSelEstimator::extract_default_eigen_expr(const ObRawExpr *expr)
{
bool is_valid = true;
if (OB_NOT_NULL(expr) && !expr->is_const_expr()) {
eigen_expr_ = expr;
} else {
eigen_expr_ = NULL;
}
while (is_valid && OB_NOT_NULL(eigen_expr_) &&
OB_SUCCESS == ObOptSelectivity::remove_ignorable_func_for_est_sel(eigen_expr_) &&
eigen_expr_->get_param_count() > 0 &&
(IS_BOOL_OP(eigen_expr_->get_expr_type()) ||
T_OP_ADD == eigen_expr_->get_expr_type() ||
T_OP_MINUS == eigen_expr_->get_expr_type() ||
T_OP_ROW == eigen_expr_->get_expr_type())) {
is_valid = true;
const ObRawExpr *variable_expr = NULL;
for (int64_t i = 0; is_valid && i < eigen_expr_->get_param_count(); i ++) {
if (NULL == eigen_expr_->get_param_expr(i)) {
is_valid = false;
} else if (eigen_expr_->get_param_expr(i)->is_const_expr()) {
// do nothing
} else if (NULL != variable_expr) {
is_valid = false;
} else {
variable_expr = eigen_expr_->get_param_expr(i);
}
}
if (is_valid) {
eigen_expr_ = variable_expr;
}
}
return;
}
int ObDefaultSelEstimator::get_sel(const OptTableMetas &table_metas,
const OptSelectivityCtx &ctx,
double &selectivity,
@ -1699,6 +1734,7 @@ int ObLikeSelEstimator::create_estimator(ObSelEstimatorFactory &factory,
like_estimator->pattern_->is_static_const_expr() &&
like_estimator->escape_->is_static_const_expr()) {
bool is_start_with = false;
like_estimator->eigen_expr_ = like_estimator->variable_;
if (OB_FAIL(ObOptEstUtils::if_expr_start_with_patten_sign(params, like_estimator->pattern_,
like_estimator->escape_,
ctx.get_opt_ctx().get_exec_ctx(),
@ -1962,6 +1998,23 @@ int ObBoolOpSelEstimator::create_estimator(ObSelEstimatorFactory &factory,
}
}
}
if (OB_SUCC(ret)) {
const ObRawExpr *eigen_expr = NULL;
bool inited = false;
for (int64_t i = 0; i < bool_estimator->child_estimators_.count(); i ++) {
ObSelEstimator *child_estimator = bool_estimator->child_estimators_.at(i);
if (ObSelEstType::CONST == child_estimator->get_type()) {
// do nothing
} else if (!inited) {
eigen_expr = child_estimator->get_eigen_expr();
inited = true;
} else if (eigen_expr != child_estimator->get_eigen_expr()) {
eigen_expr = NULL;
break;
}
}
bool_estimator->eigen_expr_ = eigen_expr;
}
}
return ret;
}
@ -2026,9 +2079,10 @@ int ObBoolOpSelEstimator::get_sel(const OptTableMetas &table_metas,
// t_op_is, t_op_nseq , they are null safe exprs, don't consider null_sel.
selectivity = 1.0 - tmp_selectivity;
}
} else if (T_OP_AND == qual.get_expr_type() || T_OP_OR == qual.get_expr_type()) {
} else if (T_OP_OR == qual.get_expr_type()) {
double tmp_selectivity = 1.0;
ObSEArray<double, 4> selectivities;
bool is_mutex = (NULL != eigen_expr_);
for (int64_t i = 0; OB_SUCC(ret) && i < child_estimators_.count(); ++i) {
ObSelEstimator *estimator = NULL;
if (OB_ISNULL(estimator = child_estimators_.at(i))) {
@ -2040,12 +2094,8 @@ int ObBoolOpSelEstimator::get_sel(const OptTableMetas &table_metas,
LOG_WARN("failed to push back", K(ret));
}
}
if (OB_FAIL(ret)) {
} else if (T_OP_OR == qual.get_expr_type()) {
bool is_mutex = false;;
if (OB_FAIL(ObOptSelectivity::check_mutex_or(qual, is_mutex))) {
LOG_WARN("failed to check mutex or", K(ret));
} else if (is_mutex) {
if (OB_SUCC(ret)) {
if (is_mutex) {
selectivity = 0.0;
for (int64_t i = 0; i < selectivities.count(); i ++) {
selectivity += selectivities.at(i);
@ -2059,8 +2109,11 @@ int ObBoolOpSelEstimator::get_sel(const OptTableMetas &table_metas,
selectivity = ctx.get_correlation_model().combine_filters_selectivity(selectivities);
selectivity = 1- selectivity;
}
} else {
selectivity = ctx.get_correlation_model().combine_filters_selectivity(selectivities);
}
} else if (T_OP_AND == qual.get_expr_type()) {
if (OB_FAIL(ObOptSelectivity::calculate_selectivity(
table_metas, ctx, child_estimators_, selectivity, all_predicate_sel))) {
LOG_WARN("failed to calculate selectivity", K(ret));
}
} else {
ret = OB_ERR_UNEXPECTED;
@ -2092,6 +2145,7 @@ int ObRangeSelEstimator::create_estimator(ObSelEstimatorFactory &factory,
LOG_WARN("failed to create estimator ", K(ret));
} else {
range_estimator->column_expr_ = static_cast<ObColumnRefRawExpr *>(column_exprs.at(0));
range_estimator->eigen_expr_ = range_estimator->column_expr_;
if (OB_FAIL(range_estimator->range_exprs_.push_back(const_cast<ObRawExpr *>(&expr)))) {
LOG_WARN("failed to push back", K(ret));
} else {
@ -3212,6 +3266,7 @@ int ObCmpSelEstimator::create_estimator(ObSelEstimatorFactory &factory,
} else {
const ObRawExpr *left_expr = expr.get_param_expr(0);
const ObRawExpr *right_expr = expr.get_param_expr(1);
ObCmpSelEstimator *cmp_estimator = static_cast<ObCmpSelEstimator*>(estimator);
if (OB_ISNULL(left_expr) || OB_ISNULL(right_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get null expr", K(ret), K(left_expr), K(right_expr));
@ -3220,9 +3275,9 @@ int ObCmpSelEstimator::create_estimator(ObSelEstimatorFactory &factory,
LOG_WARN("failed to get expr without lossless cast", K(ret));
} else if ((left_expr->is_column_ref_expr() && right_expr->is_const_expr()) ||
(left_expr->is_const_expr() && right_expr->is_column_ref_expr())) {
static_cast<ObCmpSelEstimator*>(estimator)->can_calc_sel_ = true;
static_cast<ObCmpSelEstimator*>(estimator)->col_expr_ = left_expr->is_column_ref_expr() ? static_cast<const ObColumnRefRawExpr*>(left_expr) :
static_cast<const ObColumnRefRawExpr*>(right_expr);
cmp_estimator->can_calc_sel_ = true;
cmp_estimator->col_expr_ = left_expr->is_column_ref_expr() ? static_cast<const ObColumnRefRawExpr*>(left_expr) :
static_cast<const ObColumnRefRawExpr*>(right_expr);
} else if (T_OP_ROW == left_expr->get_expr_type() && T_OP_ROW == right_expr->get_expr_type()) {
// only handle (col1, xx, xx) CMP (const, xx, xx)
if (left_expr->get_param_count() == 1 && OB_NOT_NULL(left_expr->get_param_expr(0)) &&
@ -3245,11 +3300,14 @@ int ObCmpSelEstimator::create_estimator(ObSelEstimatorFactory &factory,
LOG_WARN("get unexpected null", K(ret), K(left_expr), K(right_expr));
} else if ((left_expr->is_column_ref_expr() && right_expr->is_const_expr()) ||
(left_expr->is_const_expr() && right_expr->is_column_ref_expr())) {
static_cast<ObCmpSelEstimator*>(estimator)->can_calc_sel_ = true;
static_cast<ObCmpSelEstimator*>(estimator)->col_expr_ = left_expr->is_column_ref_expr() ? static_cast<const ObColumnRefRawExpr*>(left_expr) :
static_cast<const ObColumnRefRawExpr*>(right_expr);
cmp_estimator->can_calc_sel_ = true;
cmp_estimator->col_expr_ = left_expr->is_column_ref_expr() ? static_cast<const ObColumnRefRawExpr*>(left_expr) :
static_cast<const ObColumnRefRawExpr*>(right_expr);
} else { /* do nothing */ }
}
if (OB_SUCC(ret) && NULL != cmp_estimator->col_expr_) {
estimator->set_eigen_expr(cmp_estimator->col_expr_);
}
}
return ret;
}
@ -3409,6 +3467,7 @@ int ObUniformRangeSelEstimator::create_estimator(ObSelEstimatorFactory &factory,
range_estimator->expr_ = param_expr;
range_estimator->range_ = range;
range_estimator->is_not_op_ = is_not_op;
range_estimator->eigen_expr_ = param_expr;
}
return ret;
}

View File

@ -82,7 +82,7 @@ class ObSelEstimatorFactory;
class ObSelEstimator
{
public:
ObSelEstimator(ObSelEstType type) : type_(type) {}
ObSelEstimator(ObSelEstType type) : type_(type), eigen_expr_(NULL) {}
virtual ~ObSelEstimator() = default;
static int append_estimators(ObIArray<ObSelEstimator *> &sel_estimators, ObSelEstimator *new_estimator);
@ -100,10 +100,15 @@ public:
virtual bool tend_to_use_ds() = 0;
inline ObSelEstType get_type() const { return type_; }
const ObRawExpr *get_eigen_expr() const { return eigen_expr_; }
void set_eigen_expr(const ObRawExpr *expr) { eigen_expr_ = expr; }
void extract_default_eigen_expr(const ObRawExpr *expr);
VIRTUAL_TO_STRING_KV(K_(type));
protected:
ObSelEstType type_;
const ObRawExpr *eigen_expr_; // Used to check whether two estimators are fully correlated
private:
DISABLE_COPY_ASSIGN(ObSelEstimator);
@ -189,6 +194,7 @@ int create_simple_estimator(ObSelEstimatorFactory &factory,
LOG_WARN("failed to create estimator ", K(ret));
} else {
temp_estimator->set_expr(&expr);
temp_estimator->extract_default_eigen_expr(&expr);
estimator = temp_estimator;
}
return ret;

View File

@ -351,7 +351,7 @@ Query Plan
=========================================================================
|ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)|
-------------------------------------------------------------------------
|0 |PX COORDINATOR MERGE SORT | |1 |42 |
|0 |PX COORDINATOR MERGE SORT | |1 |43 |
|1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |42 |
|2 | └─MERGE GROUP BY | |1 |41 |
|3 | └─EXCHANGE IN MERGE SORT DISTR | |20 |39 |
@ -406,7 +406,7 @@ Query Plan
=========================================================================
|ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)|
-------------------------------------------------------------------------
|0 |PX COORDINATOR MERGE SORT | |1 |46 |
|0 |PX COORDINATOR MERGE SORT | |1 |47 |
|1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |46 |
|2 | └─MERGE GROUP BY | |1 |46 |
|3 | └─EXCHANGE IN MERGE SORT DISTR | |20 |43 |
@ -461,7 +461,7 @@ Query Plan
=========================================================================
|ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)|
-------------------------------------------------------------------------
|0 |PX COORDINATOR MERGE SORT | |1 |44 |
|0 |PX COORDINATOR MERGE SORT | |1 |45 |
|1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |44 |
|2 | └─MERGE GROUP BY | |1 |43 |
|3 | └─EXCHANGE IN MERGE SORT DISTR | |20 |41 |
@ -516,7 +516,7 @@ Query Plan
=========================================================================
|ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)|
-------------------------------------------------------------------------
|0 |PX COORDINATOR MERGE SORT | |1 |46 |
|0 |PX COORDINATOR MERGE SORT | |1 |47 |
|1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |46 |
|2 | └─MERGE GROUP BY | |1 |46 |
|3 | └─EXCHANGE IN MERGE SORT DISTR | |20 |43 |
@ -571,7 +571,7 @@ Query Plan
============================================================================
|ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)|
----------------------------------------------------------------------------
|0 |PX COORDINATOR MERGE SORT | |1 |59 |
|0 |PX COORDINATOR MERGE SORT | |1 |60 |
|1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |59 |
|2 | └─MERGE GROUP BY | |1 |59 |
|3 | └─EXCHANGE IN MERGE SORT DISTR | |19 |56 |
@ -626,9 +626,9 @@ Query Plan
============================================================================
|ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)|
----------------------------------------------------------------------------
|0 |PX COORDINATOR MERGE SORT | |1 |69 |
|1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |68 |
|2 | └─MERGE GROUP BY | |1 |68 |
|0 |PX COORDINATOR MERGE SORT | |2 |70 |
|1 |└─EXCHANGE OUT DISTR |:EX10001 |2 |69 |
|2 | └─MERGE GROUP BY | |2 |68 |
|3 | └─EXCHANGE IN MERGE SORT DISTR | |21 |66 |
|4 | └─EXCHANGE OUT DISTR (HASH) |:EX10000 |21 |57 |
|5 | └─MERGE GROUP BY | |21 |37 |
@ -681,8 +681,8 @@ Query Plan
============================================================================
|ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)|
----------------------------------------------------------------------------
|0 |PX COORDINATOR MERGE SORT | |1 |77 |
|1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |77 |
|0 |PX COORDINATOR MERGE SORT | |1 |78 |
|1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |78 |
|2 | └─MERGE GROUP BY | |1 |77 |
|3 | └─EXCHANGE IN MERGE SORT DISTR | |20 |75 |
|4 | └─EXCHANGE OUT DISTR (HASH) |:EX10000 |20 |66 |
@ -736,9 +736,9 @@ Query Plan
============================================================================
|ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)|
----------------------------------------------------------------------------
|0 |PX COORDINATOR MERGE SORT | |1 |91 |
|1 |└─EXCHANGE OUT DISTR |:EX10001 |1 |91 |
|2 | └─MERGE GROUP BY | |1 |91 |
|0 |PX COORDINATOR MERGE SORT | |2 |93 |
|1 |└─EXCHANGE OUT DISTR |:EX10001 |2 |92 |
|2 | └─MERGE GROUP BY | |2 |91 |
|3 | └─EXCHANGE IN MERGE SORT DISTR | |22 |89 |
|4 | └─EXCHANGE OUT DISTR (HASH) |:EX10000 |22 |79 |
|5 | └─MERGE GROUP BY | |22 |59 |