support transform distinct agg with multi params

This commit is contained in:
hy-guo 2024-12-16 09:46:03 +00:00 committed by ob-robot
parent cd11b83db4
commit b9c1eb1bc8
4 changed files with 116 additions and 41 deletions

View File

@ -178,7 +178,7 @@ public:
agg_row = agg_ctx.agg_rows_.at(i);
NotNullBitVector &notnulls = agg_ctx.row_meta().locate_notnulls_bitmap(agg_row);
if (!pvt_skip.at(i)) {
notnulls.set(i);
notnulls.set(agg_col_id);
}
}
} else {
@ -186,7 +186,7 @@ public:
agg_row = agg_ctx.agg_rows_.at(row_sel.index(i));
NotNullBitVector &notnulls = agg_ctx.row_meta().locate_notnulls_bitmap(agg_row);
if (!pvt_skip.at(row_sel.index(i))) {
notnulls.set(row_sel.index(i));
notnulls.set(agg_col_id);
}
}
}

View File

@ -65,7 +65,9 @@ int ObTransformDistinctAggregate::check_transform_validity(const ObDMLStmt *stmt
{
int ret = OB_SUCCESS;
const ObSelectStmt *select_stmt = NULL;
const ObRawExpr *distinct_expr = NULL;
ObSEArray<ObRawExpr*, 4> distinct_exprs;
ObStmtExprReplacer replacer; // to extract shared expr in param expr of distinct agg funcs
replacer.set_relation_scope();
is_valid = true;
if (OB_ISNULL(stmt) || OB_ISNULL(stmt->get_query_ctx())) {
ret = OB_ERR_UNEXPECTED;
@ -95,17 +97,49 @@ int ObTransformDistinctAggregate::check_transform_validity(const ObDMLStmt *stmt
ret = OB_ERR_UNEXPECTED;
LOG_WARN("agg expr is null", K(ret), K(i));
} else if (aggr_expr->is_param_distinct()) {
if (aggr_expr->get_real_param_count() != 1) {
if (1 < aggr_expr->get_real_param_count()
&& T_FUN_COUNT != aggr_expr->get_expr_type()
&& T_FUN_GROUP_CONCAT != aggr_expr->get_expr_type()) {
is_valid = false;
OPT_TRACE("can not do transform, distinct aggregate has more than one param");
} else if (OB_ISNULL(aggr_expr->get_real_param_exprs().at(0))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("param of distinct aggregate function is NULL", K(ret), KPC(aggr_expr));
} else if (distinct_expr == NULL) {
distinct_expr = aggr_expr->get_real_param_exprs().at(0);
} else if (!distinct_expr->same_as(*aggr_expr->get_real_param_exprs().at(0))) {
OPT_TRACE("can not do transform, stmt has distinct aggregate functions with more than one params");
} else if (!aggr_expr->get_order_items().empty() || aggr_expr->contain_nested_aggr()) {
is_valid = false;
OPT_TRACE("can not do transform, stmt has more than one distinct column");
OPT_TRACE("can not do transform, stmt has distinct aggregate functions with order item or has nested aggregate");
} else if (distinct_exprs.empty()) {
if (OB_FAIL(ObOptimizerUtil::append_exprs_no_dup(distinct_exprs,
aggr_expr->get_real_param_exprs()))) {
LOG_WARN("failed to assign aggr param expr", K(ret));
}
} else {
ObSqlBitSet<> visited_idx;
for (int64_t j = 0; OB_SUCC(ret) && is_valid && j < aggr_expr->get_real_param_count(); ++j) {
ObRawExpr *param_expr = aggr_expr->get_real_param_exprs().at(j);
bool has_find = false;
if (OB_ISNULL(param_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("param of distinct aggregate function is NULL", K(ret), KPC(aggr_expr), K(j));
}
for (int64_t k = 0; OB_SUCC(ret) && !has_find && k < distinct_exprs.count(); ++k) {
if (OB_ISNULL(distinct_exprs.at(k))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("param of distinct aggregate function is NULL", K(ret), K(distinct_exprs), K(k));
} else if (param_expr->same_as(*distinct_exprs.at(k))) {
has_find = true;
if (OB_FAIL(visited_idx.add_member(k))) {
LOG_WARN("failed to add member", K(ret));
} else if (param_expr != distinct_exprs.at(k)
&& OB_FAIL(replacer.add_replace_expr(param_expr, distinct_exprs.at(k)))) {
LOG_WARN("failed to add replace expr", K(ret));
}
break;
}
}
is_valid &= has_find;
}
is_valid &= (visited_idx.num_members() == distinct_exprs.count());
if (!is_valid) {
OPT_TRACE("can not do transform, stmt has aggregate functions with different distinct columns");
}
}
} else if (OB_FAIL(ObOptimizerUtil::check_aggr_can_pre_aggregate(aggr_expr, is_valid))) {
LOG_WARN("failed to check aggr can pre aggregate", K(ret), KPC(aggr_expr));
@ -113,10 +147,24 @@ int ObTransformDistinctAggregate::check_transform_validity(const ObDMLStmt *stmt
OPT_TRACE("can not do transform, stmt has aggregate function that can not pre aggregate");
}
}
if (OB_SUCC(ret) && is_valid && NULL == distinct_expr) {
if (OB_SUCC(ret) && is_valid && distinct_exprs.empty()) {
is_valid = false;
OPT_TRACE("do not need to transform, stmt has no distinct aggregate function");
}
if (OB_SUCC(ret) && is_valid) {
// replace the same exprs in distinct agg funcs with shared exprs
for (int64_t i = 0; OB_SUCC(ret) && i < select_stmt->get_aggr_item_size(); ++i) {
ObRawExpr *aggr_expr = select_stmt->get_aggr_item(i);
if (OB_ISNULL(aggr_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("agg expr is null", K(ret), K(i));
} else if (!static_cast<ObAggFunRawExpr*>(aggr_expr)->is_param_distinct()) {
// do nothing
} else if (OB_FAIL(replacer.do_visit(aggr_expr))) {
LOG_WARN("failed to replace param expr", K(ret));
}
}
}
return ret;
}
@ -140,18 +188,13 @@ int ObTransformDistinctAggregate::do_transform(ObSelectStmt *stmt,
non_distinct_aggr,
distinct_aggr))) {
LOG_WARN("failed to classify aggr exprs", K(ret));
} else if (OB_UNLIKELY(distinct_aggr.empty() || NULL == distinct_aggr.at(0)
|| 1 != distinct_aggr.at(0)->get_real_param_count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected distinct aggr", K(ret), K(distinct_aggr));
} else if (OB_FAIL(construct_view_select_exprs(non_distinct_aggr,
distinct_aggr,
view_select_exprs))) {
LOG_WARN("failed to construct view select exprs", K(ret));
} else if (OB_FAIL(append(view_group_exprs, stmt->get_group_exprs()))) {
LOG_WARN("failed to append group exprs", K(ret));
} else if (OB_FAIL(append(view_group_exprs, distinct_aggr.at(0)->get_real_param_exprs()))) {
LOG_WARN("failed to append group exprs", K(ret));
} else if (OB_FAIL(construct_view_group_exprs(stmt->get_group_exprs(),
distinct_aggr,
view_group_exprs))) {
LOG_WARN("failed to construct view group exprs", K(ret));
} else if (OB_FAIL(stmt->get_from_tables(from_tables))) {
LOG_WARN("failed to get from tables", K(ret));
} else if (OB_FAIL(semi_infos.assign(stmt->get_semi_infos()))) {
@ -193,10 +236,7 @@ int ObTransformDistinctAggregate::classify_aggr_exprs(const ObIArray<ObAggFunRaw
ret = OB_ERR_UNEXPECTED;
LOG_WARN("aggr expr is null", K(ret), K(i));
} else if (aggr_expr->is_param_distinct()) {
if (!distinct_aggr.empty() &&
OB_FAIL(aggr_expr->get_real_param_exprs_for_update().assign(distinct_aggr.at(0)->get_real_param_exprs()))) {
LOG_WARN("failed to assign param exprs for distinct aggr", K(ret), KPC(aggr_expr));
} else if (OB_FAIL(distinct_aggr.push_back(aggr_expr))) {
if (OB_FAIL(distinct_aggr.push_back(aggr_expr))) {
LOG_WARN("failed to push back distinct aggr", K(ret));
}
} else {
@ -214,14 +254,11 @@ int ObTransformDistinctAggregate::classify_aggr_exprs(const ObIArray<ObAggFunRaw
* view select exprs = distinct columns + non distinct aggrs
*/
int ObTransformDistinctAggregate::construct_view_select_exprs(const ObIArray<ObAggFunRawExpr*> &non_distinct_aggr,
const ObIArray<ObAggFunRawExpr*> &distinct_aggr,
ObIArray<ObRawExpr*> &view_select_exprs)
{
int ret = OB_SUCCESS;
if (OB_FAIL(ObOptimizerUtil::append_exprs_no_dup(view_select_exprs,
distinct_aggr.at(0)->get_real_param_exprs()))) {
LOG_WARN("failed to append distinct expr", K(ret));
}
// We do not need to add distinct columns into view select exprs here, they
// will be added by ObTransformUtils::create_inline_view automatically
for (int64_t i = 0; OB_SUCC(ret) && i < non_distinct_aggr.count(); ++i) {
if (OB_FAIL(add_var_to_array_no_dup(view_select_exprs, static_cast<ObRawExpr*>(non_distinct_aggr.at(i))))) {
LOG_WARN("failed to add non distinct aggr expr", K(ret));
@ -230,6 +267,36 @@ int ObTransformDistinctAggregate::construct_view_select_exprs(const ObIArray<ObA
return ret;
}
/**
* @brief ObTransformDistinctAggregate::construct_view_group_exprs
*
* view group exprs = origin group exprs + distinct columns
*/
int ObTransformDistinctAggregate::construct_view_group_exprs(const ObIArray<ObRawExpr*> &ori_group_expr,
const ObIArray<ObAggFunRawExpr*> &distinct_aggr,
ObIArray<ObRawExpr*> &view_group_exprs)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(distinct_aggr.empty() || NULL == distinct_aggr.at(0))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected distinct aggr", K(ret), K(distinct_aggr));
} else if (OB_FAIL(append(view_group_exprs, ori_group_expr))) {
LOG_WARN("failed to append group exprs", K(ret));
}
for (int64_t i = 0; OB_SUCC(ret) && i < distinct_aggr.at(0)->get_real_param_count(); ++i) {
ObRawExpr *param_expr = distinct_aggr.at(0)->get_real_param_exprs().at(i);
if (OB_ISNULL(param_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("param expr is null", K(ret), K(i), KPC(distinct_aggr.at(0)));
} else if (param_expr->is_static_const_expr()) {
// do nothing, do not need to add static const expr into group exprs
} else if (OB_FAIL(add_var_to_array_no_dup(view_group_exprs, param_expr))) {
LOG_WARN("failed to add distinct aggr param", K(ret));
}
}
return ret;
}
/**
* @brief ObTransformDistinctAggregate::replace_aggr_func
*

View File

@ -39,8 +39,10 @@ private:
ObIArray<ObAggFunRawExpr*> &non_distinct_aggr,
ObIArray<ObAggFunRawExpr*> &distinct_aggr);
int construct_view_select_exprs(const ObIArray<ObAggFunRawExpr*> &non_distinct_aggr,
const ObIArray<ObAggFunRawExpr*> &distinct_aggr,
ObIArray<ObRawExpr*> &view_select_exprs);
int construct_view_group_exprs(const ObIArray<ObRawExpr*> &ori_group_expr,
const ObIArray<ObAggFunRawExpr*> &distinct_aggr,
ObIArray<ObRawExpr*> &view_group_exprs);
int replace_aggr_func(ObSelectStmt *stmt,
TableItem *view_table,
const ObIArray<ObAggFunRawExpr*> &distinct_aggr);

View File

@ -161,17 +161,23 @@ Outputs & filters:
range_key([t3.__pk_increment]), range(MIN ; MAX)always true
explain select count(distinct a,b) from t3;
Query Plan
=================================================
|ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)|
-------------------------------------------------
|0 |SCALAR GROUP BY | |1 |3 |
|1 |└─TABLE FULL SCAN|t3 |1 |3 |
=================================================
======================================================
|ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)|
------------------------------------------------------
|0 |SCALAR GROUP BY | |1 |3 |
|1 |└─SUBPLAN SCAN |VIEW1|1 |3 |
|2 | └─HASH DISTINCT | |1 |3 |
|3 | └─TABLE FULL SCAN|t3 |1 |3 |
======================================================
Outputs & filters:
-------------------------------------
0 - output([T_FUN_COUNT(distinct t3.a, t3.b)]), filter(nil), rowset=16
group(nil), agg_func([T_FUN_COUNT(distinct t3.a, t3.b)])
1 - output([t3.a], [t3.b]), filter(nil), rowset=16
0 - output([T_FUN_COUNT(VIEW1.t3.a, VIEW1.t3.b)]), filter(nil), rowset=16
group(nil), agg_func([T_FUN_COUNT(VIEW1.t3.a, VIEW1.t3.b)])
1 - output([VIEW1.t3.a], [VIEW1.t3.b]), filter(nil), rowset=16
access([VIEW1.t3.a], [VIEW1.t3.b])
2 - output([t3.a], [t3.b]), filter(nil), rowset=16
distinct([t3.a], [t3.b])
3 - output([t3.a], [t3.b]), filter(nil), rowset=16
access([t3.a], [t3.b]), partitions(p0)
is_index_back=false, is_global_index=false,
range_key([t3.__pk_increment]), range(MIN ; MAX)always true