From b9c1eb1bc830392d7d90b9c04974de21e175a1cd Mon Sep 17 00:00:00 2001 From: hy-guo Date: Mon, 16 Dec 2024 09:46:03 +0000 Subject: [PATCH] support transform distinct agg with multi params --- src/share/aggregate/single_row.h | 4 +- .../ob_transform_distinct_aggregate.cpp | 125 ++++++++++++++---- .../rewrite/ob_transform_distinct_aggregate.h | 4 +- .../r/mysql/count_pushdown_cs_encoding.result | 24 ++-- 4 files changed, 116 insertions(+), 41 deletions(-) diff --git a/src/share/aggregate/single_row.h b/src/share/aggregate/single_row.h index e3a295bac..dda4234de 100644 --- a/src/share/aggregate/single_row.h +++ b/src/share/aggregate/single_row.h @@ -178,7 +178,7 @@ public: agg_row = agg_ctx.agg_rows_.at(i); NotNullBitVector ¬nulls = agg_ctx.row_meta().locate_notnulls_bitmap(agg_row); if (!pvt_skip.at(i)) { - notnulls.set(i); + notnulls.set(agg_col_id); } } } else { @@ -186,7 +186,7 @@ public: agg_row = agg_ctx.agg_rows_.at(row_sel.index(i)); NotNullBitVector ¬nulls = agg_ctx.row_meta().locate_notnulls_bitmap(agg_row); if (!pvt_skip.at(row_sel.index(i))) { - notnulls.set(row_sel.index(i)); + notnulls.set(agg_col_id); } } } diff --git a/src/sql/rewrite/ob_transform_distinct_aggregate.cpp b/src/sql/rewrite/ob_transform_distinct_aggregate.cpp index e764dc573..5239ea7db 100644 --- a/src/sql/rewrite/ob_transform_distinct_aggregate.cpp +++ b/src/sql/rewrite/ob_transform_distinct_aggregate.cpp @@ -65,7 +65,9 @@ int ObTransformDistinctAggregate::check_transform_validity(const ObDMLStmt *stmt { int ret = OB_SUCCESS; const ObSelectStmt *select_stmt = NULL; - const ObRawExpr *distinct_expr = NULL; + ObSEArray distinct_exprs; + ObStmtExprReplacer replacer; // to extract shared expr in param expr of distinct agg funcs + replacer.set_relation_scope(); is_valid = true; if (OB_ISNULL(stmt) || OB_ISNULL(stmt->get_query_ctx())) { ret = OB_ERR_UNEXPECTED; @@ -95,17 +97,49 @@ int ObTransformDistinctAggregate::check_transform_validity(const ObDMLStmt *stmt ret = OB_ERR_UNEXPECTED; LOG_WARN("agg expr is null", K(ret), K(i)); } else if (aggr_expr->is_param_distinct()) { - if (aggr_expr->get_real_param_count() != 1) { + if (1 < aggr_expr->get_real_param_count() + && T_FUN_COUNT != aggr_expr->get_expr_type() + && T_FUN_GROUP_CONCAT != aggr_expr->get_expr_type()) { is_valid = false; - OPT_TRACE("can not do transform, distinct aggregate has more than one param"); - } else if (OB_ISNULL(aggr_expr->get_real_param_exprs().at(0))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("param of distinct aggregate function is NULL", K(ret), KPC(aggr_expr)); - } else if (distinct_expr == NULL) { - distinct_expr = aggr_expr->get_real_param_exprs().at(0); - } else if (!distinct_expr->same_as(*aggr_expr->get_real_param_exprs().at(0))) { + OPT_TRACE("can not do transform, stmt has distinct aggregate functions with more than one params"); + } else if (!aggr_expr->get_order_items().empty() || aggr_expr->contain_nested_aggr()) { is_valid = false; - OPT_TRACE("can not do transform, stmt has more than one distinct column"); + OPT_TRACE("can not do transform, stmt has distinct aggregate functions with order item or has nested aggregate"); + } else if (distinct_exprs.empty()) { + if (OB_FAIL(ObOptimizerUtil::append_exprs_no_dup(distinct_exprs, + aggr_expr->get_real_param_exprs()))) { + LOG_WARN("failed to assign aggr param expr", K(ret)); + } + } else { + ObSqlBitSet<> visited_idx; + for (int64_t j = 0; OB_SUCC(ret) && is_valid && j < aggr_expr->get_real_param_count(); ++j) { + ObRawExpr *param_expr = aggr_expr->get_real_param_exprs().at(j); + bool has_find = false; + if (OB_ISNULL(param_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("param of distinct aggregate function is NULL", K(ret), KPC(aggr_expr), K(j)); + } + for (int64_t k = 0; OB_SUCC(ret) && !has_find && k < distinct_exprs.count(); ++k) { + if (OB_ISNULL(distinct_exprs.at(k))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("param of distinct aggregate function is NULL", K(ret), K(distinct_exprs), K(k)); + } else if (param_expr->same_as(*distinct_exprs.at(k))) { + has_find = true; + if (OB_FAIL(visited_idx.add_member(k))) { + LOG_WARN("failed to add member", K(ret)); + } else if (param_expr != distinct_exprs.at(k) + && OB_FAIL(replacer.add_replace_expr(param_expr, distinct_exprs.at(k)))) { + LOG_WARN("failed to add replace expr", K(ret)); + } + break; + } + } + is_valid &= has_find; + } + is_valid &= (visited_idx.num_members() == distinct_exprs.count()); + if (!is_valid) { + OPT_TRACE("can not do transform, stmt has aggregate functions with different distinct columns"); + } } } else if (OB_FAIL(ObOptimizerUtil::check_aggr_can_pre_aggregate(aggr_expr, is_valid))) { LOG_WARN("failed to check aggr can pre aggregate", K(ret), KPC(aggr_expr)); @@ -113,10 +147,24 @@ int ObTransformDistinctAggregate::check_transform_validity(const ObDMLStmt *stmt OPT_TRACE("can not do transform, stmt has aggregate function that can not pre aggregate"); } } - if (OB_SUCC(ret) && is_valid && NULL == distinct_expr) { + if (OB_SUCC(ret) && is_valid && distinct_exprs.empty()) { is_valid = false; OPT_TRACE("do not need to transform, stmt has no distinct aggregate function"); } + if (OB_SUCC(ret) && is_valid) { + // replace the same exprs in distinct agg funcs with shared exprs + for (int64_t i = 0; OB_SUCC(ret) && i < select_stmt->get_aggr_item_size(); ++i) { + ObRawExpr *aggr_expr = select_stmt->get_aggr_item(i); + if (OB_ISNULL(aggr_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("agg expr is null", K(ret), K(i)); + } else if (!static_cast(aggr_expr)->is_param_distinct()) { + // do nothing + } else if (OB_FAIL(replacer.do_visit(aggr_expr))) { + LOG_WARN("failed to replace param expr", K(ret)); + } + } + } return ret; } @@ -140,18 +188,13 @@ int ObTransformDistinctAggregate::do_transform(ObSelectStmt *stmt, non_distinct_aggr, distinct_aggr))) { LOG_WARN("failed to classify aggr exprs", K(ret)); - } else if (OB_UNLIKELY(distinct_aggr.empty() || NULL == distinct_aggr.at(0) - || 1 != distinct_aggr.at(0)->get_real_param_count())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected distinct aggr", K(ret), K(distinct_aggr)); } else if (OB_FAIL(construct_view_select_exprs(non_distinct_aggr, - distinct_aggr, view_select_exprs))) { LOG_WARN("failed to construct view select exprs", K(ret)); - } else if (OB_FAIL(append(view_group_exprs, stmt->get_group_exprs()))) { - LOG_WARN("failed to append group exprs", K(ret)); - } else if (OB_FAIL(append(view_group_exprs, distinct_aggr.at(0)->get_real_param_exprs()))) { - LOG_WARN("failed to append group exprs", K(ret)); + } else if (OB_FAIL(construct_view_group_exprs(stmt->get_group_exprs(), + distinct_aggr, + view_group_exprs))) { + LOG_WARN("failed to construct view group exprs", K(ret)); } else if (OB_FAIL(stmt->get_from_tables(from_tables))) { LOG_WARN("failed to get from tables", K(ret)); } else if (OB_FAIL(semi_infos.assign(stmt->get_semi_infos()))) { @@ -193,10 +236,7 @@ int ObTransformDistinctAggregate::classify_aggr_exprs(const ObIArrayis_param_distinct()) { - if (!distinct_aggr.empty() && - OB_FAIL(aggr_expr->get_real_param_exprs_for_update().assign(distinct_aggr.at(0)->get_real_param_exprs()))) { - LOG_WARN("failed to assign param exprs for distinct aggr", K(ret), KPC(aggr_expr)); - } else if (OB_FAIL(distinct_aggr.push_back(aggr_expr))) { + if (OB_FAIL(distinct_aggr.push_back(aggr_expr))) { LOG_WARN("failed to push back distinct aggr", K(ret)); } } else { @@ -214,14 +254,11 @@ int ObTransformDistinctAggregate::classify_aggr_exprs(const ObIArray &non_distinct_aggr, - const ObIArray &distinct_aggr, ObIArray &view_select_exprs) { int ret = OB_SUCCESS; - if (OB_FAIL(ObOptimizerUtil::append_exprs_no_dup(view_select_exprs, - distinct_aggr.at(0)->get_real_param_exprs()))) { - LOG_WARN("failed to append distinct expr", K(ret)); - } + // We do not need to add distinct columns into view select exprs here, they + // will be added by ObTransformUtils::create_inline_view automatically for (int64_t i = 0; OB_SUCC(ret) && i < non_distinct_aggr.count(); ++i) { if (OB_FAIL(add_var_to_array_no_dup(view_select_exprs, static_cast(non_distinct_aggr.at(i))))) { LOG_WARN("failed to add non distinct aggr expr", K(ret)); @@ -230,6 +267,36 @@ int ObTransformDistinctAggregate::construct_view_select_exprs(const ObIArray &ori_group_expr, + const ObIArray &distinct_aggr, + ObIArray &view_group_exprs) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(distinct_aggr.empty() || NULL == distinct_aggr.at(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected distinct aggr", K(ret), K(distinct_aggr)); + } else if (OB_FAIL(append(view_group_exprs, ori_group_expr))) { + LOG_WARN("failed to append group exprs", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < distinct_aggr.at(0)->get_real_param_count(); ++i) { + ObRawExpr *param_expr = distinct_aggr.at(0)->get_real_param_exprs().at(i); + if (OB_ISNULL(param_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("param expr is null", K(ret), K(i), KPC(distinct_aggr.at(0))); + } else if (param_expr->is_static_const_expr()) { + // do nothing, do not need to add static const expr into group exprs + } else if (OB_FAIL(add_var_to_array_no_dup(view_group_exprs, param_expr))) { + LOG_WARN("failed to add distinct aggr param", K(ret)); + } + } + return ret; +} + /** * @brief ObTransformDistinctAggregate::replace_aggr_func * diff --git a/src/sql/rewrite/ob_transform_distinct_aggregate.h b/src/sql/rewrite/ob_transform_distinct_aggregate.h index 097b8c7e0..17d9778a9 100644 --- a/src/sql/rewrite/ob_transform_distinct_aggregate.h +++ b/src/sql/rewrite/ob_transform_distinct_aggregate.h @@ -39,8 +39,10 @@ private: ObIArray &non_distinct_aggr, ObIArray &distinct_aggr); int construct_view_select_exprs(const ObIArray &non_distinct_aggr, - const ObIArray &distinct_aggr, ObIArray &view_select_exprs); + int construct_view_group_exprs(const ObIArray &ori_group_expr, + const ObIArray &distinct_aggr, + ObIArray &view_group_exprs); int replace_aggr_func(ObSelectStmt *stmt, TableItem *view_table, const ObIArray &distinct_aggr); diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/count_pushdown_cs_encoding.result b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/count_pushdown_cs_encoding.result index d07a65108..df60c2d92 100644 --- a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/count_pushdown_cs_encoding.result +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/count_pushdown_cs_encoding.result @@ -161,17 +161,23 @@ Outputs & filters: range_key([t3.__pk_increment]), range(MIN ; MAX)always true explain select count(distinct a,b) from t3; Query Plan -================================================= -|ID|OPERATOR |NAME|EST.ROWS|EST.TIME(us)| -------------------------------------------------- -|0 |SCALAR GROUP BY | |1 |3 | -|1 |└─TABLE FULL SCAN|t3 |1 |3 | -================================================= +====================================================== +|ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| +------------------------------------------------------ +|0 |SCALAR GROUP BY | |1 |3 | +|1 |└─SUBPLAN SCAN |VIEW1|1 |3 | +|2 | └─HASH DISTINCT | |1 |3 | +|3 | └─TABLE FULL SCAN|t3 |1 |3 | +====================================================== Outputs & filters: ------------------------------------- - 0 - output([T_FUN_COUNT(distinct t3.a, t3.b)]), filter(nil), rowset=16 - group(nil), agg_func([T_FUN_COUNT(distinct t3.a, t3.b)]) - 1 - output([t3.a], [t3.b]), filter(nil), rowset=16 + 0 - output([T_FUN_COUNT(VIEW1.t3.a, VIEW1.t3.b)]), filter(nil), rowset=16 + group(nil), agg_func([T_FUN_COUNT(VIEW1.t3.a, VIEW1.t3.b)]) + 1 - output([VIEW1.t3.a], [VIEW1.t3.b]), filter(nil), rowset=16 + access([VIEW1.t3.a], [VIEW1.t3.b]) + 2 - output([t3.a], [t3.b]), filter(nil), rowset=16 + distinct([t3.a], [t3.b]) + 3 - output([t3.a], [t3.b]), filter(nil), rowset=16 access([t3.a], [t3.b]), partitions(p0) is_index_back=false, is_global_index=false, range_key([t3.__pk_increment]), range(MIN ; MAX)always true