From fd97efefdec176da4b6173bd7a5e0ee9306046a9 Mon Sep 17 00:00:00 2001
From: Chengpeng Yan <41809508+Reminiscent@users.noreply.github.com>
Date: Fri, 7 Feb 2020 10:54:53 +0800
Subject: [PATCH] planner/cascades: add transformation rule MergeAdjacentTopN (#14345)

---
 planner/cascades/integration_test.go | 24 +++
 .../testdata/integration_suite_in.json | 12 ++
 .../testdata/integration_suite_out.json | 92 +++++++++++
 .../transformation_rules_suite_in.json | 17 +++
 .../transformation_rules_suite_out.json | 143 ++++++++++++++++++
 planner/cascades/transformation_rules.go | 61 ++++++++
 planner/cascades/transformation_rules_test.go | 25 +++
 planner/core/logical_plan_builder.go | 5 +
 8 files changed, 379 insertions(+)

diff --git a/planner/cascades/integration_test.go b/planner/cascades/integration_test.go
index 7ec3a74dd8..6bbb0cb8aa 100644
--- a/planner/cascades/integration_test.go
+++ b/planner/cascades/integration_test.go
@@ -268,3 +268,27 @@ func (s *testIntegrationSuite) TestMemTableScan(c *C) {
 		tk.MustQuery(sql).Check(testkit.Rows(output[i].Result...))
 	}
 }
+
+// TestTopN runs every recorded TestTopN query with the cascades planner enabled
+// and compares both its EXPLAIN output and its result rows with the recorded
+// expectations.
+func (s *testIntegrationSuite) TestTopN(c *C) {
+	tk := testkit.NewTestKitWithInit(c, s.store)
+	// Build the four-row fixture table and switch the session to the cascades planner.
+	for _, stmt := range []string{
+		"drop table if exists t;",
+		"create table t(a int primary key, b int);",
+		"insert into t values (1, 11), (4, 44), (2, 22), (3, 33);",
+		"set session tidb_enable_cascades_planner = 1;",
+	} {
+		tk.MustExec(stmt)
+	}
+	var (
+		input  []string
+		output []struct {
+			SQL    string
+			Plan   []string
+			Result []string
+		}
+	)
+	s.testData.GetTestCases(c, &input, &output)
+	for idx, query := range input {
+		explain := "explain " + query
+		// The callback refreshes the stored expectations for this case.
+		s.testData.OnRecord(func() {
+			output[idx].SQL = query
+			output[idx].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery(explain).Rows())
+			output[idx].Result = s.testData.ConvertRowsToStrings(tk.MustQuery(query).Rows())
+		})
+		tk.MustQuery(explain).Check(testkit.Rows(output[idx].Plan...))
+		tk.MustQuery(query).Check(testkit.Rows(output[idx].Result...))
+	}
+}
diff --git
a/planner/cascades/testdata/integration_suite_in.json b/planner/cascades/testdata/integration_suite_in.json index aec9d912f4..3ea30d6ba5 100644 --- a/planner/cascades/testdata/integration_suite_in.json +++ b/planner/cascades/testdata/integration_suite_in.json @@ -78,5 +78,17 @@ "cases": [ "select * from information_schema.processlist" ] + }, + { + "name": "TestTopN", + "cases": [ + "select a from (select a from t where b > 2 order by a limit 3 offset 1) as t1 order by a limit 2 offset 1", + "select * from (select * from t order by a limit 3) as t1 order by a limit 5", + "select b from (select b from t order by b limit 10 offset 10) as t1 order by b limit 10 offset 5", + "select b from (select b from t order by b limit 10 offset 2) as t1 order by b limit 3 offset 5", + "select a from (select a from t order by a limit 3 offset 5) as t1 order by a limit 3 offset 5", + "select a from (select a from t where b > 2 order by a, b limit 3 offset 1) as t1 order by a limit 2 offset 1", + "select * from (select * from t order by a limit 3) as t1 order by a, b limit 5" + ] } ] diff --git a/planner/cascades/testdata/integration_suite_out.json b/planner/cascades/testdata/integration_suite_out.json index de02b843ae..e25fdcf2ba 100644 --- a/planner/cascades/testdata/integration_suite_out.json +++ b/planner/cascades/testdata/integration_suite_out.json @@ -595,5 +595,97 @@ "Result": null } ] + }, + { + "Name": "TestTopN", + "Cases": [ + { + "SQL": "select a from (select a from t where b > 2 order by a limit 3 offset 1) as t1 order by a limit 2 offset 1", + "Plan": [ + "Projection_25 2.00 root test.t.a", + "└─Limit_27 2.00 root offset:2, count:2", + " └─TableReader_35 4.00 root data:Limit_36", + " └─Limit_36 4.00 cop[tikv] offset:0, count:4", + " └─Selection_33 4.00 cop[tikv] gt(test.t.b, 2)", + " └─TableScan_34 4.00 cop[tikv] table:t, range:[-inf,+inf], keep order:true, stats:pseudo" + ], + "Result": [ + "3", + "4" + ] + }, + { + "SQL": "select * from (select * from t order by a 
limit 3) as t1 order by a limit 5", + "Plan": [ + "Limit_17 3.00 root offset:0, count:3", + "└─TableReader_23 3.00 root data:Limit_24", + " └─Limit_24 3.00 cop[tikv] offset:0, count:3", + " └─TableScan_22 3.00 cop[tikv] table:t, range:[-inf,+inf], keep order:true, stats:pseudo" + ], + "Result": [ + "1 11", + "2 22", + "3 33" + ] + }, + { + "SQL": "select b from (select b from t order by b limit 10 offset 10) as t1 order by b limit 10 offset 5", + "Plan": [ + "TopN_16 5.00 root test.t.b:asc, offset:15, count:5", + "└─TableReader_18 20.00 root data:TopN_19", + " └─TopN_19 20.00 cop[tikv] test.t.b:asc, offset:0, count:20", + " └─TableScan_21 10000.00 cop[tikv] table:t, range:[-inf,+inf], keep order:false, stats:pseudo" + ], + "Result": null + }, + { + "SQL": "select b from (select b from t order by b limit 10 offset 2) as t1 order by b limit 3 offset 5", + "Plan": [ + "TopN_16 3.00 root test.t.b:asc, offset:7, count:3", + "└─TableReader_18 10.00 root data:TopN_19", + " └─TopN_19 10.00 cop[tikv] test.t.b:asc, offset:0, count:10", + " └─TableScan_21 10000.00 cop[tikv] table:t, range:[-inf,+inf], keep order:false, stats:pseudo" + ], + "Result": null + }, + { + "SQL": "select a from (select a from t order by a limit 3 offset 5) as t1 order by a limit 3 offset 5", + "Plan": [ + "TableDual_14 0.00 root rows:0" + ], + "Result": null + }, + { + "SQL": "select a from (select a from t where b > 2 order by a, b limit 3 offset 1) as t1 order by a limit 2 offset 1", + "Plan": [ + "Projection_25 2.00 root test.t.a", + "└─TopN_26 2.00 root test.t.a:asc, test.t.b:asc, offset:2, count:2", + " └─TableReader_28 4.00 root data:TopN_29", + " └─TopN_29 4.00 cop[tikv] test.t.a:asc, test.t.b:asc, offset:0, count:4", + " └─Selection_31 8000.00 cop[tikv] gt(test.t.b, 2)", + " └─TableScan_32 10000.00 cop[tikv] table:t, range:[-inf,+inf], keep order:false, stats:pseudo" + ], + "Result": [ + "3", + "4" + ] + }, + { + "SQL": "select * from (select * from t order by a limit 3) as t1 order by a, b 
limit 5", + "Plan": [ + "Limit_14 3.00 root offset:0, count:5", + "└─Sort_26 3.00 root test.t.a:asc, test.t.b:asc", + " └─Limit_16 3.00 root offset:0, count:3", + " └─TableReader_22 3.00 root data:Limit_23", + " └─Limit_23 3.00 cop[tikv] offset:0, count:3", + " └─TableScan_21 3.00 cop[tikv] table:t, range:[-inf,+inf], keep order:true, stats:pseudo" + ], + "Result": [ + "1 11", + "2 22", + "3 33" + ] + } + ] } ] diff --git a/planner/cascades/testdata/transformation_rules_suite_in.json b/planner/cascades/testdata/transformation_rules_suite_in.json index a593df146a..a7007f4c60 100644 --- a/planner/cascades/testdata/transformation_rules_suite_in.json +++ b/planner/cascades/testdata/transformation_rules_suite_in.json @@ -95,6 +95,23 @@ "select a from (select a from t limit 3 offset 5) t1 limit 3 offset 5" ] }, + { + "name": "TestMergeAdjacentTopN", + "cases": [ + "select b from (select b from t where c > 1 order by b limit 3) as t1 order by b limit 2", + "select a from (select a from t where b > 2 order by a limit 3 offset 1) as t1 order by a limit 2 offset 1", + "select * from (select * from t order by a limit 3) as t1 order by a limit 5", + "select b from (select b from t order by b limit 5) as t1 order by b limit 10", + "select b from (select b from t order by b limit 20) as t1 order by b limit 10", + "select b from (select b from t order by b limit 10) as t1 order by b limit 10", + "select b from (select b from t order by b limit 10 offset 10) as t1 order by b limit 10 offset 5", + "select b from (select b from t order by b limit 10 offset 2) as t1 order by b limit 3 offset 5", + "select b from (select b from t order by b limit 10 offset 5) as t1 order by b limit 5 offset 5", + "select a from (select a from t order by a limit 3 offset 5) as t1 order by a limit 3 offset 5", + "select b from (select b from t where c > 1 order by b, a limit 3) as t1 order by b limit 2", + "select a from (select a from t where b > 2 order by a, b limit 3 offset 1) as t1 order by a limit 
2 offset 1" + ] + }, { "name": "TestTransformLimitToTableDual", "cases": [ diff --git a/planner/cascades/testdata/transformation_rules_suite_out.json b/planner/cascades/testdata/transformation_rules_suite_out.json index 6280fc2742..49863e4b32 100644 --- a/planner/cascades/testdata/transformation_rules_suite_out.json +++ b/planner/cascades/testdata/transformation_rules_suite_out.json @@ -1339,6 +1339,149 @@ } ] }, + { + "Name": "TestMergeAdjacentTopN", + "Cases": [ + { + "SQL": "select b from (select b from t where c > 1 order by b limit 3) as t1 order by b limit 2", + "Result": [ + "Group#0 Schema:[test.t.b]", + " Projection_11 input:[Group#1], test.t.b", + "Group#1 Schema:[test.t.b,test.t.c]", + " TopN_14 input:[Group#2], test.t.b:asc, offset:0, count:2", + "Group#2 Schema:[test.t.b,test.t.c]", + " Selection_2 input:[Group#3], gt(test.t.c, 1)", + "Group#3 Schema:[test.t.b,test.t.c]", + " TableScan_1 table:t" + ] + }, + { + "SQL": "select a from (select a from t where b > 2 order by a limit 3 offset 1) as t1 order by a limit 2 offset 1", + "Result": [ + "Group#0 Schema:[test.t.a]", + " Projection_11 input:[Group#1], test.t.a", + "Group#1 Schema:[test.t.a,test.t.b]", + " TopN_14 input:[Group#2], test.t.a:asc, offset:2, count:2", + "Group#2 Schema:[test.t.a,test.t.b]", + " Selection_2 input:[Group#3], gt(test.t.b, 2)", + "Group#3 Schema:[test.t.a,test.t.b]", + " TableScan_1 table:t" + ] + }, + { + "SQL": "select * from (select * from t order by a limit 3) as t1 order by a limit 5", + "Result": [ + "Group#0 Schema:[test.t.a,test.t.b,test.t.c,test.t.d,test.t.e,test.t.c_str,test.t.d_str,test.t.e_str,test.t.f,test.t.g,test.t.h,test.t.i_date]", + " Projection_10 input:[Group#1], test.t.a, test.t.b, test.t.c, test.t.d, test.t.e, test.t.c_str, test.t.d_str, test.t.e_str, test.t.f, test.t.g, test.t.h, test.t.i_date", + "Group#1 Schema:[test.t.a,test.t.b,test.t.c,test.t.d,test.t.e,test.t.c_str,test.t.d_str,test.t.e_str,test.t.f,test.t.g,test.t.h,test.t.i_date]", + " TopN_13 
input:[Group#2], test.t.a:asc, offset:0, count:3", + "Group#2 Schema:[test.t.a,test.t.b,test.t.c,test.t.d,test.t.e,test.t.c_str,test.t.d_str,test.t.e_str,test.t.f,test.t.g,test.t.h,test.t.i_date]", + " TableScan_1 table:t" + ] + }, + { + "SQL": "select b from (select b from t order by b limit 5) as t1 order by b limit 10", + "Result": [ + "Group#0 Schema:[test.t.b]", + " Projection_10 input:[Group#1], test.t.b", + "Group#1 Schema:[test.t.b]", + " TopN_13 input:[Group#2], test.t.b:asc, offset:0, count:5", + "Group#2 Schema:[test.t.b]", + " TableScan_1 table:t" + ] + }, + { + "SQL": "select b from (select b from t order by b limit 20) as t1 order by b limit 10", + "Result": [ + "Group#0 Schema:[test.t.b]", + " Projection_10 input:[Group#1], test.t.b", + "Group#1 Schema:[test.t.b]", + " TopN_13 input:[Group#2], test.t.b:asc, offset:0, count:10", + "Group#2 Schema:[test.t.b]", + " TableScan_1 table:t" + ] + }, + { + "SQL": "select b from (select b from t order by b limit 10) as t1 order by b limit 10", + "Result": [ + "Group#0 Schema:[test.t.b]", + " Projection_10 input:[Group#1], test.t.b", + "Group#1 Schema:[test.t.b]", + " TopN_13 input:[Group#2], test.t.b:asc, offset:0, count:10", + "Group#2 Schema:[test.t.b]", + " TableScan_1 table:t" + ] + }, + { + "SQL": "select b from (select b from t order by b limit 10 offset 10) as t1 order by b limit 10 offset 5", + "Result": [ + "Group#0 Schema:[test.t.b]", + " Projection_10 input:[Group#1], test.t.b", + "Group#1 Schema:[test.t.b]", + " TopN_13 input:[Group#2], test.t.b:asc, offset:15, count:5", + "Group#2 Schema:[test.t.b]", + " TableScan_1 table:t" + ] + }, + { + "SQL": "select b from (select b from t order by b limit 10 offset 2) as t1 order by b limit 3 offset 5", + "Result": [ + "Group#0 Schema:[test.t.b]", + " Projection_10 input:[Group#1], test.t.b", + "Group#1 Schema:[test.t.b]", + " TopN_13 input:[Group#2], test.t.b:asc, offset:7, count:3", + "Group#2 Schema:[test.t.b]", + " TableScan_1 table:t" + ] + }, + { + 
"SQL": "select b from (select b from t order by b limit 10 offset 5) as t1 order by b limit 5 offset 5", + "Result": [ + "Group#0 Schema:[test.t.b]", + " Projection_10 input:[Group#1], test.t.b", + "Group#1 Schema:[test.t.b]", + " TopN_13 input:[Group#2], test.t.b:asc, offset:10, count:5", + "Group#2 Schema:[test.t.b]", + " TableScan_1 table:t" + ] + }, + { + "SQL": "select a from (select a from t order by a limit 3 offset 5) as t1 order by a limit 3 offset 5", + "Result": [ + "Group#0 Schema:[test.t.a]", + " Projection_10 input:[Group#1], test.t.a", + "Group#1 Schema:[test.t.a]", + " TableDual_13 rowcount:0" + ] + }, + { + "SQL": "select b from (select b from t where c > 1 order by b, a limit 3) as t1 order by b limit 2", + "Result": [ + "Group#0 Schema:[test.t.b]", + " Projection_13 input:[Group#1], test.t.b", + "Group#1 Schema:[test.t.a,test.t.b,test.t.c]", + " TopN_16 input:[Group#2], test.t.b:asc, test.t.a:asc, offset:0, count:2", + "Group#2 Schema:[test.t.a,test.t.b,test.t.c]", + " Selection_2 input:[Group#3], gt(test.t.c, 1)", + "Group#3 Schema:[test.t.a,test.t.b,test.t.c]", + " TableScan_1 table:t" + ] + }, + { + "SQL": "select a from (select a from t where b > 2 order by a, b limit 3 offset 1) as t1 order by a limit 2 offset 1", + "Result": [ + "Group#0 Schema:[test.t.a]", + " Projection_13 input:[Group#1], test.t.a", + "Group#1 Schema:[test.t.a,test.t.b]", + " TopN_16 input:[Group#2], test.t.a:asc, test.t.b:asc, offset:2, count:2", + "Group#2 Schema:[test.t.a,test.t.b]", + " Selection_2 input:[Group#3], gt(test.t.b, 2)", + "Group#3 Schema:[test.t.a,test.t.b]", + " TableScan_1 table:t" + ] + } + ] + }, { "Name": "TestTransformLimitToTableDual", "Cases": [ diff --git a/planner/cascades/transformation_rules.go b/planner/cascades/transformation_rules.go index 5b044b48cb..b04ccb8c9e 100644 --- a/planner/cascades/transformation_rules.go +++ b/planner/cascades/transformation_rules.go @@ -85,6 +85,7 @@ var defaultTransformationMap = 
map[memo.Operand][]Transformation{
 		NewRulePushTopNDownOuterJoin(),
 		NewRulePushTopNDownUnionAll(),
 		NewRulePushTopNDownTiKVSingleGather(),
+		NewRuleMergeAdjacentTopN(),
 	},
 }
 
@@ -1283,6 +1284,66 @@ func (r *PushTopNDownTiKVSingleGather) OnTransform(old *memo.ExprIter) (newExprs
 	return []*memo.GroupExpr{finalTopNExpr}, true, false, nil
 }
 
+// MergeAdjacentTopN merges a TopN with the TopN directly beneath it into a single TopN.
+type MergeAdjacentTopN struct {
+	baseRule
+}
+
+// NewRuleMergeAdjacentTopN creates a new Transformation MergeAdjacentTopN.
+// The pattern of this rule is `TopN->TopN->X`.
+func NewRuleMergeAdjacentTopN() Transformation {
+	// The inner pattern describes the child TopN; both levels accept any engine.
+	childTopN := memo.NewPattern(memo.OperandTopN, memo.EngineAll)
+	rule := &MergeAdjacentTopN{}
+	rule.pattern = memo.BuildPattern(memo.OperandTopN, memo.EngineAll, childTopN)
+	return rule
+}
+
+// Match implements Transformation interface.
+// It reports whether the parent TopN's sort items form a prefix of the child
+// TopN's sort items, compared item by item with ByItems.Equal.
+func (r *MergeAdjacentTopN) Match(expr *memo.ExprIter) bool {
+	parent := expr.GetExpr().ExprNode.(*plannercore.LogicalTopN)
+	lower := expr.Children[0].GetExpr().ExprNode.(*plannercore.LogicalTopN)
+
+	// A parent with more sort items than the child cannot be a prefix of it.
+	if len(parent.ByItems) > len(lower.ByItems) {
+		return false
+	}
+	for idx, item := range parent.ByItems {
+		if !item.Equal(parent.SCtx(), lower.ByItems[idx]) {
+			return false
+		}
+	}
+	return true
+}
+
+// OnTransform implements Transformation interface.
+// This rule tries to merge adjacent TopN.
+//
+// The merged TopN orders by the child's ByItems, skips child.Offset+topN.Offset
+// rows and keeps at most min(child.Count-topN.Offset, topN.Count) rows. If the
+// parent's offset is not smaller than the child's count, no row can survive and
+// the rule returns an empty TableDual instead, with both eraseOld and eraseAll
+// set to true.
+func (r *MergeAdjacentTopN) OnTransform(old *memo.ExprIter) (newExprs []*memo.GroupExpr, eraseOld bool, eraseAll bool, err error) {
+	topN := old.GetExpr().ExprNode.(*plannercore.LogicalTopN)
+	child := old.Children[0].GetExpr().ExprNode.(*plannercore.LogicalTopN)
+	childGroups := old.Children[0].GetExpr().Children
+
+	// The child emits at most child.Count rows; skipping topN.Offset or more of
+	// them leaves an empty result.
+	if child.Count <= topN.Offset {
+		tableDual := plannercore.LogicalTableDual{RowCount: 0}.Init(child.SCtx(), child.SelectBlockOffset())
+		tableDual.SetSchema(old.GetExpr().Schema())
+		tableDualExpr := memo.NewGroupExpr(tableDual)
+		return []*memo.GroupExpr{tableDualExpr}, true, true, nil
+	}
+
+	// Clamp with integer arithmetic. Converting the uint64 counts to float64 for
+	// math.Min loses precision above 2^53, and math.MaxUint64 rounds to 2^64,
+	// whose conversion back to uint64 is implementation-dependent, so a query
+	// using `LIMIT 18446744073709551615` could get a wrong count. The subtraction
+	// cannot underflow because child.Count > topN.Offset at this point.
+	count := child.Count - topN.Offset
+	if topN.Count < count {
+		count = topN.Count
+	}
+	newTopN := plannercore.LogicalTopN{
+		Count:   count,
+		Offset:  child.Offset + topN.Offset,
+		ByItems: child.ByItems,
+	}.Init(child.SCtx(), child.SelectBlockOffset())
+	newTopNExpr := memo.NewGroupExpr(newTopN)
+	// The merged TopN reads directly from the child TopN's inputs.
+	newTopNExpr.SetChildren(childGroups...)
+	return []*memo.GroupExpr{newTopNExpr}, true, false, nil
+}
+
 // MergeAggregationProjection merges the Projection below an Aggregation as a new Aggregation.
 // The Projection may be regenerated in the ImplementationPhase. But this rule allows the
 // Aggregation to match other rules, such as MergeAdjacentAggregation.
diff --git a/planner/cascades/transformation_rules_test.go b/planner/cascades/transformation_rules_test.go
index 3bff81b640..aa821c0158 100644
--- a/planner/cascades/transformation_rules_test.go
+++ b/planner/cascades/transformation_rules_test.go
@@ -228,6 +228,31 @@ func (s *testTransformationRuleSuite) TestMergeAggregationProjection(c *C) {
 	testGroupToString(input, output, s, c)
 }
 
+// TestMergeAdjacentTopN runs the recorded cases with the optimizer restricted to
+// the Limit-to-TopN, TopN-through-Projection, adjacent-TopN and adjacent-Projection
+// rules, then restores the default rule set.
+func (s *testTransformationRuleSuite) TestMergeAdjacentTopN(c *C) {
+	rules := map[memo.Operand][]Transformation{
+		memo.OperandLimit: {NewRuleTransformLimitToTopN()},
+		memo.OperandTopN: {
+			NewRulePushTopNDownProjection(),
+			NewRuleMergeAdjacentTopN(),
+		},
+		memo.OperandProjection: {NewRuleMergeAdjacentProjection()},
+	}
+	s.optimizer.ResetTransformationRules(rules)
+	// Put the default rules back once the test is done.
+	defer func() {
+		s.optimizer.ResetTransformationRules(defaultTransformationMap)
+	}()
+	var (
+		input  []string
+		output []struct {
+			SQL    string
+			Result []string
+		}
+	)
+	s.testData.GetTestCases(c, &input, &output)
+	testGroupToString(input, output, s, c)
+}
+
 func (s *testTransformationRuleSuite) TestMergeAdjacentLimit(c *C) {
 	s.optimizer.ResetTransformationRules(map[memo.Operand][]Transformation{
 		memo.OperandLimit: {
diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go
index bda7da337d..abf96d5f8f 100644
--- a/planner/core/logical_plan_builder.go
+++ b/planner/core/logical_plan_builder.go
@@ -1148,6 +1148,11 @@ func (by *ByItems) Clone() *ByItems {
 	return &ByItems{Expr: by.Expr.Clone(), Desc: by.Desc}
 }
 
+// Equal reports whether by and other have equal expressions (per Expr.Equal
+// under ctx) and the same sort direction.
+func (by *ByItems) Equal(ctx sessionctx.Context, other *ByItems) bool {
+	if !by.Expr.Equal(ctx, other.Expr) {
+		return false
+	}
+	return by.Desc == other.Desc
+}
+
 // itemTransformer transforms ParamMarkerExpr to PositionExpr in the context of ByItem
 type itemTransformer struct {
 }