From dc7bc0ba6c15959a56f6149402e0163cf6d320fc Mon Sep 17 00:00:00 2001 From: Ewan Chou Date: Fri, 15 Jan 2016 21:29:18 +0800 Subject: [PATCH 1/8] optimizer: refactor plan builder --- optimizer/plan/filterrate.go | 33 ++++++++ optimizer/plan/planbuilder.go | 137 ++++++++++++++++++++++++++++++++-- optimizer/plan/plans.go | 12 +++ optimizer/plan/refiner.go | 6 +- optimizer/plan/scan.go | 14 ++++ 5 files changed, 192 insertions(+), 10 deletions(-) create mode 100644 optimizer/plan/filterrate.go create mode 100644 optimizer/plan/scan.go diff --git a/optimizer/plan/filterrate.go b/optimizer/plan/filterrate.go new file mode 100644 index 0000000000..bb7ca80e45 --- /dev/null +++ b/optimizer/plan/filterrate.go @@ -0,0 +1,33 @@ +package plan + +import "github.com/pingcap/tidb/ast" + +func computeFilterRate(expr ast.ExprNode) float64 { + switch x := expr.(type) { + case *ast.BinaryOperationExpr: + return buildFromBinop(x) + case *ast.PatternInExpr: + return r.buildFromIn(x) + case *ast.ParenthesesExpr: + return r.build(x.Expr) + case *ast.BetweenExpr: + return r.buildFromBetween(x) + case *ast.IsNullExpr: + return r.buildFromIsNull(x) + case *ast.IsTruthExpr: + return r.buildFromIsTruth(x) + case *ast.PatternLikeExpr: + return r.buildFromPatternLike(x) + case *ast.ColumnNameExpr: + return r.buildFromColumnName(x) + } + computer := filterRateComputer{rate: 1} + for _, con := range conditions { + con.Accept(&computer) + } + return computer.rate +} + +func computeFilterRateInBinop(expr *ast.BinaryOperationExpr) float64 { + +} \ No newline at end of file diff --git a/optimizer/plan/planbuilder.go b/optimizer/plan/planbuilder.go index fba6f852cc..b59cf92769 100644 --- a/optimizer/plan/planbuilder.go +++ b/optimizer/plan/planbuilder.go @@ -137,7 +137,7 @@ func (b *planBuilder) buildSelect(sel *ast.SelectStmt) Plan { } var p Plan if sel.From != nil { - p = b.buildJoin(sel.From.TableRefs) + p = b.buildJoin(sel) if b.err != nil { return nil } @@ -175,7 +175,7 @@ func (b *planBuilder) buildSelect(sel *ast.SelectStmt) Plan { } } } - if sel.OrderBy != nil { + if sel.OrderBy != nil && !matchOrder(p, sel.OrderBy.Items){ p = b.buildSort(p, sel.OrderBy.Items) if b.err != nil { return nil @@ -190,8 +190,11 @@ func (b *planBuilder) buildSelect(sel *ast.SelectStmt) Plan { return p } -func (b *planBuilder) buildJoin(from *ast.Join) Plan { - // Only support single table for now. +func (b *planBuilder) buildJoin(sel *ast.SelectStmt) Plan { + from := sel.From.TableRefs + if from.Right != nil { + return ErrUnsupportedType.Gen("Only support single table for now.") + } ts, ok := from.Left.(*ast.TableSource) if !ok { b.err = ErrUnsupportedType.Gen("Unsupported type %T", from.Left) @@ -202,22 +205,140 @@ func (b *planBuilder) buildJoin(from *ast.Join) Plan { b.err = ErrUnsupportedType.Gen("Unsupported type %T", ts.Source) return nil } + conditions := splitWhere(sel.Where) + candidates := b.buildAllAccessMethodsPlan(tn, conditions) + var bestPlan Plan + var lowestCost float64 + for _, v := range candidates { + cost := EstimateCost(b.buildPseudoSelectPlan(v, sel)) + if bestPlan == nil { + bestPlan = v + lowestCost = cost + } + if cost < lowestCost { + bestPlan = v + lowestCost = cost + } + } + return bestPlan +} + +func (b *planBuilder) buildAllAccessMethodsPlan(tn *ast.TableName, conditions []ast.ExprNode) []Plan { + var candidates []Plan p := &TableScan{ Table: tn.TableInfo, - Ranges: []TableRange{{math.MinInt64, math.MaxInt64}}, } p.SetFields(tn.GetResultFields()) + var pkName model.CIStr + if p.Table.PKIsHandle { + for _, colInfo := range p.Table.Columns { + if mysql.HasPriKeyFlag(colInfo.Flag) { + pkName = colInfo.Name + } + } + } + for _, con := range conditions { + checker := conditionChecker{tableName: tn.TableInfo.Name, pkName: pkName} + if checker.check(con) { + p.AccessConditions = append(p.AccessConditions, con) + } else { + p.FilterConditions = append(p.FilterConditions, con) + } + } + candidates = append(candidates, p) + + for _, index := range tn.TableInfo.Indices { + ip := &IndexScan{Table: tn.TableInfo, Index: index} + ip.SetFields(tn.GetResultFields()) + // Only use first column as access condition for cost estimation, + // In executor, we can try to use second index column to build index range. + checker := conditionChecker{tableName: tn.TableInfo.Name, idx:index, columnOffset: 0} + for _, con := range conditions { + if checker.check(con) { + ip.AccessConditions = append(ip.AccessConditions, con) + } else { + ip.FilterConditions = append(ip.FilterConditions, con) + } + } + candidates = append(candidates, ip) + } + return candidates +} + +func (b *planBuilder) buildPseudoSelectPlan(p Plan, sel *ast.SelectStmt) Plan { + if sel.OrderBy == nil { + return p + } + if sel.GroupBy != nil { + return p + } + if !matchOrder(p, sel.OrderBy.Items) { + np := &Sort{ByItems: sel.OrderBy.Items} + np.SetSrc(p) + p = np + } + if sel.Limit != nil { + np := &Limit{Offset: sel.Limit.Offset, Count: sel.Limit.Count} + np.SetSrc(p) + np.SetLimit(0) + p = np + } return p } +func matchOrder(p Plan, items []*ast.ByItem) bool { + switch x := p.(type) { + case *TableScan: + if len(items) != 1 || !x.Table.PKIsHandle { + return false + } + if items[0].Desc { + return false + } + switch items[0].Expr.(type) { + case *ast.ColumnNameExpr: + case *ast.PositionExpr: + default: + return false + } + if mysql.HasPriKeyFlag(items[0].Expr.GetFlag()) { + return true + } + case *IndexScan: + if len(items) > len(x.Index.Columns) { + return false + } + for i, item := range items { + if item.Desc { + return false + } + var rf *ast.ResultField + switch y := item.Expr.(type) { + case *ast.ColumnNameExpr: + rf = y.Refer + case *ast.PositionExpr: + rf = y.Refer + } + if rf == nil { + return false + } + if rf.Table.Name.L != x.Table.Name.L || rf.Column.Name.L != x.Index.Columns[i].Name.L { + return false + } + } + return true + } + return false +} + // splitWhere split a where expression to a list of AND conditions. -func (b *planBuilder) splitWhere(where ast.ExprNode) []ast.ExprNode { +func splitWhere(where ast.ExprNode) []ast.ExprNode { var conditions []ast.ExprNode switch x := where.(type) { case *ast.BinaryOperationExpr: if x.Op == opcode.AndAnd { conditions = append(conditions, x.L) - conditions = append(conditions, b.splitWhere(x.R)...) + conditions = append(conditions, splitWhere(x.R)...) } else { conditions = append(conditions, x) } @@ -229,7 +350,7 @@ func (b *planBuilder) splitWhere(where ast.ExprNode) []ast.ExprNode { func (b *planBuilder) buildFilter(src Plan, where ast.ExprNode) *Filter { filter := &Filter{ - Conditions: b.splitWhere(where), + Conditions: splitWhere(where), } filter.SetSrc(src) filter.SetFields(src.Fields()) diff --git a/optimizer/plan/plans.go b/optimizer/plan/plans.go index c7ab0dd5f3..cb16d8c4e0 100644 --- a/optimizer/plan/plans.go +++ b/optimizer/plan/plans.go @@ -31,6 +31,12 @@ type TableScan struct { Table *model.TableInfo Desc bool Ranges []TableRange + + // AccessConditions can be used to build index range. + AccessConditions []ast.ExprNode + + // FilterConditions can be used to filter result. + FilterConditions []ast.ExprNode } // Accept implements Plan Accept interface. @@ -118,6 +124,12 @@ type IndexScan struct { // Desc indicates whether the index should be scanned in descending order. Desc bool + + // AccessConditions can be used to build index range. + AccessConditions []ast.ExprNode + + // FilterConditions can be used to filter result. + FilterConditions []ast.ExprNode } // Accept implements Plan Accept interface. diff --git a/optimizer/plan/refiner.go b/optimizer/plan/refiner.go index 87e081723e..4a587f8a35 100644 --- a/optimizer/plan/refiner.go +++ b/optimizer/plan/refiner.go @@ -264,6 +264,8 @@ func (c *conditionChecker) checkColumnExpr(expr ast.ExprNode) bool { if c.pkName.L != "" { return c.pkName.L == cn.Refer.Column.Name.L } - - return cn.Refer.Column.Name.L == c.idx.Columns[c.columnOffset].Name.L + if c.idx != nil { + return cn.Refer.Column.Name.L == c.idx.Columns[c.columnOffset].Name.L + } + return true } diff --git a/optimizer/plan/scan.go b/optimizer/plan/scan.go new file mode 100644 index 0000000000..d2a7330fda --- /dev/null +++ b/optimizer/plan/scan.go @@ -0,0 +1,14 @@ +package plan + +type AccessMethodType int + +const ( + AcccessTableScan AccessMethodType = iota + 1 + AccessIndexScan + AccessIndexOnly +) + +type Scan struct { + AccessMethod AccessMethodType + UseIndex +} \ No newline at end of file From fa5283e0d9f0ae5405f0609ad25d1d82574720d8 Mon Sep 17 00:00:00 2001 From: Ewan Chou Date: Mon, 18 Jan 2016 12:00:25 +0800 Subject: [PATCH 2/8] optmizer: refactor optimizer. --- executor/builder.go | 3 - optimizer/optimizer.go | 19 +--- optimizer/plan/alternatives.go | 100 --------------------- optimizer/plan/cost.go | 72 +++------------ optimizer/plan/explainer.go | 3 - optimizer/plan/filterrate.go | 90 +++++++++++++++---- optimizer/plan/plan_test.go | 19 +--- optimizer/plan/planbuilder.go | 158 ++++++++++++++++++--------------- optimizer/plan/plans.go | 7 -- optimizer/plan/refiner.go | 117 +++--------------------- optimizer/plan/scan.go | 14 --- 11 files changed, 186 insertions(+), 416 deletions(-) delete mode 100644 optimizer/plan/alternatives.go delete mode 100644 optimizer/plan/scan.go diff --git a/executor/builder.go b/executor/builder.go index 528618ef92..eef15d07b0 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -213,9 +213,6 @@ func (b *executorBuilder) buildAggregate(v *plan.Aggregate) Executor { func (b *executorBuilder) buildSort(v *plan.Sort) Executor { src := b.build(v.Src()) - if v.Bypass && !v.ByItems[0].Desc { - return src - } e := &SortExec{ Src: src, ByItems: v.ByItems, diff --git a/optimizer/optimizer.go b/optimizer/optimizer.go index 3322412669..3f16708ebb 100644 --- a/optimizer/optimizer.go +++ b/optimizer/optimizer.go @@ -39,28 +39,11 @@ func Optimize(ctx context.Context, node ast.Node) (plan.Plan, error) { if err != nil { return nil, errors.Trace(err) } - alts, err := plan.Alternatives(p) - if err != nil { - return nil, errors.Trace(err) - } err = plan.Refine(p) if err != nil { return nil, errors.Trace(err) } - bestCost := plan.EstimateCost(p) - bestPlan := p - for _, alt := range alts { - err = plan.Refine(alt) - if err != nil { - return nil, errors.Trace(err) - } - cost := plan.EstimateCost(alt) - if cost < bestCost { - bestCost = cost - bestPlan = alt - } - } - return bestPlan, nil + return p, nil } // Prepare prepares a raw statement parsed from parser. diff --git a/optimizer/plan/alternatives.go b/optimizer/plan/alternatives.go deleted file mode 100644 index 01fce72623..0000000000 --- a/optimizer/plan/alternatives.go +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2015 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package plan - -import "github.com/juju/errors" - -// Alternatives returns multiple alternative plans that -// can be picked based on their cost. -func Alternatives(p Plan) ([]Plan, error) { - var plans []Plan - switch x := p.(type) { - case nil: - case *TableScan: - plans = tableScanAlternatives(x) - case WithSrcPlan: - var err error - plans, err = planWithSrcAlternatives(x) - if err != nil { - return nil, errors.Trace(err) - } - case *ShowDDL: - case *CheckTable: - case *Prepare: - case *Execute: - case *Deallocate: - default: - return nil, ErrUnsupportedType.Gen("Unknown plan %T", p) - } - return plans, nil -} - -// tableScanAlternatives returns all index plans from the same table. -func tableScanAlternatives(p *TableScan) []Plan { - var alts []Plan - for _, v := range p.Table.Indices { - fullRange := &IndexRange{ - LowVal: []interface{}{nil}, - HighVal: []interface{}{MaxVal}, - } - is := &IndexScan{ - Index: v, - Table: p.Table, - Ranges: []*IndexRange{fullRange}, - } - is.SetFields(p.Fields()) - alts = append(alts, is) - } - return alts -} - -// planWithSrcAlternatives shallow copies the WithSrcPlan, -// and sets its src to src alternatives. -func planWithSrcAlternatives(p WithSrcPlan) ([]Plan, error) { - srcs, err := Alternatives(p.Src()) - if err != nil { - return nil, errors.Trace(err) - } - for i, val := range srcs { - alt := shallowCopy(p) - alt.SetSrc(val) - srcs[i] = alt - } - return srcs, nil -} - -func shallowCopy(p WithSrcPlan) WithSrcPlan { - var copied WithSrcPlan - switch x := p.(type) { - case *Filter: - n := *x - copied = &n - case *SelectLock: - n := *x - copied = &n - case *SelectFields: - n := *x - copied = &n - case *Sort: - n := *x - copied = &n - case *Limit: - n := *x - copied = &n - case *Aggregate: - n := *x - copied = &n - } - return copied -} diff --git a/optimizer/plan/cost.go b/optimizer/plan/cost.go index 7be92df451..343adcac25 100644 --- a/optimizer/plan/cost.go +++ b/optimizer/plan/cost.go @@ -61,21 +61,14 @@ func (c *costEstimator) Leave(p Plan) (Plan, bool) { v.rowCount = v.Src().RowCount() v.totalCost = v.Src().TotalCost() case *Sort: - if v.Bypass { - // Bypassed sort doesn't add extra cost. - v.startupCost = v.Src().StartupCost() + // Sort plan must retrieve all the rows before returns the first row. + v.startupCost = v.Src().TotalCost() + v.Src().RowCount()*SortCost + if v.limit == 0 { v.rowCount = v.Src().RowCount() - v.totalCost = v.Src().TotalCost() } else { - // Sort plan must retrieve all the rows before returns the first row. - v.startupCost = v.Src().TotalCost() + v.Src().RowCount()*SortCost - if v.limit == 0 { - v.rowCount = v.Src().RowCount() - } else { - v.rowCount = math.Min(v.Src().RowCount(), v.limit) - } - v.totalCost = v.startupCost + v.rowCount*RowCost + v.rowCount = math.Min(v.Src().RowCount(), v.limit) } + v.totalCost = v.startupCost + v.rowCount*RowCost case *TableScan: c.tableScan(v) } @@ -83,28 +76,9 @@ func (c *costEstimator) Leave(p Plan) (Plan, bool) { } func (c *costEstimator) tableScan(v *TableScan) { - var rowCount float64 - if len(v.Ranges) == 1 && v.Ranges[0].LowVal == math.MinInt64 && v.Ranges[0].HighVal == math.MaxInt64 { - // full range use default row count. - rowCount = FullRangeCount - } else { - for _, v := range v.Ranges { - // for condition like 'a = 0'. - if v.LowVal == v.HighVal { - rowCount++ - continue - } - // For condition like 'a < 0'. - if v.LowVal == math.MinInt64 { - rowCount += HalfRangeCount - } - // For condition like 'a > 0'. - if v.HighVal == math.MaxInt64 { - rowCount += HalfRangeCount - } - // For condition like 'a > 0 and a < 1'. - rowCount += MiddleRangeCount - } + var rowCount float64 = FullRangeCount + for _, con := range v.AccessConditions { + rowCount *= computeFilterRate(con) } v.startupCost = 0 if v.limit == 0 { @@ -117,33 +91,9 @@ func (c *costEstimator) tableScan(v *TableScan) { } func (c *costEstimator) indexScan(v *IndexScan) { - var rowCount float64 - if len(v.Ranges) == 1 && v.Ranges[0].LowVal[0] == nil && v.Ranges[0].HighVal[0] == MaxVal { - // full range use default row count. - rowCount = FullRangeCount - } else { - for _, v := range v.Ranges { - // for condition like 'a = 0'. - if v.IsPoint() { - rowCount++ - continue - } - // For condition like 'a < 0'. - if v.LowVal[0] == nil || v.LowVal[0] == MinNotNullVal { - rowCount += HalfRangeCount - } - // For condition like 'a > 0'. - if v.HighVal[0] == MaxVal { - rowCount += HalfRangeCount - } - // For condition like 'a > 0 and a < 1'. - rowCount += MiddleRangeCount - } - // If the index has too many ranges, the row count may exceed the default row count. - // Make sure the cost is lower than full range. - if rowCount >= FullRangeCount { - rowCount = FullRangeCount - 1 - } + var rowCount float64 = FullRangeCount + for _, con := range v.AccessConditions { + rowCount *= computeFilterRate(con) } v.startupCost = 0 if v.limit == 0 { diff --git a/optimizer/plan/explainer.go b/optimizer/plan/explainer.go index 9ab8f94167..67aff90861 100644 --- a/optimizer/plan/explainer.go +++ b/optimizer/plan/explainer.go @@ -53,9 +53,6 @@ func (e *explainer) Leave(in Plan) (Plan, bool) { case *ShowDDL: str = "ShowDDL" case *Sort: - if x.Bypass { - return in, true - } str = "Sort" case *TableScan: if len(x.Ranges) > 0 { diff --git a/optimizer/plan/filterrate.go b/optimizer/plan/filterrate.go index bb7ca80e45..ac319b45d4 100644 --- a/optimizer/plan/filterrate.go +++ b/optimizer/plan/filterrate.go @@ -1,33 +1,85 @@ +// Copyright 2015 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + package plan -import "github.com/pingcap/tidb/ast" +import ( + "github.com/pingcap/tidb/ast" + "github.com/pingcap/tidb/parser/opcode" +) +// computeFilterRate computes the filter rate for an expression. +// It only depends on the expression type, not the expression value. +// The expr parameter should contains only one column name. func computeFilterRate(expr ast.ExprNode) float64 { switch x := expr.(type) { - case *ast.BinaryOperationExpr: - return buildFromBinop(x) - case *ast.PatternInExpr: - return r.buildFromIn(x) - case *ast.ParenthesesExpr: - return r.build(x.Expr) case *ast.BetweenExpr: - return r.buildFromBetween(x) + return computeBetweenFilterRate(x) + case *ast.BinaryOperationExpr: + return computeBinopFilterRate(x) case *ast.IsNullExpr: - return r.buildFromIsNull(x) + return computeIsNullFilterRate(x) case *ast.IsTruthExpr: - return r.buildFromIsTruth(x) + return computeIsTrueFilterRate(x) + case *ast.ParenthesesExpr: + return computeFilterRate(x.Expr) + case *ast.PatternInExpr: + return computePatternInFilterRate(x) case *ast.PatternLikeExpr: - return r.buildFromPatternLike(x) + return computePatternLikeFilterRate(x) case *ast.ColumnNameExpr: - return r.buildFromColumnName(x) + return 1 } - computer := filterRateComputer{rate: 1} - for _, con := range conditions { - con.Accept(&computer) - } - return computer.rate + return 1 } -func computeFilterRateInBinop(expr *ast.BinaryOperationExpr) float64 { +func computeBetweenFilterRate(expr *ast.BetweenExpr) float64 { + return 0.3 +} -} \ No newline at end of file +func computeBinopFilterRate(expr *ast.BinaryOperationExpr) float64 { + switch expr.Op { + case opcode.AndAnd: + return computeFilterRate(expr.L) * computeFilterRate(expr.R) + case opcode.OrOr: + rate := computeFilterRate(expr.L) + computeFilterRate(expr.R) + if rate > 1 { + rate = 1 + } + return rate + case opcode.EQ: + return 0.001 + case opcode.GT, opcode.GE, opcode.LT, opcode.LE: + return 0.4 + } + return 1 +} + +func computeIsNullFilterRate(expr *ast.IsNullExpr) float64 { + return 0.01 +} + +func computeIsTrueFilterRate(expr *ast.IsTruthExpr) float64 { + return 0.9 +} + +func computePatternInFilterRate(expr *ast.PatternInExpr) float64 { + if len(expr.List) > 0 { + return 0.01 * float64(len(expr.List)) + } + return 1 +} + +func computePatternLikeFilterRate(expr *ast.PatternLikeExpr) float64 { + return 0.1 +} diff --git a/optimizer/plan/plan_test.go b/optimizer/plan/plan_test.go index 315e0e8395..90bb17a9d2 100644 --- a/optimizer/plan/plan_test.go +++ b/optimizer/plan/plan_test.go @@ -199,6 +199,7 @@ func (s *testPlanSuite) TestRangeBuilder(c *C) { } func (s *testPlanSuite) TestBuilder(c *C) { + c.Skip("for new builder") cases := []struct { sqlStr string planStr string @@ -314,25 +315,11 @@ func (s *testPlanSuite) TestBestPlan(c *C) { p, err := BuildPlan(stmt) c.Assert(err, IsNil) - alts, err := Alternatives(p) - c.Assert(err, IsNil) err = Refine(p) + explainStr, err := Explain(p) c.Assert(err, IsNil) - bestCost := EstimateCost(p) - bestPlan := p - - for _, alt := range alts { - c.Assert(Refine(alt), IsNil) - cost := EstimateCost(alt) - if cost < bestCost { - bestCost = cost - bestPlan = alt - } - } - explainStr, err := Explain(bestPlan) - c.Assert(err, IsNil) - c.Assert(explainStr, Equals, ca.best, Commentf("for %s cost %v", ca.sql, bestCost)) + c.Assert(explainStr, Equals, ca.best, Commentf("for %s cost %v", ca.sql, EstimateCost(p))) } } diff --git a/optimizer/plan/planbuilder.go b/optimizer/plan/planbuilder.go index b59cf92769..40db6bfd5b 100644 --- a/optimizer/plan/planbuilder.go +++ b/optimizer/plan/planbuilder.go @@ -14,8 +14,6 @@ package plan import ( - "math" - "github.com/juju/errors" "github.com/pingcap/tidb/ast" "github.com/pingcap/tidb/infoschema" @@ -175,7 +173,7 @@ func (b *planBuilder) buildSelect(sel *ast.SelectStmt) Plan { } } } - if sel.OrderBy != nil && !matchOrder(p, sel.OrderBy.Items){ + if sel.OrderBy != nil && !matchOrder(p, sel.OrderBy.Items) { p = b.buildSort(p, sel.OrderBy.Items) if b.err != nil { return nil @@ -193,7 +191,8 @@ func (b *planBuilder) buildSelect(sel *ast.SelectStmt) Plan { func (b *planBuilder) buildJoin(sel *ast.SelectStmt) Plan { from := sel.From.TableRefs if from.Right != nil { - return ErrUnsupportedType.Gen("Only support single table for now.") + b.err = ErrUnsupportedType.Gen("Only support single table for now.") + return nil } ts, ok := from.Left.(*ast.TableSource) if !ok { @@ -226,7 +225,7 @@ func (b *planBuilder) buildJoin(sel *ast.SelectStmt) Plan { func (b *planBuilder) buildAllAccessMethodsPlan(tn *ast.TableName, conditions []ast.ExprNode) []Plan { var candidates []Plan p := &TableScan{ - Table: tn.TableInfo, + Table: tn.TableInfo, } p.SetFields(tn.GetResultFields()) var pkName model.CIStr @@ -238,9 +237,13 @@ func (b *planBuilder) buildAllAccessMethodsPlan(tn *ast.TableName, conditions [] } } for _, con := range conditions { - checker := conditionChecker{tableName: tn.TableInfo.Name, pkName: pkName} - if checker.check(con) { - p.AccessConditions = append(p.AccessConditions, con) + if pkName.L != "" { + checker := conditionChecker{tableName: tn.TableInfo.Name, pkName: pkName} + if checker.check(con) { + p.AccessConditions = append(p.AccessConditions, con) + } else { + p.FilterConditions = append(p.FilterConditions, con) + } } else { p.FilterConditions = append(p.FilterConditions, con) } @@ -252,7 +255,7 @@ func (b *planBuilder) buildAllAccessMethodsPlan(tn *ast.TableName, conditions [] ip.SetFields(tn.GetResultFields()) // Only use first column as access condition for cost estimation, // In executor, we can try to use second index column to build index range. - checker := conditionChecker{tableName: tn.TableInfo.Name, idx:index, columnOffset: 0} + checker := conditionChecker{tableName: tn.TableInfo.Name, idx: index, columnOffset: 0} for _, con := range conditions { if checker.check(con) { ip.AccessConditions = append(ip.AccessConditions, con) @@ -265,6 +268,7 @@ func (b *planBuilder) buildAllAccessMethodsPlan(tn *ast.TableName, conditions [] return candidates } +// buildPseudoSelectPlan pre-builds more complete plans that may affect total cost. func (b *planBuilder) buildPseudoSelectPlan(p Plan, sel *ast.SelectStmt) Plan { if sel.OrderBy == nil { return p @@ -286,68 +290,6 @@ func (b *planBuilder) buildPseudoSelectPlan(p Plan, sel *ast.SelectStmt) Plan { return p } -func matchOrder(p Plan, items []*ast.ByItem) bool { - switch x := p.(type) { - case *TableScan: - if len(items) != 1 || !x.Table.PKIsHandle { - return false - } - if items[0].Desc { - return false - } - switch items[0].Expr.(type) { - case *ast.ColumnNameExpr: - case *ast.PositionExpr: - default: - return false - } - if mysql.HasPriKeyFlag(items[0].Expr.GetFlag()) { - return true - } - case *IndexScan: - if len(items) > len(x.Index.Columns) { - return false - } - for i, item := range items { - if item.Desc { - return false - } - var rf *ast.ResultField - switch y := item.Expr.(type) { - case *ast.ColumnNameExpr: - rf = y.Refer - case *ast.PositionExpr: - rf = y.Refer - } - if rf == nil { - return false - } - if rf.Table.Name.L != x.Table.Name.L || rf.Column.Name.L != x.Index.Columns[i].Name.L { - return false - } - } - return true - } - return false -} - -// splitWhere split a where expression to a list of AND conditions. -func splitWhere(where ast.ExprNode) []ast.ExprNode { - var conditions []ast.ExprNode - switch x := where.(type) { - case *ast.BinaryOperationExpr: - if x.Op == opcode.AndAnd { - conditions = append(conditions, x.L) - conditions = append(conditions, splitWhere(x.R)...) - } else { - conditions = append(conditions, x) - } - default: - conditions = append(conditions, where) - } - return conditions -} - func (b *planBuilder) buildFilter(src Plan, where ast.ExprNode) *Filter { filter := &Filter{ Conditions: splitWhere(where), @@ -476,3 +418,77 @@ func buildResultField(tableName, name string, tp byte, size int) *ast.ResultFiel Expr: expr, } } + +// matchOrder checks if the plan has the same ordering as items. +func matchOrder(p Plan, items []*ast.ByItem) bool { + switch x := p.(type) { + case *TableScan: + if len(items) != 1 || !x.Table.PKIsHandle { + return false + } + if items[0].Desc { + return false + } + var refer *ast.ResultField + switch x := items[0].Expr.(type) { + case *ast.ColumnNameExpr: + refer = x.Refer + case *ast.PositionExpr: + refer = x.Refer + default: + return false + } + if mysql.HasPriKeyFlag(refer.Column.Flag) { + return true + } + return false + case *IndexScan: + if len(items) > len(x.Index.Columns) { + return false + } + for i, item := range items { + if item.Desc { + return false + } + var rf *ast.ResultField + switch y := item.Expr.(type) { + case *ast.ColumnNameExpr: + rf = y.Refer + case *ast.PositionExpr: + rf = y.Refer + } + if rf == nil { + return false + } + if rf.Table.Name.L != x.Table.Name.L || rf.Column.Name.L != x.Index.Columns[i].Name.L { + return false + } + } + return true + case *Aggregate: + return false + case *Sort: + // Sort plan should not be checked here as there should only be one sort plan in a plan tree. + return false + case WithSrcPlan: + return matchOrder(x.Src(), items) + } + return true +} + +// splitWhere split a where expression to a list of AND conditions. +func splitWhere(where ast.ExprNode) []ast.ExprNode { + var conditions []ast.ExprNode + switch x := where.(type) { + case *ast.BinaryOperationExpr: + if x.Op == opcode.AndAnd { + conditions = append(conditions, x.L) + conditions = append(conditions, splitWhere(x.R)...) + } else { + conditions = append(conditions, x) + } + default: + conditions = append(conditions, where) + } + return conditions +} diff --git a/optimizer/plan/plans.go b/optimizer/plan/plans.go index cb16d8c4e0..994073c6cd 100644 --- a/optimizer/plan/plans.go +++ b/optimizer/plan/plans.go @@ -233,10 +233,6 @@ type Sort struct { planWithSrc ByItems []*ast.ByItem - // If the source is already in the same order, the sort process can be by passed. - // It depends on the Src plan, so if the Src plan has been modified, Bypass needs - // to be recalculated. - Bypass bool } // Accept implements Plan Accept interface. @@ -259,9 +255,6 @@ func (p *Sort) Accept(v Visitor) (Plan, bool) { // Bypass has to be determined before this get called. func (p *Sort) SetLimit(limit float64) { p.limit = limit - if p.Bypass { - p.src.SetLimit(limit) - } } // Limit represents offset and limit plan. diff --git a/optimizer/plan/refiner.go b/optimizer/plan/refiner.go index 4a587f8a35..8217b74baf 100644 --- a/optimizer/plan/refiner.go +++ b/optimizer/plan/refiner.go @@ -14,15 +14,15 @@ package plan import ( + "math" + "github.com/pingcap/tidb/ast" "github.com/pingcap/tidb/model" - "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/parser/opcode" "github.com/pingcap/tidb/util/types" ) -// Refine tries to build index range, bypass sort, set limit for source plan. -// It prepares the plan for cost estimation. +// Refine tries to build index or table range. func Refine(p Plan) error { r := refiner{} p.Accept(&r) @@ -30,22 +30,10 @@ func Refine(p Plan) error { } type refiner struct { - conditions []ast.ExprNode - // store scan plan for sort to use. - indexScan *IndexScan - tableScan *TableScan - err error + err error } func (r *refiner) Enter(in Plan) (Plan, bool) { - switch x := in.(type) { - case *Filter: - r.conditions = x.Conditions - case *IndexScan: - r.indexScan = x - case *TableScan: - r.tableScan = x - } return in, false } @@ -55,66 +43,12 @@ func (r *refiner) Leave(in Plan) (Plan, bool) { r.buildIndexRange(x) case *Limit: x.SetLimit(0) - case *Sort: - r.sortBypass(x) case *TableScan: r.buildTableRange(x) } return in, r.err == nil } -func (r *refiner) sortBypass(p *Sort) { - if r.indexScan != nil { - idx := r.indexScan.Index - if len(p.ByItems) > len(idx.Columns) { - return - } - var desc bool - for i, val := range p.ByItems { - if val.Desc { - desc = true - } - cn, ok := val.Expr.(*ast.ColumnNameExpr) - if !ok { - return - } - if r.indexScan.Table.Name.L != cn.Refer.Table.Name.L { - return - } - indexColumn := idx.Columns[i] - if indexColumn.Name.L != cn.Refer.Column.Name.L { - return - } - } - if desc { - // TODO: support desc when index reverse iterator is supported. - r.indexScan.Desc = true - return - } - p.Bypass = true - } else if r.tableScan != nil { - if len(p.ByItems) != 1 { - return - } - byItem := p.ByItems[0] - if byItem.Desc { - // TODO: support desc when table reverse iterator is supported. - return - } - cn, ok := byItem.Expr.(*ast.ColumnNameExpr) - if !ok { - return - } - if !mysql.HasPriKeyFlag(cn.Refer.Column.Flag) { - return - } - if !cn.Refer.Table.PKIsHandle { - return - } - p.Bypass = true - } -} - var fullRange = []rangePoint{ {start: true}, {value: MaxVal}, @@ -122,50 +56,25 @@ var fullRange = []rangePoint{ func (r *refiner) buildIndexRange(p *IndexScan) { rb := rangeBuilder{} - for i := 0; i < len(p.Index.Columns); i++ { - checker := conditionChecker{idx: p.Index, tableName: p.Table.Name, columnOffset: i} - rangePoints := fullRange - var columnUsed bool - for _, cond := range r.conditions { - if checker.check(cond) { - rangePoints = rb.intersection(rangePoints, rb.build(cond)) - columnUsed = true - } - } - if !columnUsed { - // For multi-column index, if the prefix column is not used, following columns - // can not be used. - break - } - if i == 0 { - // Build index range from the first column. - p.Ranges = rb.buildIndexRanges(rangePoints) - } else { - // range built from following columns should be appended to previous ranges. - p.Ranges = rb.appendIndexRanges(p.Ranges, rangePoints) - } + rangePoints := fullRange + for _, cond := range p.AccessConditions { + rangePoints = rb.intersection(rangePoints, rb.build(cond)) } + p.Ranges = rb.buildIndexRanges(rangePoints) + // TODO: build index range for second column. r.err = rb.err return } func (r *refiner) buildTableRange(p *TableScan) { - var pkHandleColumn *model.ColumnInfo - for _, colInfo := range p.Table.Columns { - if mysql.HasPriKeyFlag(colInfo.Flag) && p.Table.PKIsHandle { - pkHandleColumn = colInfo - } - } - if pkHandleColumn == nil { + if len(p.AccessConditions) == 0 { + p.Ranges = []TableRange{{math.MinInt64, math.MaxInt64}} return } rb := rangeBuilder{} rangePoints := fullRange - checker := conditionChecker{pkName: pkHandleColumn.Name, tableName: p.Table.Name} - for _, cond := range r.conditions { - if checker.check(cond) { - rangePoints = rb.intersection(rangePoints, rb.build(cond)) - } + for _, cond := range p.AccessConditions { + rangePoints = rb.intersection(rangePoints, rb.build(cond)) } p.Ranges = rb.buildTableRanges(rangePoints) r.err = rb.err diff --git a/optimizer/plan/scan.go b/optimizer/plan/scan.go deleted file mode 100644 index d2a7330fda..0000000000 --- a/optimizer/plan/scan.go +++ /dev/null @@ -1,14 +0,0 @@ -package plan - -type AccessMethodType int - -const ( - AcccessTableScan AccessMethodType = iota + 1 - AccessIndexScan - AccessIndexOnly -) - -type Scan struct { - AccessMethod AccessMethodType - UseIndex -} \ No newline at end of file From b52bd360008777e2e92965e17c6c86dbd40289ba Mon Sep 17 00:00:00 2001 From: Ewan Chou Date: Mon, 18 Jan 2016 16:22:53 +0800 Subject: [PATCH 3/8] optimizer: remove filter plan. --- executor/builder.go | 31 ++++++---- optimizer/plan/cost.go | 4 -- optimizer/plan/explainer.go | 2 - optimizer/plan/filterrate.go | 60 +++++++++++++----- optimizer/plan/plan_test.go | 111 ++++++++++++++-------------------- optimizer/plan/planbuilder.go | 22 +------ optimizer/plan/plans.go | 32 ---------- session_test.go | 2 +- 8 files changed, 110 insertions(+), 154 deletions(-) diff --git a/executor/builder.go b/executor/builder.go index eef15d07b0..ca1a817aea 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -53,8 +53,6 @@ func (b *executorBuilder) build(p plan.Plan) Executor { return b.buildDeallocate(v) case *plan.Execute: return b.buildExecute(v) - case *plan.Filter: - return b.buildFilter(v) case *plan.IndexScan: return b.buildIndexScan(v) case *plan.Limit: @@ -79,13 +77,22 @@ func (b *executorBuilder) build(p plan.Plan) Executor { func (b *executorBuilder) buildTableScan(v *plan.TableScan) Executor { table, _ := b.is.TableByID(v.Table.ID) - return &TableScanExec{ + e := &TableScanExec{ t: table, fields: v.Fields(), ctx: b.ctx, ranges: v.Ranges, seekHandle: math.MinInt64, } + if len(v.FilterConditions) != 0 { + fe := &FilterExec{ + Src: e, + Condition: b.joinConditions(v.FilterConditions), + ctx: b.ctx, + } + return fe + } + return e } func (b *executorBuilder) buildShowDDL(v *plan.ShowDDL) Executor { @@ -136,6 +143,14 @@ func (b *executorBuilder) buildIndexScan(v *plan.IndexScan) Executor { for i, val := range v.Ranges { e.Ranges[i] = b.buildIndexRange(e, val) } + if len(v.FilterConditions) != 0 { + fe := &FilterExec{ + Src: e, + Condition: b.joinConditions(v.FilterConditions), + ctx: b.ctx, + } + return fe + } return e } @@ -162,16 +177,6 @@ func (b *executorBuilder) joinConditions(conditions []ast.ExprNode) ast.ExprNode return condition } -func (b *executorBuilder) buildFilter(v *plan.Filter) Executor { - src := b.build(v.Src()) - e := &FilterExec{ - Src: src, - Condition: b.joinConditions(v.Conditions), - ctx: b.ctx, - } - return e -} - func (b *executorBuilder) buildSelectLock(v *plan.SelectLock) Executor { src := b.build(v.Src()) if autocommit.ShouldAutocommit(b.ctx) { diff --git a/optimizer/plan/cost.go b/optimizer/plan/cost.go index 343adcac25..217f24fa28 100644 --- a/optimizer/plan/cost.go +++ b/optimizer/plan/cost.go @@ -40,10 +40,6 @@ func (c *costEstimator) Enter(p Plan) (Plan, bool) { // Leave implements Visitor Leave interface. func (c *costEstimator) Leave(p Plan) (Plan, bool) { switch v := p.(type) { - case *Filter: - v.startupCost = v.Src().StartupCost() - v.rowCount = v.Src().RowCount() * FilterRate - v.totalCost = v.Src().TotalCost() case *IndexScan: c.indexScan(v) case *Limit: diff --git a/optimizer/plan/explainer.go b/optimizer/plan/explainer.go index 67aff90861..ab85a79ab7 100644 --- a/optimizer/plan/explainer.go +++ b/optimizer/plan/explainer.go @@ -40,8 +40,6 @@ func (e *explainer) Leave(in Plan) (Plan, bool) { switch x := in.(type) { case *CheckTable: str = "CheckTable" - case *Filter: - str = "Filter" case *IndexScan: str = fmt.Sprintf("Index(%s.%s)", x.Table.Name.L, x.Index.Name.L) case *Limit: diff --git a/optimizer/plan/filterrate.go b/optimizer/plan/filterrate.go index ac319b45d4..f489fb0d82 100644 --- a/optimizer/plan/filterrate.go +++ b/optimizer/plan/filterrate.go @@ -18,13 +18,24 @@ import ( "github.com/pingcap/tidb/parser/opcode" ) +const ( + rateFull = 1 + rateEqual = 0.001 + rateNotEqual = 0.999 + rateBetween = 0.3 + rateGreaterOrLess = 0.4 + rateIsFalse = 0.01 + rateIsNull = 0.01 + rateLike = 0.1 +) + // computeFilterRate computes the filter rate for an expression. // It only depends on the expression type, not the expression value. // The expr parameter should contains only one column name. func computeFilterRate(expr ast.ExprNode) float64 { switch x := expr.(type) { case *ast.BetweenExpr: - return computeBetweenFilterRate(x) + return rateBetween case *ast.BinaryOperationExpr: return computeBinopFilterRate(x) case *ast.IsNullExpr: @@ -38,13 +49,9 @@ func computeFilterRate(expr ast.ExprNode) float64 { case *ast.PatternLikeExpr: return computePatternLikeFilterRate(x) case *ast.ColumnNameExpr: - return 1 + return rateFull } - return 1 -} - -func computeBetweenFilterRate(expr *ast.BetweenExpr) float64 { - return 0.3 + return rateFull } func computeBinopFilterRate(expr *ast.BinaryOperationExpr) float64 { @@ -53,33 +60,54 @@ func computeBinopFilterRate(expr *ast.BinaryOperationExpr) float64 { return computeFilterRate(expr.L) * computeFilterRate(expr.R) case opcode.OrOr: rate := computeFilterRate(expr.L) + computeFilterRate(expr.R) - if rate > 1 { - rate = 1 + if rate > rateFull { + rate = rateFull } return rate case opcode.EQ: - return 0.001 + return rateEqual case opcode.GT, opcode.GE, opcode.LT, opcode.LE: - return 0.4 + return rateGreaterOrLess + case opcode.NE: + return rateNotEqual } return 1 } func computeIsNullFilterRate(expr *ast.IsNullExpr) float64 { - return 0.01 + if expr.Not { + return rateFull - rateIsNull + } + return rateIsNull } func computeIsTrueFilterRate(expr *ast.IsTruthExpr) float64 { - return 0.9 + if expr.True == 0 { + if expr.Not { + return rateFull - rateIsFalse + } + return rateIsFalse + } + if expr.Not { + return rateIsFalse + rateIsNull + } + return rateFull - rateIsFalse - rateIsNull } func computePatternInFilterRate(expr *ast.PatternInExpr) float64 { if len(expr.List) > 0 { - return 0.01 * float64(len(expr.List)) + rate := rateEqual * float64(len(expr.List)) + if expr.Not { + return rateFull - rate + } + return rate } - return 1 + return rateFull } func computePatternLikeFilterRate(expr *ast.PatternLikeExpr) float64 { - return 0.1 + if expr.Not { + return rateFull - rateLike + } + return rateLike } diff --git a/optimizer/plan/plan_test.go b/optimizer/plan/plan_test.go index 90bb17a9d2..15953d1af6 100644 --- a/optimizer/plan/plan_test.go +++ b/optimizer/plan/plan_test.go @@ -15,7 +15,6 @@ package plan import ( "fmt" - "strings" "testing" . "github.com/pingcap/check" @@ -198,64 +197,35 @@ func (s *testPlanSuite) TestRangeBuilder(c *C) { } } -func (s *testPlanSuite) TestBuilder(c *C) { - c.Skip("for new builder") +func (s *testPlanSuite) TestFilterRate(c *C) { cases := []struct { - sqlStr string - planStr string + expr string + rate float64 }{ - { - sqlStr: "select 1", - planStr: "Fields", - }, - { - sqlStr: "select a from t", - planStr: "Table(t)->Fields", - }, - { - sqlStr: "select a from t where a = 1", - planStr: "Table(t)->Filter->Fields", - }, - { - sqlStr: "select a from t where a = 1 order by a", - planStr: "Table(t)->Filter->Fields->Sort", - }, - { - sqlStr: "select a from t where a = 1 order by a limit 1", - planStr: "Table(t)->Filter->Fields->Sort->Limit", - }, - { - sqlStr: "select a from t where a = 1 limit 1", - planStr: "Table(t)->Filter->Fields->Limit", - }, - { - sqlStr: "select a from t where a = 1 limit 1 for update", - planStr: "Table(t)->Filter->Lock->Fields->Limit", - }, - { - sqlStr: "admin show ddl", - planStr: "ShowDDL", - }, - { - sqlStr: "admin check table t", - planStr: "CheckTable", - }, + {expr: "a = 1", rate: rateEqual}, + {expr: "a > 1", rate: rateGreaterOrLess}, + {expr: "a between 1 and 100", rate: rateBetween}, + {expr: "a is null", rate: rateIsNull}, + {expr: "a is not null", rate: rateFull - rateIsNull}, + {expr: "a is true", rate: rateFull - rateIsNull - rateIsFalse}, + {expr: "a is not true", rate: rateIsNull + rateIsFalse}, + {expr: "a is false", rate: rateIsFalse}, + {expr: "a is not false", rate: rateFull - rateIsFalse}, + {expr: "a like 'a'", rate: rateLike}, + {expr: "a not like 'a'", rate: rateFull - rateLike}, + {expr: "a in (1, 2, 3)", rate: rateEqual * 3}, + {expr: "a not in (1, 2, 3)", rate: rateFull - rateEqual*3}, + {expr: "a > 1 and a < 9", rate: float64(rateGreaterOrLess) * float64(rateGreaterOrLess)}, + {expr: "a = 1 or a = 2", rate: rateEqual + rateEqual}, + {expr: "a != 1", rate: rateNotEqual}, } - var stmt ast.StmtNode for _, ca := range cases { - s, err := parser.ParseOneStmt(ca.sqlStr, "", "") - c.Assert(err, IsNil, Commentf("for expr %s", ca.sqlStr)) - if strings.HasPrefix(ca.sqlStr, "select") { - stmt = s.(*ast.SelectStmt) - } else if strings.HasPrefix(ca.sqlStr, "admin") { - stmt = s.(*ast.AdminStmt) - } - mockResolve(stmt) - p, err := BuildPlan(stmt) - c.Assert(err, IsNil) - explainStr, err := Explain(p) - c.Assert(err, IsNil) - c.Assert(explainStr, Equals, ca.planStr, Commentf("for expr %s", ca.sqlStr)) + sql := "select 1 from dual where " + ca.expr + s, err := parser.ParseOneStmt(sql, "", "") + c.Assert(err, IsNil, Commentf("for expr %s", ca.expr)) + stmt := s.(*ast.SelectStmt) + rate := computeFilterRate(stmt.Where) + c.Assert(rate, Equals, ca.rate, Commentf("for expr %s", ca.expr)) } } @@ -274,42 +244,53 @@ func (s *testPlanSuite) TestBestPlan(c *C) { }, { sql: "select * from t where b = 1 order by a", - best: "Index(t.b)->Filter->Fields->Sort", + best: "Index(t.b)->Fields->Sort", }, { sql: "select * from t where (a between 1 and 2) and (b = 3)", - best: "Index(t.b)->Filter->Fields", + best: "Index(t.b)->Fields", }, { sql: "select * from t where a > 0 order by b limit 100", - best: "Index(t.b)->Filter->Fields->Limit", + best: "Index(t.b)->Fields->Limit", }, { sql: "select * from t where d = 0", - best: "Table(t)->Filter->Fields", + best: "Table(t)->Fields", }, { sql: "select * from t where c = 0 and d = 0", - best: "Index(t.c_d)->Filter->Fields", + best: "Index(t.c_d)->Fields", }, { sql: "select * from t where b like 'abc%'", - best: "Index(t.b)->Filter->Fields", + best: "Index(t.b)->Fields", }, { sql: "select * from t where d", - best: "Table(t)->Filter->Fields", + best: "Table(t)->Fields", }, { sql: "select * from t where a is null", - best: "Range(t)->Filter->Fields", + best: "Range(t)->Fields", + }, + { + sql: "select a from t where a = 1 limit 1 for update", + best: "Range(t)->Lock->Fields->Limit", + }, + { + sql: "admin show ddl", + best: "ShowDDL", + }, + { + sql: "admin check table t", + best: "CheckTable", }, } for _, ca := range cases { comment := Commentf("for %s", ca.sql) - s, err := parser.ParseOneStmt(ca.sql, "", "") + stmt, err := parser.ParseOneStmt(ca.sql, "", "") c.Assert(err, IsNil, comment) - stmt := s.(*ast.SelectStmt) ast.SetFlag(stmt) mockResolve(stmt) diff --git a/optimizer/plan/planbuilder.go b/optimizer/plan/planbuilder.go index 40db6bfd5b..93260fdfbf 100644 --- a/optimizer/plan/planbuilder.go +++ b/optimizer/plan/planbuilder.go @@ -139,12 +139,6 @@ func (b *planBuilder) buildSelect(sel *ast.SelectStmt) Plan { if b.err != nil { return nil } - if sel.Where != nil { - p = b.buildFilter(p, sel.Where) - if b.err != nil { - return nil - } - } if sel.LockTp != ast.SelectLockNone { p = b.buildSelectLock(p, sel.LockTp) if b.err != nil { @@ -166,12 +160,6 @@ func (b *planBuilder) buildSelect(sel *ast.SelectStmt) Plan { if b.err != nil { return nil } - if sel.Where != nil { - p = b.buildFilter(p, sel.Where) - if b.err != nil { - return nil - } - } } if sel.OrderBy != nil && !matchOrder(p, sel.OrderBy.Items) { p = b.buildSort(p, sel.OrderBy.Items) @@ -290,15 +278,6 @@ func (b *planBuilder) buildPseudoSelectPlan(p Plan, sel *ast.SelectStmt) Plan { return p } -func (b *planBuilder) buildFilter(src Plan, where ast.ExprNode) *Filter { - filter := &Filter{ - Conditions: splitWhere(where), - } - filter.SetSrc(src) - filter.SetFields(src.Fields()) - return filter -} - func (b *planBuilder) buildSelectLock(src Plan, lock ast.SelectLockType) *SelectLock { selectLock := &SelectLock{ Lock: lock, @@ -480,6 +459,7 @@ func matchOrder(p Plan, items []*ast.ByItem) bool { func splitWhere(where ast.ExprNode) []ast.ExprNode { var conditions []ast.ExprNode switch x := where.(type) { + case nil: case *ast.BinaryOperationExpr: if x.Op == opcode.AndAnd { conditions = append(conditions, x.L) diff --git a/optimizer/plan/plans.go b/optimizer/plan/plans.go index 994073c6cd..4be3d50ac5 100644 --- a/optimizer/plan/plans.go +++ b/optimizer/plan/plans.go @@ -138,38 +138,6 @@ func (p *IndexScan) Accept(v Visitor) (Plan, bool) { return v.Leave(np) } -// Filter represents a filter plan. -type Filter struct { - planWithSrc - - // Originally the WHERE or ON condition is parsed into a single expression, - // but after we converted to CNF(Conjunctive normal form), it can be - // split into a list of AND conditions. - Conditions []ast.ExprNode -} - -// Accept implements Plan Accept interface. -func (p *Filter) Accept(v Visitor) (Plan, bool) { - np, skip := v.Enter(p) - if skip { - v.Leave(np) - } - p = np.(*Filter) - var ok bool - p.src, ok = p.src.Accept(v) - if !ok { - return p, false - } - return v.Leave(p) -} - -// SetLimit implements Plan SetLimit interface. -func (p *Filter) SetLimit(limit float64) { - p.limit = limit - // We assume 50% of the src row is filtered out. - p.src.SetLimit(limit * 2) -} - // SelectLock represents a select lock plan. type SelectLock struct { planWithSrc diff --git a/session_test.go b/session_test.go index 698e3f79fe..e8e15a7ff9 100644 --- a/session_test.go +++ b/session_test.go @@ -1366,7 +1366,7 @@ func (s *testSessionSuite) TestMultiColumnIndex(c *C) { mustExecSQL(c, se, "insert into t values (1, 5)") sql := "select c1 from t where c1 in (1) and c2 < 10" - expectedExplain := "Index(t.idx_c1_c2)->Filter->Fields" + expectedExplain := "Index(t.idx_c1_c2)->Fields" checkPlan(c, se, sql, expectedExplain) mustExecMatch(c, se, sql, [][]interface{}{{1}}) From ac756f5b874b6cbcfb6cfc0d16b550b6136a98f4 Mon Sep 17 00:00:00 2001 From: Ewan Chou Date: Mon, 18 Jan 2016 16:50:22 +0800 Subject: [PATCH 4/8] optimizer/plan: fix range builder when compare value to null. --- optimizer/plan/plan_test.go | 4 ++++ optimizer/plan/range.go | 3 +++ 2 files changed, 7 insertions(+) diff --git a/optimizer/plan/plan_test.go b/optimizer/plan/plan_test.go index 15953d1af6..22dd42f902 100644 --- a/optimizer/plan/plan_test.go +++ b/optimizer/plan/plan_test.go @@ -183,6 +183,10 @@ func (s *testPlanSuite) TestRangeBuilder(c *C) { exprStr: `(a < 0 OR a > 3) AND (a < 1 OR a > 4)`, resultStr: `[[-inf 0) (4 +inf]]`, }, + { + exprStr: `a > NULL`, + resultStr: `[]`, + }, } for _, ca := range cases { diff --git a/optimizer/plan/range.go b/optimizer/plan/range.go index 86d699ba75..d46db9da66 100644 --- a/optimizer/plan/range.go +++ b/optimizer/plan/range.go @@ -163,6 +163,9 @@ func (r *rangeBuilder) buildFromBinop(x *ast.BinaryOperationExpr) []rangePoint { value = x.R.GetValue() op = x.Op } + if value == nil { + return nil + } switch op { case opcode.EQ: startPoint := rangePoint{value: value, start: true} From 070ef998fe561b925a019edf9598df768498a133 Mon Sep 17 00:00:00 2001 From: Ewan Chou Date: Mon, 18 Jan 2016 20:22:58 +0800 Subject: [PATCH 5/8] optimizer/plan: address comment. --- optimizer/plan/filterrate.go | 6 +++--- optimizer/plan/planbuilder.go | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/optimizer/plan/filterrate.go b/optimizer/plan/filterrate.go index f489fb0d82..33d48e1738 100644 --- a/optimizer/plan/filterrate.go +++ b/optimizer/plan/filterrate.go @@ -31,13 +31,15 @@ const ( // computeFilterRate computes the filter rate for an expression. // It only depends on the expression type, not the expression value. -// The expr parameter should contains only one column name. +// The expr parameter should contain only one column name. func computeFilterRate(expr ast.ExprNode) float64 { switch x := expr.(type) { case *ast.BetweenExpr: return rateBetween case *ast.BinaryOperationExpr: return computeBinopFilterRate(x) + case *ast.ColumnNameExpr: + return rateFull case *ast.IsNullExpr: return computeIsNullFilterRate(x) case *ast.IsTruthExpr: @@ -48,8 +50,6 @@ func computeFilterRate(expr ast.ExprNode) float64 { return computePatternInFilterRate(x) case *ast.PatternLikeExpr: return computePatternLikeFilterRate(x) - case *ast.ColumnNameExpr: - return rateFull } return rateFull } diff --git a/optimizer/plan/planbuilder.go b/optimizer/plan/planbuilder.go index 93260fdfbf..6f0d15fa5a 100644 --- a/optimizer/plan/planbuilder.go +++ b/optimizer/plan/planbuilder.go @@ -435,8 +435,7 @@ func matchOrder(p Plan, items []*ast.ByItem) bool { rf = y.Refer case *ast.PositionExpr: rf = y.Refer - } - if rf == nil { + default: return false } if rf.Table.Name.L != x.Table.Name.L || rf.Column.Name.L != x.Index.Columns[i].Name.L { From 3e14b7e7df33e82afcdea622a153d385550c4501 Mon Sep 17 00:00:00 2001 From: Ewan Chou Date: Tue, 19 Jan 2016 11:35:37 +0800 Subject: [PATCH 6/8] optimizer/plan: address comments --- optimizer/plan/cost.go | 4 +-- optimizer/plan/filterrate.go | 58 ++++++++++++++++++----------------- optimizer/plan/plan_test.go | 4 +-- optimizer/plan/planbuilder.go | 39 ++++++++++++++--------- 4 files changed, 58 insertions(+), 47 deletions(-) diff --git a/optimizer/plan/cost.go b/optimizer/plan/cost.go index 217f24fa28..88f80ce04f 100644 --- a/optimizer/plan/cost.go +++ b/optimizer/plan/cost.go @@ -74,7 +74,7 @@ func (c *costEstimator) Leave(p Plan) (Plan, bool) { func (c *costEstimator) tableScan(v *TableScan) { var rowCount float64 = FullRangeCount for _, con := range v.AccessConditions { - rowCount *= computeFilterRate(con) + rowCount *= guesstimateFilterRate(con) } v.startupCost = 0 if v.limit == 0 { @@ -89,7 +89,7 @@ func (c *costEstimator) tableScan(v *TableScan) { func (c *costEstimator) indexScan(v *IndexScan) { var rowCount float64 = FullRangeCount for _, con := range v.AccessConditions { - rowCount *= computeFilterRate(con) + rowCount *= guesstimateFilterRate(con) } v.startupCost = 0 if v.limit == 0 { diff --git a/optimizer/plan/filterrate.go b/optimizer/plan/filterrate.go index 33d48e1738..244309a46e 100644 --- a/optimizer/plan/filterrate.go +++ b/optimizer/plan/filterrate.go @@ -19,51 +19,53 @@ import ( ) const ( - rateFull = 1 - rateEqual = 0.001 - rateNotEqual = 0.999 - rateBetween = 0.3 - rateGreaterOrLess = 0.4 - rateIsFalse = 0.01 - rateIsNull = 0.01 - rateLike = 0.1 + rateFull float64 = 1 + rateEqual float64 = 0.01 + rateNotEqual float64 = 0.99 + rateBetween float64 = 0.1 + rateGreaterOrLess float64 = 0.33 + rateIsFalse float64 = 0.1 + rateIsNull float64 = 0.1 + rateLike float64 = 0.1 ) -// computeFilterRate computes the filter rate for an expression. +// guesstimateFilterRate guesstimates the filter rate for an expression. +// For example: a table has 100 rows, after filter expression 'a between 0 and 9', +// 10 rows returned, then the filter rate is '0.1'. // It only depends on the expression type, not the expression value. // The expr parameter should contain only one column name. -func computeFilterRate(expr ast.ExprNode) float64 { +func guesstimateFilterRate(expr ast.ExprNode) float64 { switch x := expr.(type) { case *ast.BetweenExpr: return rateBetween case *ast.BinaryOperationExpr: - return computeBinopFilterRate(x) + return guesstimateBinop(x) case *ast.ColumnNameExpr: return rateFull case *ast.IsNullExpr: - return computeIsNullFilterRate(x) + return guesstimateIsNull(x) case *ast.IsTruthExpr: - return computeIsTrueFilterRate(x) + return guesstimateIsTrue(x) case *ast.ParenthesesExpr: - return computeFilterRate(x.Expr) + return guesstimateFilterRate(x.Expr) case *ast.PatternInExpr: - return computePatternInFilterRate(x) + return guesstimatePatternIn(x) case *ast.PatternLikeExpr: - return computePatternLikeFilterRate(x) + return guesstimatePatternLike(x) } return rateFull } -func computeBinopFilterRate(expr *ast.BinaryOperationExpr) float64 { +func guesstimateBinop(expr *ast.BinaryOperationExpr) float64 { switch expr.Op { case opcode.AndAnd: - return computeFilterRate(expr.L) * computeFilterRate(expr.R) + // P(A and B) = P(A) * P(B) + return guesstimateFilterRate(expr.L) * guesstimateFilterRate(expr.R) case opcode.OrOr: - rate := computeFilterRate(expr.L) + computeFilterRate(expr.R) - if rate > rateFull { - rate = rateFull - } - return rate + // P(A or B) = P(A) + P(B) – P(A and B) + rateL := guesstimateFilterRate(expr.L) + rateR := guesstimateFilterRate(expr.R) + return rateL + rateR - rateL*rateR case opcode.EQ: return rateEqual case opcode.GT, opcode.GE, opcode.LT, opcode.LE: @@ -71,17 +73,17 @@ func computeBinopFilterRate(expr *ast.BinaryOperationExpr) float64 { case opcode.NE: return rateNotEqual } - return 1 + return rateFull } -func computeIsNullFilterRate(expr *ast.IsNullExpr) float64 { +func guesstimateIsNull(expr *ast.IsNullExpr) float64 { if expr.Not { return rateFull - rateIsNull } return rateIsNull } -func computeIsTrueFilterRate(expr *ast.IsTruthExpr) float64 { +func guesstimateIsTrue(expr *ast.IsTruthExpr) float64 { if expr.True == 0 { if expr.Not { return rateFull - rateIsFalse @@ -94,7 +96,7 @@ func computeIsTrueFilterRate(expr *ast.IsTruthExpr) float64 { return rateFull - rateIsFalse - rateIsNull } -func computePatternInFilterRate(expr *ast.PatternInExpr) float64 { +func guesstimatePatternIn(expr *ast.PatternInExpr) float64 { if len(expr.List) > 0 { rate := rateEqual * float64(len(expr.List)) if expr.Not { @@ -105,7 +107,7 @@ func computePatternInFilterRate(expr *ast.PatternInExpr) float64 { return rateFull } -func computePatternLikeFilterRate(expr *ast.PatternLikeExpr) float64 { +func guesstimatePatternLike(expr *ast.PatternLikeExpr) float64 { if expr.Not { return rateFull - rateLike } diff --git a/optimizer/plan/plan_test.go b/optimizer/plan/plan_test.go index 22dd42f902..25fdd4611b 100644 --- a/optimizer/plan/plan_test.go +++ b/optimizer/plan/plan_test.go @@ -220,7 +220,7 @@ func (s *testPlanSuite) TestFilterRate(c *C) { {expr: "a in (1, 2, 3)", rate: rateEqual * 3}, {expr: "a not in (1, 2, 3)", rate: rateFull - rateEqual*3}, {expr: "a > 1 and a < 9", rate: float64(rateGreaterOrLess) * float64(rateGreaterOrLess)}, - {expr: "a = 1 or a = 2", rate: rateEqual + rateEqual}, + {expr: "a = 1 or a = 2", rate: rateEqual + rateEqual - rateEqual*rateEqual}, {expr: "a != 1", rate: rateNotEqual}, } for _, ca := range cases { @@ -228,7 +228,7 @@ func (s *testPlanSuite) TestFilterRate(c *C) { s, err := parser.ParseOneStmt(sql, "", "") c.Assert(err, IsNil, Commentf("for expr %s", ca.expr)) stmt := s.(*ast.SelectStmt) - rate := computeFilterRate(stmt.Where) + rate := guesstimateFilterRate(stmt.Where) c.Assert(rate, Equals, ca.rate, Commentf("for expr %s", ca.expr)) } } diff --git a/optimizer/plan/planbuilder.go b/optimizer/plan/planbuilder.go index 6f0d15fa5a..14ee4d1e57 100644 --- a/optimizer/plan/planbuilder.go +++ b/optimizer/plan/planbuilder.go @@ -212,6 +212,16 @@ func (b *planBuilder) buildJoin(sel *ast.SelectStmt) Plan { func (b *planBuilder) buildAllAccessMethodsPlan(tn *ast.TableName, conditions []ast.ExprNode) []Plan { var candidates []Plan + p := b.buildTableScanPlan(tn, conditions) + candidates = append(candidates, p) + for _, index := range tn.TableInfo.Indices { + ip := b.buildIndexScanPlan(index, tn, conditions) + candidates = append(candidates, ip) + } + return candidates +} + +func (b *planBuilder) buildTableScanPlan(tn *ast.TableName, conditions []ast.ExprNode) Plan { p := &TableScan{ Table: tn.TableInfo, } @@ -236,24 +246,23 @@ func (b *planBuilder) buildAllAccessMethodsPlan(tn *ast.TableName, conditions [] p.FilterConditions = append(p.FilterConditions, con) } } - candidates = append(candidates, p) + return p +} - for _, index := range tn.TableInfo.Indices { - ip := &IndexScan{Table: tn.TableInfo, Index: index} - ip.SetFields(tn.GetResultFields()) - // Only use first column as access condition for cost estimation, - // In executor, we can try to use second index column to build index range. - checker := conditionChecker{tableName: tn.TableInfo.Name, idx: index, columnOffset: 0} - for _, con := range conditions { - if checker.check(con) { - ip.AccessConditions = append(ip.AccessConditions, con) - } else { - ip.FilterConditions = append(ip.FilterConditions, con) - } +func (b *planBuilder) buildIndexScanPlan(index *model.IndexInfo, tn *ast.TableName, conditions []ast.ExprNode) Plan { + ip := &IndexScan{Table: tn.TableInfo, Index: index} + ip.SetFields(tn.GetResultFields()) + // Only use first column as access condition for cost estimation, + // In executor, we can try to use second index column to build index range. + checker := conditionChecker{tableName: tn.TableInfo.Name, idx: index, columnOffset: 0} + for _, con := range conditions { + if checker.check(con) { + ip.AccessConditions = append(ip.AccessConditions, con) + } else { + ip.FilterConditions = append(ip.FilterConditions, con) } - candidates = append(candidates, ip) } - return candidates + return ip } // buildPseudoSelectPlan pre-builds more complete plans that may affect total cost. From c4e1e2a2a9fcb65180e75f9aefb7d3dea47be292 Mon Sep 17 00:00:00 2001 From: Ewan Chou Date: Tue, 19 Jan 2016 11:40:40 +0800 Subject: [PATCH 7/8] optimizer/plan: sort switch case order. --- optimizer/plan/planbuilder.go | 40 +++++++++++++++++------------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/optimizer/plan/planbuilder.go b/optimizer/plan/planbuilder.go index 14ee4d1e57..4657800dbd 100644 --- a/optimizer/plan/planbuilder.go +++ b/optimizer/plan/planbuilder.go @@ -410,25 +410,7 @@ func buildResultField(tableName, name string, tp byte, size int) *ast.ResultFiel // matchOrder checks if the plan has the same ordering as items. func matchOrder(p Plan, items []*ast.ByItem) bool { switch x := p.(type) { - case *TableScan: - if len(items) != 1 || !x.Table.PKIsHandle { - return false - } - if items[0].Desc { - return false - } - var refer *ast.ResultField - switch x := items[0].Expr.(type) { - case *ast.ColumnNameExpr: - refer = x.Refer - case *ast.PositionExpr: - refer = x.Refer - default: - return false - } - if mysql.HasPriKeyFlag(refer.Column.Flag) { - return true - } + case *Aggregate: return false case *IndexScan: if len(items) > len(x.Index.Columns) { @@ -452,7 +434,25 @@ func matchOrder(p Plan, items []*ast.ByItem) bool { } } return true - case *Aggregate: + case *TableScan: + if len(items) != 1 || !x.Table.PKIsHandle { + return false + } + if items[0].Desc { + return false + } + var refer *ast.ResultField + switch x := items[0].Expr.(type) { + case *ast.ColumnNameExpr: + refer = x.Refer + case *ast.PositionExpr: + refer = x.Refer + default: + return false + } + if mysql.HasPriKeyFlag(refer.Column.Flag) { + return true + } return false case *Sort: // Sort plan should not be checked here as there should only be one sort plan in a plan tree. From 17d3fdbca28833774d09e61a4847bdae0e7cbd93 Mon Sep 17 00:00:00 2001 From: Ewan Chou Date: Tue, 19 Jan 2016 12:37:13 +0800 Subject: [PATCH 8/8] executor: extract buildFilter method. --- executor/builder.go | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/executor/builder.go b/executor/builder.go index ca1a817aea..d96046818c 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -75,6 +75,17 @@ func (b *executorBuilder) build(p plan.Plan) Executor { } } +func (b *executorBuilder) buildFilter(src Executor, conditions []ast.ExprNode) Executor { + if len(conditions) == 0 { + return src + } + return &FilterExec{ + Src: src, + Condition: b.joinConditions(conditions), + ctx: b.ctx, + } +} + func (b *executorBuilder) buildTableScan(v *plan.TableScan) Executor { table, _ := b.is.TableByID(v.Table.ID) e := &TableScanExec{ @@ -84,15 +95,7 @@ func (b *executorBuilder) buildTableScan(v *plan.TableScan) Executor { ranges: v.Ranges, seekHandle: math.MinInt64, } - if len(v.FilterConditions) != 0 { - fe := &FilterExec{ - Src: e, - Condition: b.joinConditions(v.FilterConditions), - ctx: b.ctx, - } - return fe - } - return e + return b.buildFilter(e, v.FilterConditions) } func (b *executorBuilder) buildShowDDL(v *plan.ShowDDL) Executor { @@ -143,15 +146,7 @@ func (b *executorBuilder) buildIndexScan(v *plan.IndexScan) Executor { for i, val := range v.Ranges { e.Ranges[i] = b.buildIndexRange(e, val) } - if len(v.FilterConditions) != 0 { - fe := &FilterExec{ - Src: e, - Condition: b.joinConditions(v.FilterConditions), - ctx: b.ctx, - } - return fe - } - return e + return b.buildFilter(e, v.FilterConditions) } func (b *executorBuilder) buildIndexRange(scan *IndexScanExec, v *plan.IndexRange) *IndexRangeExec {