From 8dfdab3a2efb95edf4d521ca88049d2f2bed0f01 Mon Sep 17 00:00:00 2001 From: Zhang Jian Date: Sun, 11 Mar 2018 11:10:38 +0800 Subject: [PATCH] plan: tiny code refine for building DataSource (#5977) --- plan/logical_plan_builder.go | 105 ++++++++++++++++++++++------------- plan/logical_plans.go | 4 +- plan/planbuilder.go | 77 ++++++++++++------------- statistics/table.go | 14 +++-- 4 files changed, 110 insertions(+), 90 deletions(-) diff --git a/plan/logical_plan_builder.go b/plan/logical_plan_builder.go index e1b39f595c..c6b2e694fd 100644 --- a/plan/logical_plan_builder.go +++ b/plan/logical_plan_builder.go @@ -1579,43 +1579,58 @@ func (ds *DataSource) newExtraHandleSchemaCol() *expression.Column { // and use pseudo estimation. var RatioOfPseudoEstimate = 0.7 +// getStatsTable gets statistics information for a table specified by "tableID". +// A pseudo statistics table is returned in any of the following scenarios: +// 1. tidb-server started and statistics handle has not been initialized. +// 2. table row count from statistics is zero. +// 3. statistics is outdated. +func (b *planBuilder) getStatsTable(tableID int64) *statistics.Table { + statsHandle := domain.GetDomain(b.ctx).StatsHandle() + + // 1. tidb-server started and statistics handle has not been initialized. + if statsHandle == nil { + return statistics.PseudoTable(tableID) + } + + statsTbl := statsHandle.GetTableStats(tableID) + + // 2. table row count from statistics is zero. + if statsTbl.Count == 0 { + return statistics.PseudoTable(tableID) + } + + // 3. statistics is outdated. + if float64(statsTbl.ModifyCount)/float64(statsTbl.Count) > RatioOfPseudoEstimate { + countFromStats := statsTbl.Count + statsTbl = statistics.PseudoTable(tableID) + // Table row count from statistics is more meaningful than the + // pseudo row count in most cases. 
+ statsTbl.Count = countFromStats + metrics.PseudoEstimation.Inc() + } + return statsTbl +} + func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan { - schemaName := tn.Schema - if schemaName.L == "" { - schemaName = model.NewCIStr(b.ctx.GetSessionVars().CurrentDB) + dbName := tn.Schema + if dbName.L == "" { + dbName = model.NewCIStr(b.ctx.GetSessionVars().CurrentDB) } - tbl, err := b.is.TableByName(schemaName, tn.Name) + + tbl, err := b.is.TableByName(dbName, tn.Name) if err != nil { b.err = errors.Trace(err) return nil } + tableInfo := tbl.Meta() - handle := domain.GetDomain(b.ctx).StatsHandle() - var statsTbl *statistics.Table - if handle == nil { - // When the first session is created, the handle hasn't been initialized. - statsTbl = statistics.PseudoTable(tableInfo.ID) - } else { - statsTbl = handle.GetTableStats(tableInfo.ID) - if statsTbl.Count == 0 || float64(statsTbl.ModifyCount)/float64(statsTbl.Count) > RatioOfPseudoEstimate { - originCnt := statsTbl.Count - statsTbl = statistics.PseudoTable(tableInfo.ID) - if originCnt > 0 { - // The count of stats table is always proper. - statsTbl.Count = originCnt - } else { - // Zero count always brings some strange problem. 
- statsTbl.Count = 100 - } - metrics.PseudoEstimation.Inc() - } - } - indices, includeTableScan, err := availableIndices(tn.IndexHints, tableInfo) + b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, dbName.L, tableInfo.Name.L, "") + + availableIdxes, err := getAvailableIndices(tn.IndexHints, tableInfo) if err != nil { b.err = errors.Trace(err) return nil } - avalableIndices := avalableIndices{indices: indices, includeTableScan: includeTableScan} var columns []*table.Column if b.inUpdateStmt { @@ -1623,15 +1638,15 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan { } else { columns = tbl.Cols() } + ds := DataSource{ - indexHints: tn.IndexHints, + DBName: dbName, tableInfo: tableInfo, - statisticTable: statsTbl, - DBName: schemaName, + statisticTable: b.getStatsTable(tableInfo.ID), + indexHints: tn.IndexHints, + availableIndices: availableIdxes, Columns: make([]*model.ColumnInfo, 0, len(columns)), - availableIndices: &avalableIndices, }.init(b.ctx) - b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, schemaName.L, tableInfo.Name.L, "") var handleCol *expression.Column schema := expression.NewSchema(make([]*expression.Column, 0, len(columns))...) @@ -1639,21 +1654,24 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan { ds.Columns = append(ds.Columns, col.ToInfo()) schema.Append(&expression.Column{ FromID: ds.id, - ColName: col.Name, - TblName: tableInfo.Name, - DBName: schemaName, - RetType: &col.FieldType, Position: i, - ID: col.ID}) + DBName: dbName, + TblName: tableInfo.Name, + ColName: col.Name, + ID: col.ID, + RetType: &col.FieldType, + }) + if tableInfo.PKIsHandle && mysql.HasPriKeyFlag(col.Flag) { - handleCol = schema.Columns[schema.Len()-1] + handleCol = schema.Columns[i] } } ds.SetSchema(schema) - isMemDB := infoschema.IsMemoryDB(ds.DBName.L) + // We append an extra handle column to the schema when "ds" is not a memory // table e.g. 
table in the "INFORMATION_SCHEMA" database, and the handle // column is not the primary key of "ds". + isMemDB := infoschema.IsMemoryDB(ds.DBName.L) if !isMemDB && handleCol == nil { ds.Columns = append(ds.Columns, model.NewExtraHandleColInfo()) handleCol = ds.newExtraHandleSchemaCol() @@ -1662,13 +1680,20 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan { if handleCol != nil { schema.TblID2Handle[tableInfo.ID] = []*expression.Column{handleCol} } - // make plan as DS -> US -> Proj + var result LogicalPlan = ds + + // If this SQL is executed in a non-readonly transaction, we need a + // "UnionScan" operator to read the modifications of former SQLs, which are + // buffered in tidb-server memory. if b.ctx.Txn() != nil && !b.ctx.Txn().IsReadOnly() { us := LogicalUnionScan{}.init(b.ctx) - us.SetChildren(result) + us.SetChildren(ds) result = us } + + // If this table contains any virtual generated columns, we need a + // "Projection" to calculate these columns. proj := b.projectVirtualColumns(ds, columns) if proj != nil { proj.SetChildren(result) diff --git a/plan/logical_plans.go b/plan/logical_plans.go index 6a15116b69..c3bb30d954 100644 --- a/plan/logical_plans.go +++ b/plan/logical_plans.go @@ -291,10 +291,10 @@ type DataSource struct { statisticTable *statistics.Table // availableIndices is used for storing result of availableIndices function. 
- availableIndices *avalableIndices + availableIndices *availableIndices } -type avalableIndices struct { +type availableIndices struct { indices []*model.IndexInfo includeTableScan bool } diff --git a/plan/planbuilder.go b/plan/planbuilder.go index 1186adc248..0ca8c2d163 100644 --- a/plan/planbuilder.go +++ b/plan/planbuilder.go @@ -313,62 +313,55 @@ func (b *planBuilder) detectSelectAgg(sel *ast.SelectStmt) bool { return false } -func availableIndices(hints []*ast.IndexHint, tableInfo *model.TableInfo) (indices []*model.IndexInfo, includeTableScan bool, err error) { - var usableHints []*ast.IndexHint - for _, hint := range hints { - if hint.HintScope == ast.HintForScan { - usableHints = append(usableHints, hint) - } - } +func getAvailableIndices(indexHints []*ast.IndexHint, tableInfo *model.TableInfo) (*availableIndices, error) { publicIndices := make([]*model.IndexInfo, 0, len(tableInfo.Indices)) for _, index := range tableInfo.Indices { if index.State == model.StatePublic { publicIndices = append(publicIndices, index) } } - if len(usableHints) == 0 { - return publicIndices, true, nil - } - var hasUse bool - var ignores []*model.IndexInfo - for _, hint := range usableHints { - switch hint.HintType { - case ast.HintUse, ast.HintForce: - // Currently we don't distinguish between Force and Use because our cost estimation is not reliable. 
- hasUse = true - for _, idxName := range hint.IndexNames { - idx := findIndexByName(publicIndices, idxName) - if idx != nil { - indices = append(indices, idx) - } else { - return nil, true, ErrKeyDoesNotExist.GenByArgs(idxName, tableInfo.Name) - } + + hasScanHint, hasUseOrForce := false, false + available := make([]*model.IndexInfo, 0, len(indexHints)) + ignored := make([]*model.IndexInfo, 0, len(indexHints)) + for _, hint := range indexHints { + if hint.HintScope != ast.HintForScan { + continue + } + + hasScanHint = true + for _, idxName := range hint.IndexNames { + idx := findIndexByName(publicIndices, idxName) + if idx == nil { + return nil, ErrKeyDoesNotExist.GenByArgs(idxName, tableInfo.Name) } - case ast.HintIgnore: - // Collect all the ignore index hints. - for _, idxName := range hint.IndexNames { - idx := findIndexByName(publicIndices, idxName) - if idx != nil { - ignores = append(ignores, idx) - } else { - return nil, true, ErrKeyDoesNotExist.GenByArgs(idxName, tableInfo.Name) - } + if hint.HintType == ast.HintIgnore { + // Collect all the ignored index hints. + ignored = append(ignored, idx) + continue } + // Currently we don't distinguish between "FORCE" and "USE" because + // our cost estimation is not reliable. + hasUseOrForce = true + available = append(available, idx) } } - indices = removeIgnores(indices, ignores) - // If we have got FORCE or USE index hint, table scan is excluded. - if len(indices) != 0 { - return indices, false, nil + + if !hasScanHint { + return &availableIndices{publicIndices, true}, nil } - if hasUse { - // Empty use hint means don't use any index. - return nil, true, nil + if !hasUseOrForce { + available = removeIgnoredIndices(publicIndices, ignored) + return &availableIndices{available, true}, nil } - return removeIgnores(publicIndices, ignores), true, nil + + available = removeIgnoredIndices(available, ignored) + // If we have got "FORCE" or "USE" index hint but got no available index, + // we have to use table scan. 
+ return &availableIndices{available, len(available) == 0}, nil } -func removeIgnores(indices, ignores []*model.IndexInfo) []*model.IndexInfo { +func removeIgnoredIndices(indices, ignores []*model.IndexInfo) []*model.IndexInfo { if len(ignores) == 0 { return indices } diff --git a/statistics/table.go b/statistics/table.go index ee7016f9d0..9c4f5774d9 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -334,13 +334,15 @@ func (t *Table) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idxID int return result, errors.Trace(err) } -// PseudoTable creates a pseudo table statistics when statistic can not be found in KV store. +// PseudoTable creates a pseudo statistics table. func PseudoTable(tableID int64) *Table { - t := &Table{TableID: tableID, Pseudo: true} - t.Count = pseudoRowCount - t.Columns = make(map[int64]*Column) - t.Indices = make(map[int64]*Index) - return t + return &Table{ + TableID: tableID, + Pseudo: true, + Count: pseudoRowCount, + Columns: make(map[int64]*Column), + Indices: make(map[int64]*Index), + } } func getPseudoRowCountByIndexRanges(sc *stmtctx.StatementContext, indexRanges []*ranger.NewRange,