plan: tiny code refine for building DataSource (#5977)

This commit is contained in:
Zhang Jian
2018-03-11 11:10:38 +08:00
committed by GitHub
parent f170c56c03
commit 8dfdab3a2e
4 changed files with 110 additions and 90 deletions

View File

@ -1579,43 +1579,58 @@ func (ds *DataSource) newExtraHandleSchemaCol() *expression.Column {
// and use pseudo estimation.
var RatioOfPseudoEstimate = 0.7
// getStatsTable picks the statistics that the planner uses for the table
// identified by "tableID". Pseudo statistics are substituted for the real
// ones whenever any of the following holds:
// 1. the statistics handle obtained from the domain is still nil;
// 2. the row count recorded in the statistics is zero;
// 3. the ratio of modified rows to total rows exceeds RatioOfPseudoEstimate,
//    i.e. the statistics are considered outdated.
func (b *planBuilder) getStatsTable(tableID int64) *statistics.Table {
	handle := domain.GetDomain(b.ctx).StatsHandle()
	if handle == nil {
		// Case 1: nothing to look up yet.
		return statistics.PseudoTable(tableID)
	}

	realStats := handle.GetTableStats(tableID)
	rowCount := realStats.Count
	if rowCount == 0 {
		// Case 2: an empty row count is not usable for estimation (and would
		// make the ratio below meaningless).
		return statistics.PseudoTable(tableID)
	}

	modifyRatio := float64(realStats.ModifyCount) / float64(rowCount)
	if modifyRatio > RatioOfPseudoEstimate {
		// Case 3: fall back to pseudo statistics, but keep the row count that
		// came from the real statistics since it is usually more meaningful
		// than the pseudo default.
		pseudo := statistics.PseudoTable(tableID)
		pseudo.Count = rowCount
		metrics.PseudoEstimation.Inc()
		return pseudo
	}
	return realStats
}
func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
schemaName := tn.Schema
if schemaName.L == "" {
schemaName = model.NewCIStr(b.ctx.GetSessionVars().CurrentDB)
dbName := tn.Schema
if dbName.L == "" {
dbName = model.NewCIStr(b.ctx.GetSessionVars().CurrentDB)
}
tbl, err := b.is.TableByName(schemaName, tn.Name)
tbl, err := b.is.TableByName(dbName, tn.Name)
if err != nil {
b.err = errors.Trace(err)
return nil
}
tableInfo := tbl.Meta()
handle := domain.GetDomain(b.ctx).StatsHandle()
var statsTbl *statistics.Table
if handle == nil {
// When the first session is created, the handle hasn't been initialized.
statsTbl = statistics.PseudoTable(tableInfo.ID)
} else {
statsTbl = handle.GetTableStats(tableInfo.ID)
if statsTbl.Count == 0 || float64(statsTbl.ModifyCount)/float64(statsTbl.Count) > RatioOfPseudoEstimate {
originCnt := statsTbl.Count
statsTbl = statistics.PseudoTable(tableInfo.ID)
if originCnt > 0 {
// The count of stats table is always proper.
statsTbl.Count = originCnt
} else {
// Zero count always brings some strange problem.
statsTbl.Count = 100
}
metrics.PseudoEstimation.Inc()
}
}
indices, includeTableScan, err := availableIndices(tn.IndexHints, tableInfo)
b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, dbName.L, tableInfo.Name.L, "")
availableIdxes, err := getAvailableIndices(tn.IndexHints, tableInfo)
if err != nil {
b.err = errors.Trace(err)
return nil
}
avalableIndices := avalableIndices{indices: indices, includeTableScan: includeTableScan}
var columns []*table.Column
if b.inUpdateStmt {
@ -1623,15 +1638,15 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
} else {
columns = tbl.Cols()
}
ds := DataSource{
indexHints: tn.IndexHints,
DBName: dbName,
tableInfo: tableInfo,
statisticTable: statsTbl,
DBName: schemaName,
statisticTable: b.getStatsTable(tableInfo.ID),
indexHints: tn.IndexHints,
availableIndices: availableIdxes,
Columns: make([]*model.ColumnInfo, 0, len(columns)),
availableIndices: &avalableIndices,
}.init(b.ctx)
b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, schemaName.L, tableInfo.Name.L, "")
var handleCol *expression.Column
schema := expression.NewSchema(make([]*expression.Column, 0, len(columns))...)
@ -1639,21 +1654,24 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
ds.Columns = append(ds.Columns, col.ToInfo())
schema.Append(&expression.Column{
FromID: ds.id,
ColName: col.Name,
TblName: tableInfo.Name,
DBName: schemaName,
RetType: &col.FieldType,
Position: i,
ID: col.ID})
DBName: dbName,
TblName: tableInfo.Name,
ColName: col.Name,
ID: col.ID,
RetType: &col.FieldType,
})
if tableInfo.PKIsHandle && mysql.HasPriKeyFlag(col.Flag) {
handleCol = schema.Columns[schema.Len()-1]
handleCol = schema.Columns[i]
}
}
ds.SetSchema(schema)
isMemDB := infoschema.IsMemoryDB(ds.DBName.L)
// We append an extra handle column to the schema when "ds" is not a memory
// table e.g. table in the "INFORMATION_SCHEMA" database, and the handle
// column is not the primary key of "ds".
isMemDB := infoschema.IsMemoryDB(ds.DBName.L)
if !isMemDB && handleCol == nil {
ds.Columns = append(ds.Columns, model.NewExtraHandleColInfo())
handleCol = ds.newExtraHandleSchemaCol()
@ -1662,13 +1680,20 @@ func (b *planBuilder) buildDataSource(tn *ast.TableName) LogicalPlan {
if handleCol != nil {
schema.TblID2Handle[tableInfo.ID] = []*expression.Column{handleCol}
}
// make plan as DS -> US -> Proj
var result LogicalPlan = ds
// If this SQL is executed in a non-readonly transaction, we need a
// "UnionScan" operator to read the modifications of former SQLs, which is
// buffered in tidb-server memory.
if b.ctx.Txn() != nil && !b.ctx.Txn().IsReadOnly() {
us := LogicalUnionScan{}.init(b.ctx)
us.SetChildren(result)
us.SetChildren(ds)
result = us
}
// If this table contains any virtual generated columns, we need a
// "Projection" to calculate these columns.
proj := b.projectVirtualColumns(ds, columns)
if proj != nil {
proj.SetChildren(result)

View File

@ -291,10 +291,10 @@ type DataSource struct {
statisticTable *statistics.Table
// availableIndices is used for storing result of availableIndices function.
availableIndices *avalableIndices
availableIndices *availableIndices
}
type avalableIndices struct {
type availableIndices struct {
indices []*model.IndexInfo
includeTableScan bool
}

View File

@ -313,62 +313,55 @@ func (b *planBuilder) detectSelectAgg(sel *ast.SelectStmt) bool {
return false
}
func availableIndices(hints []*ast.IndexHint, tableInfo *model.TableInfo) (indices []*model.IndexInfo, includeTableScan bool, err error) {
var usableHints []*ast.IndexHint
for _, hint := range hints {
if hint.HintScope == ast.HintForScan {
usableHints = append(usableHints, hint)
}
}
func getAvailableIndices(indexHints []*ast.IndexHint, tableInfo *model.TableInfo) (*availableIndices, error) {
publicIndices := make([]*model.IndexInfo, 0, len(tableInfo.Indices))
for _, index := range tableInfo.Indices {
if index.State == model.StatePublic {
publicIndices = append(publicIndices, index)
}
}
if len(usableHints) == 0 {
return publicIndices, true, nil
}
var hasUse bool
var ignores []*model.IndexInfo
for _, hint := range usableHints {
switch hint.HintType {
case ast.HintUse, ast.HintForce:
// Currently we don't distinguish between Force and Use because our cost estimation is not reliable.
hasUse = true
for _, idxName := range hint.IndexNames {
idx := findIndexByName(publicIndices, idxName)
if idx != nil {
indices = append(indices, idx)
} else {
return nil, true, ErrKeyDoesNotExist.GenByArgs(idxName, tableInfo.Name)
}
hasScanHint, hasUseOrForce := false, false
available := make([]*model.IndexInfo, 0, len(indexHints))
ignored := make([]*model.IndexInfo, 0, len(indexHints))
for _, hint := range indexHints {
if hint.HintScope != ast.HintForScan {
continue
}
hasScanHint = true
for _, idxName := range hint.IndexNames {
idx := findIndexByName(publicIndices, idxName)
if idx == nil {
return nil, ErrKeyDoesNotExist.GenByArgs(idxName, tableInfo.Name)
}
case ast.HintIgnore:
// Collect all the ignore index hints.
for _, idxName := range hint.IndexNames {
idx := findIndexByName(publicIndices, idxName)
if idx != nil {
ignores = append(ignores, idx)
} else {
return nil, true, ErrKeyDoesNotExist.GenByArgs(idxName, tableInfo.Name)
}
if hint.HintType == ast.HintIgnore {
// Collect all the ignored index hints.
ignored = append(ignored, idx)
continue
}
// Currently we don't distinguish between "FORCE" and "USE" because
// our cost estimation is not reliable.
hasUseOrForce = true
available = append(available, idx)
}
}
indices = removeIgnores(indices, ignores)
// If we have got FORCE or USE index hint, table scan is excluded.
if len(indices) != 0 {
return indices, false, nil
if !hasScanHint {
return &availableIndices{publicIndices, true}, nil
}
if hasUse {
// Empty use hint means don't use any index.
return nil, true, nil
if !hasUseOrForce {
available = removeIgnoredIndices(publicIndices, ignored)
return &availableIndices{available, true}, nil
}
return removeIgnores(publicIndices, ignores), true, nil
available = removeIgnoredIndices(available, ignored)
// If we have got "FORCE" or "USE" index hint but got no available index,
// we have to use table scan.
return &availableIndices{available, len(available) == 0}, nil
}
func removeIgnores(indices, ignores []*model.IndexInfo) []*model.IndexInfo {
func removeIgnoredIndices(indices, ignores []*model.IndexInfo) []*model.IndexInfo {
if len(ignores) == 0 {
return indices
}

View File

@ -334,13 +334,15 @@ func (t *Table) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idxID int
return result, errors.Trace(err)
}
// PseudoTable creates a pseudo table statistics.
//
// The returned table is flagged as Pseudo, uses pseudoRowCount as its row
// count, and carries empty (non-nil) column and index statistics maps so
// callers can look entries up or insert into them directly.
func PseudoTable(tableID int64) *Table {
	return &Table{
		TableID: tableID,
		Pseudo:  true,
		Count:   pseudoRowCount,
		Columns: make(map[int64]*Column),
		Indices: make(map[int64]*Index),
	}
}
func getPseudoRowCountByIndexRanges(sc *stmtctx.StatementContext, indexRanges []*ranger.NewRange,