diff --git a/executor/analyze_test.go b/executor/analyze_test.go index 05d4f73689..dfb1194572 100644 --- a/executor/analyze_test.go +++ b/executor/analyze_test.go @@ -15,11 +15,10 @@ package executor_test import ( "fmt" - "strings" - . "github.com/pingcap/check" "github.com/pingcap/tidb/executor" "github.com/pingcap/tidb/model" + "github.com/pingcap/tidb/session" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/util/testkit" ) @@ -47,14 +46,19 @@ PARTITION BY RANGE ( a ) ( c.Assert(err, IsNil) pi := table.Meta().GetPartitionInfo() c.Assert(pi, NotNil) - ids := make([]string, 0, len(pi.Definitions)) + do, err := session.GetDomain(s.store) + c.Assert(err, IsNil) + handle := do.StatsHandle() for _, def := range pi.Definitions { - ids = append(ids, fmt.Sprintf("%d", def.ID)) + statsTbl := handle.GetPartitionStats(table.Meta(), def.ID) + c.Assert(statsTbl.Pseudo, IsFalse) + c.Assert(len(statsTbl.Columns), Equals, 2) + c.Assert(len(statsTbl.Indices), Equals, 1) + for _, col := range statsTbl.Columns { + c.Assert(col.Len(), Greater, 0) + } + for _, idx := range statsTbl.Indices { + c.Assert(idx.Len(), Greater, 0) + } } - result := tk.MustQuery(fmt.Sprintf("select count(distinct(table_id)) from mysql.stats_meta where table_id in (%s)", strings.Join(ids, ","))) - result.Check(testkit.Rows(fmt.Sprintf("%d", len(ids)))) - result = tk.MustQuery(fmt.Sprintf("select count(distinct(table_id)) from mysql.stats_histograms where table_id in (%s)", strings.Join(ids, ","))) - result.Check(testkit.Rows(fmt.Sprintf("%d", len(ids)))) - result = tk.MustQuery(fmt.Sprintf("select count(distinct(table_id)) from mysql.stats_buckets where table_id in (%s)", strings.Join(ids, ","))) - result.Check(testkit.Rows(fmt.Sprintf("%d", len(ids)))) } diff --git a/statistics/boostrap.go b/statistics/boostrap.go index 170754c921..0a6862394a 100644 --- a/statistics/boostrap.go +++ b/statistics/boostrap.go @@ -27,30 +27,30 @@ import ( "golang.org/x/net/context" ) -func initStatsMeta4Chunk(is 
infoschema.InfoSchema, tables statsCache, iter *chunk.Iterator4Chunk) { +func (h *Handle) initStatsMeta4Chunk(is infoschema.InfoSchema, tables statsCache, iter *chunk.Iterator4Chunk) { for row := iter.Begin(); row != iter.End(); row = iter.Next() { - tableID := row.GetInt64(1) - table, ok := is.TableByID(tableID) + physicalID := row.GetInt64(1) + table, ok := h.getTableByPhysicalID(is, physicalID) if !ok { - log.Debugf("Unknown table ID %d in stats meta table, maybe it has been dropped", tableID) + log.Debugf("Unknown physical ID %d in stats meta table, maybe it has been dropped", physicalID) continue } tableInfo := table.Meta() newHistColl := HistColl{ - TableID: tableInfo.ID, - HaveTblID: true, - Count: row.GetInt64(3), - ModifyCount: row.GetInt64(2), - Columns: make(map[int64]*Column, len(tableInfo.Columns)), - Indices: make(map[int64]*Index, len(tableInfo.Indices)), - colName2Idx: make(map[string]int64, len(tableInfo.Columns)), - colName2ID: make(map[string]int64, len(tableInfo.Columns)), + PhysicalID: physicalID, + HavePhysicalID: true, + Count: row.GetInt64(3), + ModifyCount: row.GetInt64(2), + Columns: make(map[int64]*Column, len(tableInfo.Columns)), + Indices: make(map[int64]*Index, len(tableInfo.Indices)), + colName2Idx: make(map[string]int64, len(tableInfo.Columns)), + colName2ID: make(map[string]int64, len(tableInfo.Columns)), } tbl := &Table{ HistColl: newHistColl, Version: row.GetUint64(0), } - tables[tableID] = tbl + tables[physicalID] = tbl } } @@ -76,19 +76,19 @@ func (h *Handle) initStatsMeta(is infoschema.InfoSchema) (statsCache, error) { if chk.NumRows() == 0 { break } - initStatsMeta4Chunk(is, tables, iter) + h.initStatsMeta4Chunk(is, tables, iter) } return tables, nil } -func initStatsHistograms4Chunk(is infoschema.InfoSchema, tables statsCache, iter *chunk.Iterator4Chunk) { +func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, tables statsCache, iter *chunk.Iterator4Chunk) { for row := iter.Begin(); row != iter.End(); row = 
iter.Next() { table, ok := tables[row.GetInt64(0)] if !ok { continue } id, ndv, nullCount, version, totColSize := row.GetInt64(2), row.GetInt64(3), row.GetInt64(5), row.GetUint64(4), row.GetInt64(7) - tbl, _ := is.TableByID(table.TableID) + tbl, _ := h.getTableByPhysicalID(is, table.PhysicalID) if row.GetInt64(1) > 0 { var idxInfo *model.IndexInfo for _, idx := range tbl.Meta().Indices { @@ -145,7 +145,7 @@ func (h *Handle) initStatsHistograms(is infoschema.InfoSchema, tables statsCache if chk.NumRows() == 0 { break } - initStatsHistograms4Chunk(is, tables, iter) + h.initStatsHistograms4Chunk(is, tables, iter) } return nil } diff --git a/statistics/dump.go b/statistics/dump.go index 74aa854336..7beff42041 100644 --- a/statistics/dump.go +++ b/statistics/dump.go @@ -58,7 +58,7 @@ func dumpJSONCol(hist *Histogram, CMSketch *CMSketch) *jsonColumn { // DumpStatsToJSON dumps statistic to json. func (h *Handle) DumpStatsToJSON(dbName string, tableInfo *model.TableInfo) (*JSONTable, error) { - tbl, err := h.tableStatsFromStorage(tableInfo, true) + tbl, err := h.tableStatsFromStorage(tableInfo, tableInfo.ID, true) if err != nil { return nil, errors.Trace(err) } @@ -101,18 +101,18 @@ func (h *Handle) LoadStatsFromJSON(is infoschema.InfoSchema, jsonTbl *JSONTable) } for _, col := range tbl.Columns { - err = h.SaveStatsToStorage(tbl.TableID, tbl.Count, 0, &col.Histogram, col.CMSketch, 1) + err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 0, &col.Histogram, col.CMSketch, 1) if err != nil { return errors.Trace(err) } } for _, idx := range tbl.Indices { - err = h.SaveStatsToStorage(tbl.TableID, tbl.Count, 1, &idx.Histogram, idx.CMSketch, 1) + err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 1, &idx.Histogram, idx.CMSketch, 1) if err != nil { return errors.Trace(err) } } - err = h.SaveMetaToStorage(tbl.TableID, tbl.Count, tbl.ModifyCount) + err = h.SaveMetaToStorage(tbl.PhysicalID, tbl.Count, tbl.ModifyCount) if err != nil { return errors.Trace(err) } @@ -122,12 +122,12 
@@ func (h *Handle) LoadStatsFromJSON(is infoschema.InfoSchema, jsonTbl *JSONTable) // LoadStatsFromJSONToTable load statistic from JSONTable and return the Table of statistic. func (h *Handle) LoadStatsFromJSONToTable(tableInfo *model.TableInfo, jsonTbl *JSONTable) (*Table, error) { newHistColl := HistColl{ - TableID: tableInfo.ID, - HaveTblID: true, - Count: jsonTbl.Count, - ModifyCount: jsonTbl.ModifyCount, - Columns: make(map[int64]*Column, len(jsonTbl.Columns)), - Indices: make(map[int64]*Index, len(jsonTbl.Indices)), + PhysicalID: tableInfo.ID, + HavePhysicalID: true, + Count: jsonTbl.Count, + ModifyCount: jsonTbl.ModifyCount, + Columns: make(map[int64]*Column, len(jsonTbl.Columns)), + Indices: make(map[int64]*Index, len(jsonTbl.Indices)), } tbl := &Table{ HistColl: newHistColl, diff --git a/statistics/handle.go b/statistics/handle.go index 8f3851f889..9785b2a6ee 100644 --- a/statistics/handle.go +++ b/statistics/handle.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/tidb/model" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/store/tikv/oracle" + "github.com/pingcap/tidb/table" "github.com/pingcap/tidb/util/sqlexec" log "github.com/sirupsen/logrus" ) @@ -40,6 +41,10 @@ type Handle struct { lastVersion uint64 // rateMap contains the error rate delta from feedback. rateMap errorRateDeltaMap + // pid2tid is the map from partition ID to table ID. + pid2tid map[int64]int64 + // schemaVersion is the version of information schema when `pid2tid` is built. 
+ schemaVersion int64 } restrictedExec sqlexec.RestrictedSQLExecutor @@ -128,25 +133,27 @@ func (h *Handle) Update(is infoschema.InfoSchema) error { deletedTableIDs := make([]int64, 0, len(rows)) for _, row := range rows { version := row.GetUint64(0) - tableID := row.GetInt64(1) + physicalID := row.GetInt64(1) modifyCount := row.GetInt64(2) count := row.GetInt64(3) lastVersion = version - table, ok := is.TableByID(tableID) + h.mu.Lock() + table, ok := h.getTableByPhysicalID(is, physicalID) + h.mu.Unlock() if !ok { - log.Debugf("Unknown table ID %d in stats meta table, maybe it has been dropped", tableID) - deletedTableIDs = append(deletedTableIDs, tableID) + log.Debugf("Unknown physical ID %d in stats meta table, maybe it has been dropped", physicalID) + deletedTableIDs = append(deletedTableIDs, physicalID) continue } tableInfo := table.Meta() - tbl, err := h.tableStatsFromStorage(tableInfo, false) + tbl, err := h.tableStatsFromStorage(tableInfo, physicalID, false) // Error is not nil may mean that there are some ddl changes on this table, we will not update it. if err != nil { log.Debugf("Error occurred when read table stats for table %s. 
The error message is %s.", tableInfo.Name.O, errors.ErrorStack(err)) continue } if tbl == nil { - deletedTableIDs = append(deletedTableIDs, tableID) + deletedTableIDs = append(deletedTableIDs, physicalID) continue } tbl.Version = version @@ -161,11 +168,45 @@ func (h *Handle) Update(is infoschema.InfoSchema) error { return nil } +func (h *Handle) getTableByPhysicalID(is infoschema.InfoSchema, physicalID int64) (table.Table, bool) { + if is.SchemaMetaVersion() != h.mu.schemaVersion { + h.mu.schemaVersion = is.SchemaMetaVersion() + h.mu.pid2tid = buildPartitionID2TableID(is) + } + if id, ok := h.mu.pid2tid[physicalID]; ok { + return is.TableByID(id) + } + return is.TableByID(physicalID) +} + +func buildPartitionID2TableID(is infoschema.InfoSchema) map[int64]int64 { + mapper := make(map[int64]int64) + for _, db := range is.AllSchemas() { + tbls := db.Tables + for _, tbl := range tbls { + pi := tbl.GetPartitionInfo() + if pi == nil { + continue + } + for _, def := range pi.Definitions { + mapper[def.ID] = tbl.ID + } + } + } + return mapper +} + // GetTableStats retrieves the statistics table from cache, and the cache will be updated by a goroutine. func (h *Handle) GetTableStats(tblInfo *model.TableInfo) *Table { - tbl, ok := h.statsCache.Load().(statsCache)[tblInfo.ID] + return h.GetPartitionStats(tblInfo, tblInfo.ID) +} + +// GetPartitionStats retrieves the partition stats from cache. 
+func (h *Handle) GetPartitionStats(tblInfo *model.TableInfo, pid int64) *Table { + tbl, ok := h.statsCache.Load().(statsCache)[pid] if !ok { tbl = PseudoTable(tblInfo) + tbl.PhysicalID = pid h.UpdateTableStats([]*Table{tbl}, nil) return tbl } @@ -185,7 +226,7 @@ func (h *Handle) copyFromOldCache() statsCache { func (h *Handle) UpdateTableStats(tables []*Table, deletedIDs []int64) { newCache := h.copyFromOldCache() for _, tbl := range tables { - id := tbl.TableID + id := tbl.PhysicalID newCache[id] = tbl } for _, id := range deletedIDs { diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 8f7d9a6731..49744337bb 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -99,11 +99,11 @@ func mockStatsHistogram(id int64, values []types.Datum, repeat int64, tp *types. func mockStatsTable(tbl *model.TableInfo, rowCount int64) *statistics.Table { histColl := statistics.HistColl{ - TableID: tbl.ID, - HaveTblID: true, - Count: rowCount, - Columns: make(map[int64]*statistics.Column, len(tbl.Columns)), - Indices: make(map[int64]*statistics.Index, len(tbl.Indices)), + PhysicalID: tbl.ID, + HavePhysicalID: true, + Count: rowCount, + Columns: make(map[int64]*statistics.Column, len(tbl.Columns)), + Indices: make(map[int64]*statistics.Index, len(tbl.Indices)), } statsTbl := &statistics.Table{ HistColl: histColl, diff --git a/statistics/table.go b/statistics/table.go index 88e0d74cf1..7ab1288237 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -45,34 +45,33 @@ const ( // Table represents statistics for a table. type Table struct { HistColl - Version uint64 - PKIsHandle bool + Version uint64 } // HistColl is a collection of histogram. It collects enough information for plan to calculate the selectivity. 
type HistColl struct { - TableID int64 - HaveTblID bool - Columns map[int64]*Column - Indices map[int64]*Index - colName2Idx map[string]int64 // map column name to index id - colName2ID map[string]int64 // map column name to column id - Pseudo bool - Count int64 - ModifyCount int64 // Total modify count in a table. + PhysicalID int64 // PhysicalID is the partition id for a partitioned table, otherwise, it is the table id. + HavePhysicalID bool + Columns map[int64]*Column + Indices map[int64]*Index + colName2Idx map[string]int64 // map column name to index id + colName2ID map[string]int64 // map column name to column id + Pseudo bool + Count int64 + ModifyCount int64 // Total modify count in a table. } func (t *Table) copy() *Table { newHistColl := HistColl{ - TableID: t.TableID, - HaveTblID: t.HaveTblID, - Count: t.Count, - Columns: make(map[int64]*Column), - Indices: make(map[int64]*Index), - colName2Idx: make(map[string]int64), - colName2ID: make(map[string]int64), - Pseudo: t.Pseudo, - ModifyCount: t.ModifyCount, + PhysicalID: t.PhysicalID, + HavePhysicalID: t.HavePhysicalID, + Count: t.Count, + Columns: make(map[int64]*Column), + Indices: make(map[int64]*Index), + colName2Idx: make(map[string]int64), + colName2ID: make(map[string]int64), + Pseudo: t.Pseudo, + ModifyCount: t.ModifyCount, } for id, col := range t.Columns { newHistColl.Columns[id] = col @@ -124,7 +123,7 @@ func (h *Handle) indexStatsFromStorage(row chunk.Row, table *Table, tableInfo *m errorRate := ErrorRate{} if isAnalyzed(row.GetInt64(8)) { h.mu.Lock() - h.mu.rateMap.clear(table.TableID, histID, true) + h.mu.rateMap.clear(table.PhysicalID, histID, true) h.mu.Unlock() } else if idx != nil { errorRate = idx.ErrorRate @@ -134,11 +133,11 @@ func (h *Handle) indexStatsFromStorage(row chunk.Row, table *Table, tableInfo *m continue } if idx == nil || idx.LastUpdateVersion < histVer { - hg, err := h.histogramFromStorage(tableInfo.ID, histID, types.NewFieldType(mysql.TypeBlob), distinct, 1, histVer, 
nullCount, 0) + hg, err := h.histogramFromStorage(table.PhysicalID, histID, types.NewFieldType(mysql.TypeBlob), distinct, 1, histVer, nullCount, 0) if err != nil { return errors.Trace(err) } - cms, err := h.cmSketchFromStorage(tableInfo.ID, 1, idxInfo.ID) + cms, err := h.cmSketchFromStorage(table.PhysicalID, 1, idxInfo.ID) if err != nil { return errors.Trace(err) } @@ -164,7 +163,7 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *Table, tableInfo * errorRate := ErrorRate{} if isAnalyzed(row.GetInt64(8)) { h.mu.Lock() - h.mu.rateMap.clear(table.TableID, histID, false) + h.mu.rateMap.clear(table.PhysicalID, histID, false) h.mu.Unlock() } else if col != nil { errorRate = col.ErrorRate @@ -184,7 +183,7 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *Table, tableInfo * (col == nil || col.Len() == 0 && col.LastUpdateVersion < histVer) && !loadAll if notNeedLoad { - count, err := h.columnCountFromStorage(table.TableID, histID) + count, err := h.columnCountFromStorage(table.PhysicalID, histID) if err != nil { return errors.Trace(err) } @@ -204,11 +203,11 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *Table, tableInfo * break } if col == nil || col.LastUpdateVersion < histVer || loadAll { - hg, err := h.histogramFromStorage(tableInfo.ID, histID, &colInfo.FieldType, distinct, 0, histVer, nullCount, totColSize) + hg, err := h.histogramFromStorage(table.PhysicalID, histID, &colInfo.FieldType, distinct, 0, histVer, nullCount, totColSize) if err != nil { return errors.Trace(err) } - cms, err := h.cmSketchFromStorage(tableInfo.ID, 0, colInfo.ID) + cms, err := h.cmSketchFromStorage(table.PhysicalID, 0, colInfo.ID) if err != nil { return errors.Trace(err) } @@ -240,18 +239,18 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *Table, tableInfo * } // tableStatsFromStorage loads table stats info from storage. 
-func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, loadAll bool) (*Table, error) {
+func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, physicalID int64, loadAll bool) (*Table, error) {
-	table, ok := h.statsCache.Load().(statsCache)[tableInfo.ID]
+	table, ok := h.statsCache.Load().(statsCache)[physicalID]
 	// If table stats is pseudo, we also need to copy it, since we will use the column stats when
 	// the average error rate of it is small.
 	if !ok {
 		histColl := HistColl{
-			TableID:     tableInfo.ID,
-			HaveTblID:   true,
-			Columns:     make(map[int64]*Column, len(tableInfo.Columns)),
-			Indices:     make(map[int64]*Index, len(tableInfo.Indices)),
-			colName2Idx: make(map[string]int64),
-			colName2ID:  make(map[string]int64),
+			PhysicalID:     physicalID,
+			HavePhysicalID: true,
+			Columns:        make(map[int64]*Column, len(tableInfo.Columns)),
+			Indices:        make(map[int64]*Index, len(tableInfo.Indices)),
+			colName2Idx:    make(map[string]int64),
+			colName2ID:     make(map[string]int64),
 		}
 		table = &Table{
 			HistColl: histColl,
@@ -261,7 +260,7 @@ func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, loadAll bool)
 		table = table.copy()
 	}
 	table.Pseudo = false
-	selSQL := fmt.Sprintf("select table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, flag from mysql.stats_histograms where table_id = %d", tableInfo.ID)
+	selSQL := fmt.Sprintf("select table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, flag from mysql.stats_histograms where table_id = %d", physicalID)
 	rows, _, err := h.restrictedExec.ExecRestrictedSQL(nil, selSQL)
 	if err != nil {
 		return nil, errors.Trace(err)
@@ -288,7 +287,7 @@ func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, loadAll bool)
 // String implements Stringer interface.
func (t *Table) String() string { strs := make([]string, 0, len(t.Columns)+1) - strs = append(strs, fmt.Sprintf("Table:%d Count:%d", t.TableID, t.Count)) + strs = append(strs, fmt.Sprintf("Table:%d Count:%d", t.PhysicalID, t.Count)) for _, col := range t.Columns { strs = append(strs, col.String()) } @@ -353,7 +352,7 @@ func (coll *HistColl) ColumnIsInvalid(sc *stmtctx.StatementContext, colID int64) } if col.NDV > 0 && col.Len() == 0 { sc.SetHistogramsNotLoad() - histogramNeededColumns.insert(tableColumnID{tableID: coll.TableID, columnID: colID}) + histogramNeededColumns.insert(tableColumnID{tableID: coll.PhysicalID, columnID: colID}) } return col.totalRowCount() == 0 || (col.NDV > 0 && col.Len() == 0) } @@ -533,16 +532,15 @@ func (coll *HistColl) getIndexRowCount(sc *stmtctx.StatementContext, idx *Index, // PseudoTable creates a pseudo table statistics. func PseudoTable(tblInfo *model.TableInfo) *Table { pseudoHistColl := HistColl{ - Count: pseudoRowCount, - TableID: tblInfo.ID, - HaveTblID: true, - Columns: make(map[int64]*Column, len(tblInfo.Columns)), - Indices: make(map[int64]*Index, len(tblInfo.Indices)), - Pseudo: true, + Count: pseudoRowCount, + PhysicalID: tblInfo.ID, + HavePhysicalID: true, + Columns: make(map[int64]*Column, len(tblInfo.Columns)), + Indices: make(map[int64]*Index, len(tblInfo.Indices)), + Pseudo: true, } t := &Table{ - HistColl: pseudoHistColl, - PKIsHandle: tblInfo.PKIsHandle, + HistColl: pseudoHistColl, } for _, col := range tblInfo.Columns { if col.State == model.StatePublic {