stats: support loading partition stats (#7305)

Haibin Xie
2018-08-09 20:00:03 +08:00
committed by GitHub
parent fe6e710877
commit 00839ceeec
6 changed files with 138 additions and 95 deletions
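In short, statistics are now loaded and cached per "physical ID": for a partitioned table that is the ID of each partition definition, for an ordinary table it is simply the table ID. The sketch below is not part of the commit; the helper name physicalIDsOf is made up, but it only uses the model APIs the diff itself relies on (GetPartitionInfo, Definitions, def.ID) to show which IDs the stats cache is keyed by.

package example

import "github.com/pingcap/tidb/model"

// physicalIDsOf is a hypothetical helper, not part of this commit. It lists
// the IDs under which statistics are stored after this change: one per
// partition for a partitioned table, or just the table ID otherwise.
func physicalIDsOf(tblInfo *model.TableInfo) []int64 {
	pi := tblInfo.GetPartitionInfo()
	if pi == nil {
		return []int64{tblInfo.ID}
	}
	ids := make([]int64, 0, len(pi.Definitions))
	for _, def := range pi.Definitions {
		ids = append(ids, def.ID)
	}
	return ids
}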

View File

@@ -15,11 +15,10 @@ package executor_test
import (
"fmt"
"strings"
. "github.com/pingcap/check"
"github.com/pingcap/tidb/executor"
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/session"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/util/testkit"
)
@@ -47,14 +46,19 @@ PARTITION BY RANGE ( a ) (
c.Assert(err, IsNil)
pi := table.Meta().GetPartitionInfo()
c.Assert(pi, NotNil)
ids := make([]string, 0, len(pi.Definitions))
do, err := session.GetDomain(s.store)
c.Assert(err, IsNil)
handle := do.StatsHandle()
for _, def := range pi.Definitions {
ids = append(ids, fmt.Sprintf("%d", def.ID))
statsTbl := handle.GetPartitionStats(table.Meta(), def.ID)
c.Assert(statsTbl.Pseudo, IsFalse)
c.Assert(len(statsTbl.Columns), Equals, 2)
c.Assert(len(statsTbl.Indices), Equals, 1)
for _, col := range statsTbl.Columns {
c.Assert(col.Len(), Greater, 0)
}
for _, idx := range statsTbl.Indices {
c.Assert(idx.Len(), Greater, 0)
}
}
result := tk.MustQuery(fmt.Sprintf("select count(distinct(table_id)) from mysql.stats_meta where table_id in (%s)", strings.Join(ids, ",")))
result.Check(testkit.Rows(fmt.Sprintf("%d", len(ids))))
result = tk.MustQuery(fmt.Sprintf("select count(distinct(table_id)) from mysql.stats_histograms where table_id in (%s)", strings.Join(ids, ",")))
result.Check(testkit.Rows(fmt.Sprintf("%d", len(ids))))
result = tk.MustQuery(fmt.Sprintf("select count(distinct(table_id)) from mysql.stats_buckets where table_id in (%s)", strings.Join(ids, ",")))
result.Check(testkit.Rows(fmt.Sprintf("%d", len(ids))))
}
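The accessors exercised by this test can be used outside tests as well. A minimal sketch, assuming an initialized *statistics.Handle and the table's *model.TableInfo are already at hand; totalRowCount is a made-up helper that sums the cached per-partition row counts:

package example

import (
	"github.com/pingcap/tidb/model"
	"github.com/pingcap/tidb/statistics"
)

// totalRowCount adds up the cached row counts of all partitions. For a
// non-partitioned table it falls back to the table-level statistics.
func totalRowCount(h *statistics.Handle, tblInfo *model.TableInfo) int64 {
	pi := tblInfo.GetPartitionInfo()
	if pi == nil {
		return h.GetTableStats(tblInfo).Count
	}
	var total int64
	for _, def := range pi.Definitions {
		total += h.GetPartitionStats(tblInfo, def.ID).Count
	}
	return total
}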

View File

@@ -27,30 +27,30 @@ import (
"golang.org/x/net/context"
)
func initStatsMeta4Chunk(is infoschema.InfoSchema, tables statsCache, iter *chunk.Iterator4Chunk) {
func (h *Handle) initStatsMeta4Chunk(is infoschema.InfoSchema, tables statsCache, iter *chunk.Iterator4Chunk) {
for row := iter.Begin(); row != iter.End(); row = iter.Next() {
tableID := row.GetInt64(1)
table, ok := is.TableByID(tableID)
physicalID := row.GetInt64(1)
table, ok := h.getTableByPhysicalID(is, physicalID)
if !ok {
log.Debugf("Unknown table ID %d in stats meta table, maybe it has been dropped", tableID)
log.Debugf("Unknown physical ID %d in stats meta table, maybe it has been dropped", physicalID)
continue
}
tableInfo := table.Meta()
newHistColl := HistColl{
TableID: tableInfo.ID,
HaveTblID: true,
Count: row.GetInt64(3),
ModifyCount: row.GetInt64(2),
Columns: make(map[int64]*Column, len(tableInfo.Columns)),
Indices: make(map[int64]*Index, len(tableInfo.Indices)),
colName2Idx: make(map[string]int64, len(tableInfo.Columns)),
colName2ID: make(map[string]int64, len(tableInfo.Columns)),
PhysicalID: physicalID,
HavePhysicalID: true,
Count: row.GetInt64(3),
ModifyCount: row.GetInt64(2),
Columns: make(map[int64]*Column, len(tableInfo.Columns)),
Indices: make(map[int64]*Index, len(tableInfo.Indices)),
colName2Idx: make(map[string]int64, len(tableInfo.Columns)),
colName2ID: make(map[string]int64, len(tableInfo.Columns)),
}
tbl := &Table{
HistColl: newHistColl,
Version: row.GetUint64(0),
}
tables[tableID] = tbl
tables[physicalID] = tbl
}
}
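For reference, the column layout decoded from each mysql.stats_meta row above can be summarized as follows; the constant names are invented for illustration and only restate the row.Get* calls in initStatsMeta4Chunk.

// Hypothetical constants, inferred from the row.Get* calls above;
// they are not defined anywhere in this commit.
const (
	metaColVersion     = 0 // row.GetUint64(0) -> Table.Version
	metaColPhysicalID  = 1 // row.GetInt64(1)  -> cache key and HistColl.PhysicalID
	metaColModifyCount = 2 // row.GetInt64(2)  -> HistColl.ModifyCount
	metaColCount       = 3 // row.GetInt64(3)  -> HistColl.Count
)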
@@ -76,19 +76,19 @@ func (h *Handle) initStatsMeta(is infoschema.InfoSchema) (statsCache, error) {
if chk.NumRows() == 0 {
break
}
initStatsMeta4Chunk(is, tables, iter)
h.initStatsMeta4Chunk(is, tables, iter)
}
return tables, nil
}
func initStatsHistograms4Chunk(is infoschema.InfoSchema, tables statsCache, iter *chunk.Iterator4Chunk) {
func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, tables statsCache, iter *chunk.Iterator4Chunk) {
for row := iter.Begin(); row != iter.End(); row = iter.Next() {
table, ok := tables[row.GetInt64(0)]
if !ok {
continue
}
id, ndv, nullCount, version, totColSize := row.GetInt64(2), row.GetInt64(3), row.GetInt64(5), row.GetUint64(4), row.GetInt64(7)
tbl, _ := is.TableByID(table.TableID)
tbl, _ := h.getTableByPhysicalID(is, table.PhysicalID)
if row.GetInt64(1) > 0 {
var idxInfo *model.IndexInfo
for _, idx := range tbl.Meta().Indices {
@@ -145,7 +145,7 @@ func (h *Handle) initStatsHistograms(is infoschema.InfoSchema, tables statsCache
if chk.NumRows() == 0 {
break
}
initStatsHistograms4Chunk(is, tables, iter)
h.initStatsHistograms4Chunk(is, tables, iter)
}
return nil
}

View File

@@ -58,7 +58,7 @@ func dumpJSONCol(hist *Histogram, CMSketch *CMSketch) *jsonColumn {
// DumpStatsToJSON dumps statistics to JSON.
func (h *Handle) DumpStatsToJSON(dbName string, tableInfo *model.TableInfo) (*JSONTable, error) {
tbl, err := h.tableStatsFromStorage(tableInfo, true)
tbl, err := h.tableStatsFromStorage(tableInfo, tableInfo.ID, true)
if err != nil {
return nil, errors.Trace(err)
}
@@ -101,18 +101,18 @@ func (h *Handle) LoadStatsFromJSON(is infoschema.InfoSchema, jsonTbl *JSONTable)
}
for _, col := range tbl.Columns {
err = h.SaveStatsToStorage(tbl.TableID, tbl.Count, 0, &col.Histogram, col.CMSketch, 1)
err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 0, &col.Histogram, col.CMSketch, 1)
if err != nil {
return errors.Trace(err)
}
}
for _, idx := range tbl.Indices {
err = h.SaveStatsToStorage(tbl.TableID, tbl.Count, 1, &idx.Histogram, idx.CMSketch, 1)
err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 1, &idx.Histogram, idx.CMSketch, 1)
if err != nil {
return errors.Trace(err)
}
}
err = h.SaveMetaToStorage(tbl.TableID, tbl.Count, tbl.ModifyCount)
err = h.SaveMetaToStorage(tbl.PhysicalID, tbl.Count, tbl.ModifyCount)
if err != nil {
return errors.Trace(err)
}
@@ -122,12 +122,12 @@ func (h *Handle) LoadStatsFromJSON(is infoschema.InfoSchema, jsonTbl *JSONTable)
// LoadStatsFromJSONToTable loads statistics from a JSONTable and returns the statistics Table.
func (h *Handle) LoadStatsFromJSONToTable(tableInfo *model.TableInfo, jsonTbl *JSONTable) (*Table, error) {
newHistColl := HistColl{
TableID: tableInfo.ID,
HaveTblID: true,
Count: jsonTbl.Count,
ModifyCount: jsonTbl.ModifyCount,
Columns: make(map[int64]*Column, len(jsonTbl.Columns)),
Indices: make(map[int64]*Index, len(jsonTbl.Indices)),
PhysicalID: tableInfo.ID,
HavePhysicalID: true,
Count: jsonTbl.Count,
ModifyCount: jsonTbl.ModifyCount,
Columns: make(map[int64]*Column, len(jsonTbl.Columns)),
Indices: make(map[int64]*Index, len(jsonTbl.Indices)),
}
tbl := &Table{
HistColl: newHistColl,

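Putting the dump/load path above together, a hedged usage sketch (roundTripStats is a made-up wrapper; it assumes an initialized handle, the current infoschema, and the target table's meta): DumpStatsToJSON serializes the table's statistics, and LoadStatsFromJSON writes them back keyed by the loaded table's PhysicalID.

package example

import (
	"github.com/pingcap/tidb/infoschema"
	"github.com/pingcap/tidb/model"
	"github.com/pingcap/tidb/statistics"
)

// roundTripStats dumps the statistics of one table to JSON and loads them
// back through the same handle. Illustrative only, not part of this commit.
func roundTripStats(h *statistics.Handle, is infoschema.InfoSchema, dbName string, tblInfo *model.TableInfo) error {
	jsonTbl, err := h.DumpStatsToJSON(dbName, tblInfo)
	if err != nil {
		return err
	}
	return h.LoadStatsFromJSON(is, jsonTbl)
}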
View File

@@ -25,6 +25,7 @@ import (
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/store/tikv/oracle"
"github.com/pingcap/tidb/table"
"github.com/pingcap/tidb/util/sqlexec"
log "github.com/sirupsen/logrus"
)
@@ -40,6 +41,10 @@ type Handle struct {
lastVersion uint64
// rateMap contains the error rate delta from feedback.
rateMap errorRateDeltaMap
// pid2tid is the map from partition ID to table ID.
pid2tid map[int64]int64
// schemaVersion is the version of information schema when `pid2tid` is built.
schemaVersion int64
}
restrictedExec sqlexec.RestrictedSQLExecutor
@@ -128,25 +133,27 @@ func (h *Handle) Update(is infoschema.InfoSchema) error {
deletedTableIDs := make([]int64, 0, len(rows))
for _, row := range rows {
version := row.GetUint64(0)
tableID := row.GetInt64(1)
physicalID := row.GetInt64(1)
modifyCount := row.GetInt64(2)
count := row.GetInt64(3)
lastVersion = version
table, ok := is.TableByID(tableID)
h.mu.Lock()
table, ok := h.getTableByPhysicalID(is, physicalID)
h.mu.Unlock()
if !ok {
log.Debugf("Unknown table ID %d in stats meta table, maybe it has been dropped", tableID)
deletedTableIDs = append(deletedTableIDs, tableID)
log.Debugf("Unknown physical ID %d in stats meta table, maybe it has been dropped", physicalID)
deletedTableIDs = append(deletedTableIDs, physicalID)
continue
}
tableInfo := table.Meta()
tbl, err := h.tableStatsFromStorage(tableInfo, false)
tbl, err := h.tableStatsFromStorage(tableInfo, physicalID, false)
// A non-nil error may mean that there have been DDL changes on this table, so we do not update it.
if err != nil {
log.Debugf("Error occurred when read table stats for table %s. The error message is %s.", tableInfo.Name.O, errors.ErrorStack(err))
continue
}
if tbl == nil {
deletedTableIDs = append(deletedTableIDs, tableID)
deletedTableIDs = append(deletedTableIDs, physicalID)
continue
}
tbl.Version = version
@@ -161,11 +168,45 @@ func (h *Handle) Update(is infoschema.InfoSchema) error {
return nil
}
func (h *Handle) getTableByPhysicalID(is infoschema.InfoSchema, physicalID int64) (table.Table, bool) {
if is.SchemaMetaVersion() != h.mu.schemaVersion {
h.mu.schemaVersion = is.SchemaMetaVersion()
h.mu.pid2tid = buildPartitionID2TableID(is)
}
if id, ok := h.mu.pid2tid[physicalID]; ok {
return is.TableByID(id)
}
return is.TableByID(physicalID)
}
func buildPartitionID2TableID(is infoschema.InfoSchema) map[int64]int64 {
mapper := make(map[int64]int64)
for _, db := range is.AllSchemas() {
tbls := db.Tables
for _, tbl := range tbls {
pi := tbl.GetPartitionInfo()
if pi == nil {
continue
}
for _, def := range pi.Definitions {
mapper[def.ID] = tbl.ID
}
}
}
return mapper
}
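A worked example of the mapping built above, with made-up IDs: a table with ID 41 and two partitions whose definition IDs are 42 and 43. The resolve closure only mirrors the branch in getTableByPhysicalID; the map itself is rebuilt lazily whenever the infoschema's SchemaMetaVersion changes.

package main

import "fmt"

func main() {
	// What buildPartitionID2TableID would produce for this (invented) schema.
	pid2tid := map[int64]int64{
		42: 41, // partition p0 of table 41
		43: 41, // partition p1 of table 41
	}
	resolve := func(physicalID int64) int64 {
		if tid, ok := pid2tid[physicalID]; ok {
			return tid // a partition: map back to its owning table
		}
		return physicalID // an ordinary table: the physical ID is the table ID
	}
	fmt.Println(resolve(42), resolve(43), resolve(41)) // prints: 41 41 41
}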
// GetTableStats retrieves the statistics table from cache, and the cache will be updated by a goroutine.
func (h *Handle) GetTableStats(tblInfo *model.TableInfo) *Table {
tbl, ok := h.statsCache.Load().(statsCache)[tblInfo.ID]
return h.GetPartitionStats(tblInfo, tblInfo.ID)
}
// GetPartitionStats retrieves the partition stats from cache.
func (h *Handle) GetPartitionStats(tblInfo *model.TableInfo, pid int64) *Table {
tbl, ok := h.statsCache.Load().(statsCache)[pid]
if !ok {
tbl = PseudoTable(tblInfo)
tbl.PhysicalID = pid
h.UpdateTableStats([]*Table{tbl}, nil)
return tbl
}
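How a caller is expected to choose between the two accessors, as a hedged sketch (getStatsTable is a hypothetical wrapper): GetTableStats is simply GetPartitionStats with the table's own ID, and asking for a physical ID that has never been analyzed returns a pseudo table whose PhysicalID is set to the requested ID and which is planted into the cache.

package example

import (
	"github.com/pingcap/tidb/model"
	"github.com/pingcap/tidb/statistics"
)

// getStatsTable is a hypothetical wrapper, not part of this commit. It picks
// the right accessor for a whole table versus a single partition.
func getStatsTable(h *statistics.Handle, tblInfo *model.TableInfo, physicalID int64) *statistics.Table {
	if physicalID != tblInfo.ID {
		// Per-partition statistics, keyed by the partition's physical ID.
		return h.GetPartitionStats(tblInfo, physicalID)
	}
	// Whole-table statistics; equivalent to GetPartitionStats(tblInfo, tblInfo.ID).
	return h.GetTableStats(tblInfo)
}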
@@ -185,7 +226,7 @@ func (h *Handle) copyFromOldCache() statsCache {
func (h *Handle) UpdateTableStats(tables []*Table, deletedIDs []int64) {
newCache := h.copyFromOldCache()
for _, tbl := range tables {
id := tbl.TableID
id := tbl.PhysicalID
newCache[id] = tbl
}
for _, id := range deletedIDs {

View File

@@ -99,11 +99,11 @@ func mockStatsHistogram(id int64, values []types.Datum, repeat int64, tp *types.
func mockStatsTable(tbl *model.TableInfo, rowCount int64) *statistics.Table {
histColl := statistics.HistColl{
TableID: tbl.ID,
HaveTblID: true,
Count: rowCount,
Columns: make(map[int64]*statistics.Column, len(tbl.Columns)),
Indices: make(map[int64]*statistics.Index, len(tbl.Indices)),
PhysicalID: tbl.ID,
HavePhysicalID: true,
Count: rowCount,
Columns: make(map[int64]*statistics.Column, len(tbl.Columns)),
Indices: make(map[int64]*statistics.Index, len(tbl.Indices)),
}
statsTbl := &statistics.Table{
HistColl: histColl,

View File

@@ -45,34 +45,33 @@ const (
// Table represents statistics for a table.
type Table struct {
HistColl
Version uint64
PKIsHandle bool
Version uint64
}
// HistColl is a collection of histograms. It collects enough information for the planner to calculate selectivity.
type HistColl struct {
TableID int64
HaveTblID bool
Columns map[int64]*Column
Indices map[int64]*Index
colName2Idx map[string]int64 // map column name to index id
colName2ID map[string]int64 // map column name to column id
Pseudo bool
Count int64
ModifyCount int64 // Total modify count in a table.
PhysicalID int64 // PhysicalID is the partition id for a partitioned table, otherwise, it is the table id.
HavePhysicalID bool
Columns map[int64]*Column
Indices map[int64]*Index
colName2Idx map[string]int64 // map column name to index id
colName2ID map[string]int64 // map column name to column id
Pseudo bool
Count int64
ModifyCount int64 // Total modify count in a table.
}
func (t *Table) copy() *Table {
newHistColl := HistColl{
TableID: t.TableID,
HaveTblID: t.HaveTblID,
Count: t.Count,
Columns: make(map[int64]*Column),
Indices: make(map[int64]*Index),
colName2Idx: make(map[string]int64),
colName2ID: make(map[string]int64),
Pseudo: t.Pseudo,
ModifyCount: t.ModifyCount,
PhysicalID: t.PhysicalID,
HavePhysicalID: t.HavePhysicalID,
Count: t.Count,
Columns: make(map[int64]*Column),
Indices: make(map[int64]*Index),
colName2Idx: make(map[string]int64),
colName2ID: make(map[string]int64),
Pseudo: t.Pseudo,
ModifyCount: t.ModifyCount,
}
for id, col := range t.Columns {
newHistColl.Columns[id] = col
@@ -124,7 +123,7 @@ func (h *Handle) indexStatsFromStorage(row chunk.Row, table *Table, tableInfo *m
errorRate := ErrorRate{}
if isAnalyzed(row.GetInt64(8)) {
h.mu.Lock()
h.mu.rateMap.clear(table.TableID, histID, true)
h.mu.rateMap.clear(table.PhysicalID, histID, true)
h.mu.Unlock()
} else if idx != nil {
errorRate = idx.ErrorRate
@@ -134,11 +133,11 @@ func (h *Handle) indexStatsFromStorage(row chunk.Row, table *Table, tableInfo *m
continue
}
if idx == nil || idx.LastUpdateVersion < histVer {
hg, err := h.histogramFromStorage(tableInfo.ID, histID, types.NewFieldType(mysql.TypeBlob), distinct, 1, histVer, nullCount, 0)
hg, err := h.histogramFromStorage(table.PhysicalID, histID, types.NewFieldType(mysql.TypeBlob), distinct, 1, histVer, nullCount, 0)
if err != nil {
return errors.Trace(err)
}
cms, err := h.cmSketchFromStorage(tableInfo.ID, 1, idxInfo.ID)
cms, err := h.cmSketchFromStorage(table.PhysicalID, 1, idxInfo.ID)
if err != nil {
return errors.Trace(err)
}
@@ -164,7 +163,7 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *Table, tableInfo *
errorRate := ErrorRate{}
if isAnalyzed(row.GetInt64(8)) {
h.mu.Lock()
h.mu.rateMap.clear(table.TableID, histID, false)
h.mu.rateMap.clear(table.PhysicalID, histID, false)
h.mu.Unlock()
} else if col != nil {
errorRate = col.ErrorRate
@@ -184,7 +183,7 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *Table, tableInfo *
(col == nil || col.Len() == 0 && col.LastUpdateVersion < histVer) &&
!loadAll
if notNeedLoad {
count, err := h.columnCountFromStorage(table.TableID, histID)
count, err := h.columnCountFromStorage(table.PhysicalID, histID)
if err != nil {
return errors.Trace(err)
}
@@ -204,11 +203,11 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *Table, tableInfo *
break
}
if col == nil || col.LastUpdateVersion < histVer || loadAll {
hg, err := h.histogramFromStorage(tableInfo.ID, histID, &colInfo.FieldType, distinct, 0, histVer, nullCount, totColSize)
hg, err := h.histogramFromStorage(table.PhysicalID, histID, &colInfo.FieldType, distinct, 0, histVer, nullCount, totColSize)
if err != nil {
return errors.Trace(err)
}
cms, err := h.cmSketchFromStorage(tableInfo.ID, 0, colInfo.ID)
cms, err := h.cmSketchFromStorage(table.PhysicalID, 0, colInfo.ID)
if err != nil {
return errors.Trace(err)
}
@@ -240,18 +239,18 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *Table, tableInfo *
}
// tableStatsFromStorage loads table stats info from storage.
func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, loadAll bool) (*Table, error) {
func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, physicalID int64, loadAll bool) (*Table, error) {
table, ok := h.statsCache.Load().(statsCache)[tableInfo.ID]
// If table stats is pseudo, we also need to copy it, since we will use the column stats when
// the average error rate of it is small.
if !ok {
histColl := HistColl{
TableID: tableInfo.ID,
HaveTblID: true,
Columns: make(map[int64]*Column, len(tableInfo.Columns)),
Indices: make(map[int64]*Index, len(tableInfo.Indices)),
colName2Idx: make(map[string]int64),
colName2ID: make(map[string]int64),
PhysicalID: physicalID,
HavePhysicalID: true,
Columns: make(map[int64]*Column, len(tableInfo.Columns)),
Indices: make(map[int64]*Index, len(tableInfo.Indices)),
colName2Idx: make(map[string]int64),
colName2ID: make(map[string]int64),
}
table = &Table{
HistColl: histColl,
@@ -261,7 +260,7 @@ func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, loadAll bool)
table = table.copy()
}
table.Pseudo = false
selSQL := fmt.Sprintf("select table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, flag from mysql.stats_histograms where table_id = %d", tableInfo.ID)
selSQL := fmt.Sprintf("select table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, flag from mysql.stats_histograms where table_id = %d", physicalID)
rows, _, err := h.restrictedExec.ExecRestrictedSQL(nil, selSQL)
if err != nil {
return nil, errors.Trace(err)
@@ -288,7 +287,7 @@ func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, loadAll bool)
// String implements Stringer interface.
func (t *Table) String() string {
strs := make([]string, 0, len(t.Columns)+1)
strs = append(strs, fmt.Sprintf("Table:%d Count:%d", t.TableID, t.Count))
strs = append(strs, fmt.Sprintf("Table:%d Count:%d", t.PhysicalID, t.Count))
for _, col := range t.Columns {
strs = append(strs, col.String())
}
@@ -353,7 +352,7 @@ func (coll *HistColl) ColumnIsInvalid(sc *stmtctx.StatementContext, colID int64)
}
if col.NDV > 0 && col.Len() == 0 {
sc.SetHistogramsNotLoad()
histogramNeededColumns.insert(tableColumnID{tableID: coll.TableID, columnID: colID})
histogramNeededColumns.insert(tableColumnID{tableID: coll.PhysicalID, columnID: colID})
}
return col.totalRowCount() == 0 || (col.NDV > 0 && col.Len() == 0)
}
@@ -533,16 +532,15 @@ func (coll *HistColl) getIndexRowCount(sc *stmtctx.StatementContext, idx *Index,
// PseudoTable creates a pseudo table statistics.
func PseudoTable(tblInfo *model.TableInfo) *Table {
pseudoHistColl := HistColl{
Count: pseudoRowCount,
TableID: tblInfo.ID,
HaveTblID: true,
Columns: make(map[int64]*Column, len(tblInfo.Columns)),
Indices: make(map[int64]*Index, len(tblInfo.Indices)),
Pseudo: true,
Count: pseudoRowCount,
PhysicalID: tblInfo.ID,
HavePhysicalID: true,
Columns: make(map[int64]*Column, len(tblInfo.Columns)),
Indices: make(map[int64]*Index, len(tblInfo.Indices)),
Pseudo: true,
}
t := &Table{
HistColl: pseudoHistColl,
PKIsHandle: tblInfo.PKIsHandle,
HistColl: pseudoHistColl,
}
for _, col := range tblInfo.Columns {
if col.State == model.StatePublic {