statistics: make sure PQ can analyze all indexes with stats version 1 (#57258)

close pingcap/tidb#57257
This commit is contained in:
Rustin
2024-11-11 16:03:48 +08:00
committed by GitHub
parent e16613d0bc
commit 1b490966af
5 changed files with 81 additions and 10 deletions

View File

@ -244,8 +244,10 @@ func (j *DynamicPartitionedTableAnalysisJob) analyzePartitionIndexes(
sysProcTracker sysproctrack.Tracker,
) (success bool) {
analyzePartitionBatchSize := int(variable.AutoAnalyzePartitionBatchSize.Load())
// For version 2, analyze one index will analyze all other indexes and columns.
// For version 1, analyze one index will only analyze the specified index.
analyzeVersion := sctx.GetSessionVars().AnalyzeVersion
OnlyPickOneIndex:
for indexName, partitionNames := range j.PartitionIndexes {
needAnalyzePartitionNames := make([]any, 0, len(partitionNames))
for _, partition := range partitionNames {
@ -262,10 +264,16 @@ OnlyPickOneIndex:
params := append([]any{j.TableSchema, j.GlobalTableName}, needAnalyzePartitionNames[start:end]...)
params = append(params, indexName)
success = exec.AutoAnalyze(sctx, statsHandle, sysProcTracker, j.TableStatsVer, sql, params...)
if !success {
return false
}
}
// For version 1, we need to analyze all indexes.
if analyzeVersion != 1 {
// Halt execution after analyzing one index.
// This is because analyzing a single index also analyzes all other indexes and columns.
// Therefore, to avoid redundancy, we prevent multiple analyses of the same partition.
break OnlyPickOneIndex
break
}
}
return

View File

@ -212,6 +212,18 @@ func (j *NonPartitionedTableAnalysisJob) analyzeIndexes(
if len(j.Indexes) == 0 {
return true
}
// For version 2, analyze one index will analyze all other indexes and columns.
// For version 1, analyze one index will only analyze the specified index.
analyzeVersion := sctx.GetSessionVars().AnalyzeVersion
if analyzeVersion == 1 {
for _, index := range j.Indexes {
sql, params := j.GenSQLForAnalyzeIndex(index)
if !exec.AutoAnalyze(sctx, statsHandle, sysProcTracker, j.TableStatsVer, sql, params...) {
return false
}
}
return true
}
// Only analyze the first index.
// This is because analyzing a single index also analyzes all other indexes and columns.
// Therefore, to avoid redundancy, we prevent multiple analyses of the same table.

View File

@ -376,7 +376,7 @@ func (pq *AnalysisPriorityQueue) ProcessDMLChanges() {
// Only update if we've seen a newer version
if newMaxVersion > lastFetchTimestamp {
statslogutil.StatsLogger().Info("Updating last fetch timestamp", zap.Uint64("new_max_version", newMaxVersion))
queueSamplerLogger().Info("Updating last fetch timestamp", zap.Uint64("new_max_version", newMaxVersion))
pq.syncFields.lastDMLUpdateFetchTimestamp = newMaxVersion
}
return nil
@ -404,12 +404,6 @@ func (pq *AnalysisPriorityQueue) processTableStats(
return errors.Trace(err)
}
jobFactory := NewAnalysisJobFactory(sctx, autoAnalyzeRatio, currentTs)
// Check if the table is needed to be analyzed.
// Note: Unanalyzed tables will also be considered.
changePercent := jobFactory.CalculateChangePercentage(stats)
if changePercent == 0 {
return nil
}
is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema)
pruneMode := variable.PartitionPruneMode(sctx.GetSessionVars().PartitionPruneMode.Load())
@ -455,7 +449,6 @@ func (pq *AnalysisPriorityQueue) tryCreateJob(
}
tableInfo, ok := pq.statsHandle.TableInfoByID(is, stats.PhysicalID)
tableMeta := tableInfo.Meta()
if !ok {
statslogutil.StatsLogger().Warn(
"Table info not found for table id",
@ -463,6 +456,7 @@ func (pq *AnalysisPriorityQueue) tryCreateJob(
)
return nil
}
tableMeta := tableInfo.Meta()
schemaName, ok := is.SchemaNameByTableID(tableMeta.ID)
if !ok {
statslogutil.StatsLogger().Warn(

View File

@ -902,3 +902,48 @@ func TestVectorIndexTriggerAutoAnalyze(t *testing.T) {
// No event is found
require.Nil(t, addIndexEvent)
}
func TestAddIndexTriggerAutoAnalyzeWithStatsVersion1(t *testing.T) {
store, do := testkit.CreateMockStoreAndDomain(t)
testKit := testkit.NewTestKit(t, store)
testKit.MustExec("set @@global.tidb_analyze_version=1;")
testKit.MustExec("use test")
testKit.MustExec("create table t (c1 int, c2 int, index idx(c1, c2)) partition by range columns (c1) (partition p0 values less than (5), partition p1 values less than (10))")
is := do.InfoSchema()
tbl, err := is.TableByName(context.Background(), pmodel.NewCIStr("test"), pmodel.NewCIStr("t"))
require.NoError(t, err)
tableInfo := tbl.Meta()
h := do.StatsHandle()
// Analyze table.
testKit.MustExec("analyze table t")
require.NoError(t, h.Update(context.Background(), do.InfoSchema()))
// Insert some data.
testKit.MustExec("insert into t values (1,2),(2,2)")
require.NoError(t, h.DumpStatsDeltaToKV(true))
require.NoError(t, h.Update(context.Background(), do.InfoSchema()))
// Add two indexes.
testKit.MustExec("alter table t add index idx1(c1)")
testKit.MustExec("alter table t add index idx2(c2)")
statistics.AutoAnalyzeMinCnt = 0
defer func() {
statistics.AutoAnalyzeMinCnt = 1000
}()
pq := priorityqueue.NewAnalysisPriorityQueue(h)
defer pq.Close()
require.NoError(t, pq.Initialize())
isEmpty, err := pq.IsEmpty()
require.NoError(t, err)
require.False(t, isEmpty)
job, err := pq.Peek()
require.NoError(t, err)
require.Equal(t, tableInfo.ID, job.GetTableID())
require.NoError(t, job.Analyze(h, do.SysProcTracker()))
// Check the stats of the indexes.
tableStats := h.GetTableStats(tableInfo)
require.True(t, tableStats.GetIdx(1).IsAnalyzed())
require.True(t, tableStats.GetIdx(2).IsAnalyzed())
require.True(t, tableStats.GetIdx(3).IsAnalyzed())
}

View File

@ -225,6 +225,18 @@ func (j *StaticPartitionedTableAnalysisJob) analyzeStaticPartitionIndexes(
if len(j.Indexes) == 0 {
return true
}
// For version 2, analyze one index will analyze all other indexes and columns.
// For version 1, analyze one index will only analyze the specified index.
analyzeVersion := sctx.GetSessionVars().AnalyzeVersion
if analyzeVersion == 1 {
for _, index := range j.Indexes {
sql, params := j.GenSQLForAnalyzeStaticPartitionIndex(index)
if !exec.AutoAnalyze(sctx, statsHandle, sysProcTracker, j.TableStatsVer, sql, params...) {
return false
}
}
return true
}
// Only analyze the first index.
// This is because analyzing a single index also analyzes all other indexes and columns.
// Therefore, to avoid redundancy, we prevent multiple analyses of the same partition.