1147 lines
40 KiB
Go
1147 lines
40 KiB
Go
// Copyright 2017 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package statistics
|
|
|
|
import (
|
|
"cmp"
|
|
"fmt"
|
|
"maps"
|
|
"slices"
|
|
"strings"
|
|
|
|
"github.com/pingcap/tidb/pkg/expression"
|
|
"github.com/pingcap/tidb/pkg/meta/model"
|
|
"github.com/pingcap/tidb/pkg/parser/mysql"
|
|
"github.com/pingcap/tidb/pkg/planner/planctx"
|
|
"github.com/pingcap/tidb/pkg/types"
|
|
"github.com/pingcap/tidb/pkg/util/ranger"
|
|
"go.uber.org/atomic"
|
|
)
|
|
|
|
const (
	// PseudoVersion is the statistics version used for pseudo statistics; it is always 0.
	PseudoVersion uint64 = 0

	// PseudoRowCount is exported so that other packages can use it.
	// When a table has not been analyzed, pseudo statistics are used to estimate costs.
	// Pseudo statistics assume a row count of 10000, where an equal condition selects 1/1000
	// of the total rows, a less-than condition selects 1/3 of the total rows, and a between
	// condition selects 1/40 of the total rows.
	PseudoRowCount = 10000
)
|
|
|
|
// CopyIntent specifies which data structures are safe to modify in a table
// returned by Table.CopyAs.
type CopyIntent uint8

const (
	// MetaOnly shares all maps with the source table; only table metadata is safe to modify.
	MetaOnly CopyIntent = iota

	// ColumnMapWritable clones the columns map; it is safe to add/remove columns.
	ColumnMapWritable

	// IndexMapWritable clones the indices map; it is safe to add/remove indices.
	IndexMapWritable

	// BothMapsWritable clones both maps; it is safe to add/remove columns and indices.
	BothMapsWritable

	// ExtendedStatsWritable shares the columns and indices maps; it is safe to modify
	// the ExtendedStats field.
	ExtendedStatsWritable

	// AllDataWritable deep copies everything; it is safe to modify all data, including histograms.
	AllDataWritable
)
|
|
|
|
// AutoAnalyzeMinCnt is the minimum row count for auto analyze: if the count of a table
// is less than this value, the table does not need to be auto-analyzed.
// It is exported (and kept as a variable) for testing.
var AutoAnalyzeMinCnt int64 = 1000
|
|
|
|
var (
	// The function variables below exist to solve a cyclic import problem.
	// Note: all of them will be removed once every estimation function has been moved
	// into the cardinality package.

	// GetRowCountByIndexRanges is a function hook that estimates the row count for the
	// given index ranges of index idxID.
	GetRowCountByIndexRanges func(sctx planctx.PlanContext, coll *HistColl, idxID int64, indexRanges []*ranger.Range, idxCol []*expression.Column) (result RowEstimate, err error)

	// GetRowCountByColumnRanges is a function hook that estimates the row count for the
	// given column ranges of column colID.
	GetRowCountByColumnRanges func(sctx planctx.PlanContext, coll *HistColl, colID int64, colRanges []*ranger.Range, pkIsHandle bool) (result RowEstimate, err error)
)
|
|
|
|
// Table represents statistics for a table.
type Table struct {
	// ExtendedStats holds the cached extended statistics of the table; it may be nil.
	ExtendedStats *ExtendedStatsColl

	// ColAndIdxExistenceMap records which columns/indices are known and which of them
	// have analyzed statistics; it may be nil.
	ColAndIdxExistenceMap *ColAndIdxExistenceMap
	HistColl
	Version uint64
	// LastAnalyzeVersion is the timestamp of the last analyze.
	// It is used in auto-analyze to determine whether this table has been analyzed.
	// The value of this field comes from two sources:
	// 1. It is initialized by snapshot when loading stats_meta.
	// 2. It is updated by the analysis time of a specific column or index when loading the
	//    histogram of that column or index.
	LastAnalyzeVersion uint64
	// LastStatsHistVersion is the mvcc version of the last update of histograms.
	// It differs from LastAnalyzeVersion because it can be influenced by some DDL.
	// e.g. When we execute ALTER TABLE ADD COLUMN, a new record is inserted into
	// mysql.stats_histograms, and the corresponding one needs to be loaded into memory too.
	// It is used to skip redundant loading of stats, i.e. if the cached stats are already
	// up-to-date with the mysql.stats_xxx tables and the schema of the table has not changed,
	// the stats for this table do not need to be loaded again.
	// Stats' sync load/async load should not change this field since they are not
	// table-level updates.
	// It is hard to deal with the upgrade compatibility of this field, so the field does not
	// take effect unless auto analyze or DDL happened on the table.
	LastStatsHistVersion uint64
	// TblInfoUpdateTS is the UpdateTS of the TableInfo used when filling this struct.
	// It is the schema version of the corresponding table. It is used to skip redundant
	// loading of stats, i.e. if the cached stats are already up-to-date with the
	// mysql.stats_xxx tables and the schema of the table has not changed, the stats for
	// this table do not need to be loaded again.
	// TODO: it can be removed now that we have LastAnalyzeVersion and LastStatsHistVersion.
	TblInfoUpdateTS uint64

	// IsPkIsHandle presumably records whether the table's primary key is used as the
	// row handle — NOTE(review): confirm against the code that sets it.
	IsPkIsHandle bool
}
|
|
|
|
// ColAndIdxExistenceMap is the meta map for statistics.Table.
// It tells whether a column/index really has statistics, so that no useless kv
// request is sent during online stats loading.
//
// Each map is keyed by the column/index ID. The presence of a key means the
// item is known; the bool value tells whether it has analyzed statistics.
type ColAndIdxExistenceMap struct {
	colAnalyzed map[int64]bool
	idxAnalyzed map[int64]bool
}

// DeleteColNotFound removes the column with the given id from the map.
func (m *ColAndIdxExistenceMap) DeleteColNotFound(id int64) {
	delete(m.colAnalyzed, id)
}

// DeleteIdxNotFound removes the index with the given id from the map.
func (m *ColAndIdxExistenceMap) DeleteIdxNotFound(id int64) {
	delete(m.idxAnalyzed, id)
}

// HasAnalyzed reports whether the column/index exists in the map and has analyzed statistics.
// TODO: the map should only keep the analyzed cols.
// There are three possible states for a column/index's statistics:
//  1. We don't have this column/index.
//  2. We have it, but it hasn't been analyzed yet.
//  3. We have it and its statistics.
//
// HasAnalyzed returning true represents state 3; Has returning false represents state 1.
// Beginning with v8.5.2, state 1 is a nearly invalid case: it is only a middle state
// between the DDL happening and the stats' DDL handler completing. It may still need
// to be handled for upgrade compatibility.
func (m *ColAndIdxExistenceMap) HasAnalyzed(id int64, isIndex bool) bool {
	// A missing key yields the zero value false, which is exactly "not analyzed".
	if isIndex {
		return m.idxAnalyzed[id]
	}
	return m.colAnalyzed[id]
}

// Has reports whether the column/index with the given id exists in the map,
// regardless of whether it has been analyzed.
func (m *ColAndIdxExistenceMap) Has(id int64, isIndex bool) bool {
	source := m.colAnalyzed
	if isIndex {
		source = m.idxAnalyzed
	}
	_, found := source[id]
	return found
}

// InsertCol records the column with the given id together with its analyzed flag.
func (m *ColAndIdxExistenceMap) InsertCol(id int64, analyzed bool) {
	m.colAnalyzed[id] = analyzed
}

// InsertIndex records the index with the given id together with its analyzed flag.
func (m *ColAndIdxExistenceMap) InsertIndex(id int64, analyzed bool) {
	m.idxAnalyzed[id] = analyzed
}

// IsEmpty reports whether the map contains neither columns nor indices.
func (m *ColAndIdxExistenceMap) IsEmpty() bool {
	return len(m.colAnalyzed)+len(m.idxAnalyzed) == 0
}

// ColNum returns the number of columns in the map.
func (m *ColAndIdxExistenceMap) ColNum() int {
	return len(m.colAnalyzed)
}

// Clone deeply copies the map.
// The clone is built directly from maps.Clone so that no throwaway maps are
// allocated first; as with maps.Clone, a nil source map stays nil in the clone.
func (m *ColAndIdxExistenceMap) Clone() *ColAndIdxExistenceMap {
	return &ColAndIdxExistenceMap{
		colAnalyzed: maps.Clone(m.colAnalyzed),
		idxAnalyzed: maps.Clone(m.idxAnalyzed),
	}
}

const (
	// defaultColCap is the initial column capacity used when no size is given.
	defaultColCap = 16
	// defaultIdxCap is the initial index capacity used when no size is given.
	defaultIdxCap = 4
)

// NewColAndIndexExistenceMapWithoutSize returns a new object with the default capacity.
func NewColAndIndexExistenceMapWithoutSize() *ColAndIdxExistenceMap {
	return NewColAndIndexExistenceMap(defaultColCap, defaultIdxCap)
}

// NewColAndIndexExistenceMap returns a new object with the given capacity.
func NewColAndIndexExistenceMap(colCap, idxCap int) *ColAndIdxExistenceMap {
	return &ColAndIdxExistenceMap{
		colAnalyzed: make(map[int64]bool, colCap),
		idxAnalyzed: make(map[int64]bool, idxCap),
	}
}

// ColAndIdxExistenceMapIsEqual is used in testing; it checks whether the two maps are equal.
func ColAndIdxExistenceMapIsEqual(m1, m2 *ColAndIdxExistenceMap) bool {
	return maps.Equal(m1.colAnalyzed, m2.colAnalyzed) && maps.Equal(m1.idxAnalyzed, m2.idxAnalyzed)
}
|
|
|
|
// ExtendedStatsItem is the in-memory form of one mysql.stats_extended record.
type ExtendedStatsItem struct {
	StringVals string
	ColIDs     []int64
	ScalarVals float64
	Tp         uint8
}

// ExtendedStatsColl groups the cached items of the mysql.stats_extended records.
// Stats is keyed by a string; LastUpdateVersion carries a version number for the collection.
type ExtendedStatsColl struct {
	Stats             map[string]*ExtendedStatsItem
	LastUpdateVersion uint64
}

// NewExtendedStatsColl allocates an ExtendedStatsColl whose Stats map is
// empty but ready to be written to.
func NewExtendedStatsColl() *ExtendedStatsColl {
	coll := new(ExtendedStatsColl)
	coll.Stats = map[string]*ExtendedStatsItem{}
	return coll
}
|
|
|
|
const (
	// ExtendedStatsInited is the status of extended stats that are registered but have
	// not been analyzed yet.
	ExtendedStatsInited uint8 = iota
	// ExtendedStatsAnalyzed is the status of extended stats that have been collected in analyze.
	ExtendedStatsAnalyzed
	// ExtendedStatsDeleted is the status of extended stats that were dropped. These "deleted"
	// records would be removed from storage by GCStats().
	ExtendedStatsDeleted
)
|
|
|
|
// HistColl is a collection of histograms. It collects enough information for the planner
// to calculate the selectivity.
type HistColl struct {
	// Note that when used in a query, columns use UniqueID as the key while indices use the
	// index ID in the metadata. (See GenerateHistCollFromColumnInfo() for details)
	columns    map[int64]*Column
	indices    map[int64]*Index
	PhysicalID int64
	// TODO: add AnalyzeCount here
	RealtimeCount int64 // RealtimeCount is the current table row count, maintained by applying stats delta based on AnalyzeCount.
	ModifyCount   int64 // ModifyCount is the total modify count in a table.

	// StatsVer is the version of the statistics; refer to Version0, Version1, Version2 and so on.
	StatsVer int
	// Pseudo marks the collection as pseudo statistics.
	Pseudo bool

	/*
		Fields below are only used in a query, like for estimation, and they will be useless when stored in
		the stats cache. (See GenerateHistCollFromColumnInfo() for details)
	*/

	CanNotTriggerLoad bool
	// Idx2ColUniqueIDs maps the index id to its column UniqueIDs. It's used to calculate the selectivity in planner.
	Idx2ColUniqueIDs map[int64][]int64
	// ColUniqueID2IdxIDs maps the column UniqueID to a list of index ids whose first column is it.
	// It's used to calculate the selectivity in planner.
	ColUniqueID2IdxIDs map[int64][]int64
	// UniqueID2colInfoID maps the column UniqueID to its ID in the metadata.
	UniqueID2colInfoID map[int64]int64
	// MVIdx2Columns maps the index id to its columns by expression.Column.
	// For a normal index, the column id is enough, as we already have it in Idx2ColUniqueIDs. But currently, an mv index
	// needs more information to match the filter against the mv index columns, and this map provides that information.
	MVIdx2Columns map[int64][]*expression.Column
}
|
|
|
|
// NewHistColl creates a new HistColl.
|
|
func NewHistColl(id int64, realtimeCnt, modifyCnt int64, colNum, idxNum int) *HistColl {
|
|
return &HistColl{
|
|
columns: make(map[int64]*Column, colNum),
|
|
indices: make(map[int64]*Index, idxNum),
|
|
PhysicalID: id,
|
|
RealtimeCount: realtimeCnt,
|
|
ModifyCount: modifyCnt,
|
|
Idx2ColUniqueIDs: make(map[int64][]int64),
|
|
ColUniqueID2IdxIDs: make(map[int64][]int64),
|
|
UniqueID2colInfoID: make(map[int64]int64),
|
|
MVIdx2Columns: make(map[int64][]*expression.Column),
|
|
}
|
|
}
|
|
|
|
// NewHistCollWithColsAndIdxs creates a new HistColl with given columns and indices.
|
|
func NewHistCollWithColsAndIdxs(id int64, realtimeCnt, modifyCnt int64, cols map[int64]*Column, idxs map[int64]*Index) *HistColl {
|
|
return &HistColl{
|
|
columns: cols,
|
|
indices: idxs,
|
|
PhysicalID: id,
|
|
RealtimeCount: realtimeCnt,
|
|
ModifyCount: modifyCnt,
|
|
Idx2ColUniqueIDs: make(map[int64][]int64),
|
|
ColUniqueID2IdxIDs: make(map[int64][]int64),
|
|
UniqueID2colInfoID: make(map[int64]int64),
|
|
MVIdx2Columns: make(map[int64][]*expression.Column),
|
|
}
|
|
}
|
|
|
|
// SetCol sets the column with the given id.
|
|
func (coll *HistColl) SetCol(id int64, col *Column) {
|
|
coll.columns[id] = col
|
|
}
|
|
|
|
// SetIdx sets the index with the given id.
|
|
func (coll *HistColl) SetIdx(id int64, idx *Index) {
|
|
coll.indices[id] = idx
|
|
}
|
|
|
|
// GetCol gets the column with the given id.
|
|
func (coll *HistColl) GetCol(id int64) *Column {
|
|
return coll.columns[id]
|
|
}
|
|
|
|
// GetIdx gets the index with the given id.
|
|
func (coll *HistColl) GetIdx(id int64) *Index {
|
|
return coll.indices[id]
|
|
}
|
|
|
|
// ForEachColumnImmutable iterates all columns in the HistColl.
|
|
// The bool return value of f is used to control the iteration. If f returns true, the iteration will be stopped.
|
|
// Warning: Don't change the content when calling this function.
|
|
func (coll *HistColl) ForEachColumnImmutable(f func(int64, *Column) bool) {
|
|
for id, col := range coll.columns {
|
|
if f(id, col) {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// ForEachIndexImmutable iterates all columns in the HistColl.
|
|
// The bool return value of f is used to control the iteration. If f returns true, the iteration will be stopped.
|
|
// WARNING: Don't change the content when calling this function.
|
|
func (coll *HistColl) ForEachIndexImmutable(f func(int64, *Index) bool) {
|
|
for id, idx := range coll.indices {
|
|
if f(id, idx) {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// ColNum returns the number of columns in the HistColl.
|
|
func (coll *HistColl) ColNum() int {
|
|
return len(coll.columns)
|
|
}
|
|
|
|
// IdxNum returns the number of indices in the HistColl.
|
|
func (coll *HistColl) IdxNum() int {
|
|
return len(coll.indices)
|
|
}
|
|
|
|
// DelCol deletes the column with the given id.
|
|
func (t *Table) DelCol(id int64) {
|
|
delete(t.columns, id)
|
|
t.ColAndIdxExistenceMap.DeleteColNotFound(id)
|
|
}
|
|
|
|
// DelIdx deletes the index with the given id.
|
|
func (t *Table) DelIdx(id int64) {
|
|
delete(t.indices, id)
|
|
t.ColAndIdxExistenceMap.DeleteIdxNotFound(id)
|
|
}
|
|
|
|
// StableOrderColSlice returns a slice of columns in stable order.
|
|
func (coll *HistColl) StableOrderColSlice() []*Column {
|
|
cols := make([]*Column, 0, len(coll.columns))
|
|
for _, col := range coll.columns {
|
|
cols = append(cols, col)
|
|
}
|
|
slices.SortFunc(cols, func(c1, c2 *Column) int {
|
|
return cmp.Compare(c1.ID, c2.ID)
|
|
})
|
|
return cols
|
|
}
|
|
|
|
// GetColSlice returns a slice of columns without order.
|
|
func (coll *HistColl) GetColSlice() []*Column {
|
|
cols := make([]*Column, 0, len(coll.columns))
|
|
for _, col := range coll.columns {
|
|
cols = append(cols, col)
|
|
}
|
|
return cols
|
|
}
|
|
|
|
// StableOrderIdxSlice returns a slice of indices in stable order.
|
|
func (coll *HistColl) StableOrderIdxSlice() []*Index {
|
|
idxs := make([]*Index, 0, len(coll.indices))
|
|
for _, idx := range coll.indices {
|
|
idxs = append(idxs, idx)
|
|
}
|
|
slices.SortFunc(idxs, func(i1, i2 *Index) int {
|
|
return cmp.Compare(i1.ID, i2.ID)
|
|
})
|
|
return idxs
|
|
}
|
|
|
|
// GetIdxSlice returns a slice of indices without order.
|
|
func (coll *HistColl) GetIdxSlice() []*Index {
|
|
idxs := make([]*Index, 0, len(coll.indices))
|
|
for _, idx := range coll.indices {
|
|
idxs = append(idxs, idx)
|
|
}
|
|
return idxs
|
|
}
|
|
|
|
// SetAllIndexFullLoadForBootstrap sets all indices' stats loaded status to full load for bootstrap.
|
|
func (coll *HistColl) SetAllIndexFullLoadForBootstrap() {
|
|
for _, idx := range coll.indices {
|
|
idx.StatsLoadedStatus = NewStatsFullLoadStatus()
|
|
}
|
|
}
|
|
|
|
// CalcPreScalar calculates the pre-calculated scalar for all columns and indices.
|
|
func (coll *HistColl) CalcPreScalar() {
|
|
for _, idx := range coll.indices {
|
|
for i := 1; i < idx.Len(); i++ {
|
|
idx.Buckets[i].Count += idx.Buckets[i-1].Count
|
|
}
|
|
idx.PreCalculateScalar()
|
|
}
|
|
for _, col := range coll.columns {
|
|
for i := 1; i < col.Len(); i++ {
|
|
col.Buckets[i].Count += col.Buckets[i-1].Count
|
|
}
|
|
col.PreCalculateScalar()
|
|
}
|
|
}
|
|
|
|
// DropEvicted will drop the unnecessary data for all columns and indices. It's triggerred by stats cache.
|
|
func (coll *HistColl) DropEvicted() {
|
|
for _, col := range coll.columns {
|
|
if !col.IsStatsInitialized() || col.GetEvictedStatus() == AllEvicted {
|
|
continue
|
|
}
|
|
col.DropUnnecessaryData()
|
|
}
|
|
for _, idx := range coll.indices {
|
|
if !idx.IsStatsInitialized() || idx.GetEvictedStatus() == AllEvicted {
|
|
continue
|
|
}
|
|
idx.DropUnnecessaryData()
|
|
}
|
|
}
|
|
|
|
// TableMemoryUsage records tbl memory usage
|
|
type TableMemoryUsage struct {
|
|
ColumnsMemUsage map[int64]CacheItemMemoryUsage
|
|
IndicesMemUsage map[int64]CacheItemMemoryUsage
|
|
TableID int64
|
|
TotalMemUsage int64
|
|
}
|
|
|
|
// TotalIdxTrackingMemUsage returns total indices' tracking memory usage
|
|
func (t *TableMemoryUsage) TotalIdxTrackingMemUsage() (sum int64) {
|
|
for _, idx := range t.IndicesMemUsage {
|
|
sum += idx.TrackingMemUsage()
|
|
}
|
|
return sum
|
|
}
|
|
|
|
// TotalColTrackingMemUsage returns total columns' tracking memory usage
|
|
func (t *TableMemoryUsage) TotalColTrackingMemUsage() (sum int64) {
|
|
for _, col := range t.ColumnsMemUsage {
|
|
sum += col.TrackingMemUsage()
|
|
}
|
|
return sum
|
|
}
|
|
|
|
// TotalTrackingMemUsage return total tracking memory usage
|
|
func (t *TableMemoryUsage) TotalTrackingMemUsage() int64 {
|
|
return t.TotalIdxTrackingMemUsage() + t.TotalColTrackingMemUsage()
|
|
}
|
|
|
|
// TableCacheItem is the unit item stored in statsCache, e.g. Column/Index.
type TableCacheItem interface {
	// ItemID returns the ID of the item.
	ItemID() int64
	// MemoryUsage returns the memory usage breakdown of the item.
	MemoryUsage() CacheItemMemoryUsage
	// IsAllEvicted reports whether the item is fully evicted.
	IsAllEvicted() bool
	// GetEvictedStatus returns the eviction status of the item.
	GetEvictedStatus() int

	// DropUnnecessaryData drops the data of the item that is not necessary.
	DropUnnecessaryData()
	// IsStatsInitialized reports whether the statistics of the item are initialized.
	IsStatsInitialized() bool
	// GetStatsVer returns the statistics version of the item.
	GetStatsVer() int64
}
|
|
|
|
// CacheItemMemoryUsage describes the memory usage of a TableCacheItem.
type CacheItemMemoryUsage interface {
	// ItemID returns the ID of the item this usage belongs to.
	ItemID() int64
	// TotalMemoryUsage returns the total memory usage of the item.
	TotalMemoryUsage() int64
	// TrackingMemUsage returns the tracked part of the memory usage.
	TrackingMemUsage() int64
	// HistMemUsage returns the memory usage of the histogram.
	HistMemUsage() int64
	// TopnMemUsage returns the memory usage of the TopN.
	TopnMemUsage() int64
	// CMSMemUsage returns the memory usage of the CMSketch.
	CMSMemUsage() int64
}
|
|
|
|
// ColumnMemUsage records the memory usage of one column's statistics.
type ColumnMemUsage struct {
	ColumnID          int64
	HistogramMemUsage int64
	CMSketchMemUsage  int64
	FMSketchMemUsage  int64
	TopNMemUsage      int64
	TotalMemUsage     int64
}

// TotalMemoryUsage implements CacheItemMemoryUsage; it returns the recorded total.
func (c *ColumnMemUsage) TotalMemoryUsage() int64 {
	total := c.TotalMemUsage
	return total
}

// ItemID implements CacheItemMemoryUsage; it returns the column ID.
func (c *ColumnMemUsage) ItemID() int64 {
	id := c.ColumnID
	return id
}

// TrackingMemUsage implements CacheItemMemoryUsage; it is the sum of the
// histogram, TopN and CMSketch usage (the FMSketch usage is not included).
func (c *ColumnMemUsage) TrackingMemUsage() int64 {
	tracked := c.HistogramMemUsage
	tracked += c.TopNMemUsage
	tracked += c.CMSketchMemUsage
	return tracked
}

// HistMemUsage implements CacheItemMemoryUsage; it returns the histogram usage.
func (c *ColumnMemUsage) HistMemUsage() int64 {
	hist := c.HistogramMemUsage
	return hist
}

// TopnMemUsage implements CacheItemMemoryUsage; it returns the TopN usage.
func (c *ColumnMemUsage) TopnMemUsage() int64 {
	topn := c.TopNMemUsage
	return topn
}

// CMSMemUsage implements CacheItemMemoryUsage; it returns the CMSketch usage.
func (c *ColumnMemUsage) CMSMemUsage() int64 {
	cms := c.CMSketchMemUsage
	return cms
}
|
|
|
|
// IndexMemUsage records the memory usage of one index's statistics.
type IndexMemUsage struct {
	IndexID           int64
	HistogramMemUsage int64
	CMSketchMemUsage  int64
	TopNMemUsage      int64
	TotalMemUsage     int64
}

// TotalMemoryUsage implements CacheItemMemoryUsage; it returns the recorded total.
func (c *IndexMemUsage) TotalMemoryUsage() int64 {
	total := c.TotalMemUsage
	return total
}

// ItemID implements CacheItemMemoryUsage; it returns the index ID.
func (c *IndexMemUsage) ItemID() int64 {
	id := c.IndexID
	return id
}

// TrackingMemUsage implements CacheItemMemoryUsage; it is the sum of the
// histogram, TopN and CMSketch usage.
func (c *IndexMemUsage) TrackingMemUsage() int64 {
	tracked := c.HistogramMemUsage
	tracked += c.TopNMemUsage
	tracked += c.CMSketchMemUsage
	return tracked
}

// HistMemUsage implements CacheItemMemoryUsage; it returns the histogram usage.
func (c *IndexMemUsage) HistMemUsage() int64 {
	hist := c.HistogramMemUsage
	return hist
}

// TopnMemUsage implements CacheItemMemoryUsage; it returns the TopN usage.
func (c *IndexMemUsage) TopnMemUsage() int64 {
	topn := c.TopNMemUsage
	return topn
}

// CMSMemUsage implements CacheItemMemoryUsage; it returns the CMSketch usage.
func (c *IndexMemUsage) CMSMemUsage() int64 {
	cms := c.CMSketchMemUsage
	return cms
}
|
|
|
|
// MemoryUsage returns the total memory usage of this Table.
|
|
// it will only calc the size of Columns and Indices stats data of table.
|
|
// We ignore the size of other metadata in Table
|
|
func (t *Table) MemoryUsage() *TableMemoryUsage {
|
|
tMemUsage := &TableMemoryUsage{
|
|
TableID: t.PhysicalID,
|
|
ColumnsMemUsage: make(map[int64]CacheItemMemoryUsage),
|
|
IndicesMemUsage: make(map[int64]CacheItemMemoryUsage),
|
|
}
|
|
for _, col := range t.columns {
|
|
if col != nil {
|
|
colMemUsage := col.MemoryUsage()
|
|
tMemUsage.ColumnsMemUsage[colMemUsage.ItemID()] = colMemUsage
|
|
tMemUsage.TotalMemUsage += colMemUsage.TotalMemoryUsage()
|
|
}
|
|
}
|
|
for _, index := range t.indices {
|
|
if index != nil {
|
|
idxMemUsage := index.MemoryUsage()
|
|
tMemUsage.IndicesMemUsage[idxMemUsage.ItemID()] = idxMemUsage
|
|
tMemUsage.TotalMemUsage += idxMemUsage.TotalMemoryUsage()
|
|
}
|
|
}
|
|
return tMemUsage
|
|
}
|
|
|
|
// CopyAs creates a copy of the table with the specified writability intent.
|
|
//
|
|
// PERFORMANCE NOTE: Choose the most minimal intent for your use case. Copying is heavily
|
|
// used at scale and unnecessary cloning causes significant memory pressure. Only use
|
|
// AllDataWritable when you truly need to modify histogram data.
|
|
//
|
|
// MetaOnly: Shares all maps, only metadata modifications are safe
|
|
// ColumnMapWritable: Clones columns map, safe to add/remove columns
|
|
// IndexMapWritable: Clones indices map, safe to add/remove indices
|
|
// BothMapsWritable: Clones both maps - safe to add/remove columns and indices
|
|
// ExtendedStatsWritable: Shares all maps, safe to modify ExtendedStats field
|
|
// AllDataWritable: Deep copies everything, safe to modify all data including histograms
|
|
func (t *Table) CopyAs(intent CopyIntent) *Table {
|
|
var columns map[int64]*Column
|
|
var indices map[int64]*Index
|
|
var existenceMap *ColAndIdxExistenceMap
|
|
|
|
switch intent {
|
|
case MetaOnly:
|
|
columns = t.columns
|
|
indices = t.indices
|
|
existenceMap = t.ColAndIdxExistenceMap
|
|
case ColumnMapWritable:
|
|
columns = maps.Clone(t.columns)
|
|
indices = t.indices
|
|
if t.ColAndIdxExistenceMap != nil {
|
|
existenceMap = t.ColAndIdxExistenceMap.Clone()
|
|
}
|
|
case IndexMapWritable:
|
|
columns = t.columns
|
|
indices = maps.Clone(t.indices)
|
|
if t.ColAndIdxExistenceMap != nil {
|
|
existenceMap = t.ColAndIdxExistenceMap.Clone()
|
|
}
|
|
case BothMapsWritable:
|
|
columns = maps.Clone(t.columns)
|
|
indices = maps.Clone(t.indices)
|
|
if t.ColAndIdxExistenceMap != nil {
|
|
existenceMap = t.ColAndIdxExistenceMap.Clone()
|
|
}
|
|
case ExtendedStatsWritable:
|
|
columns = t.columns
|
|
indices = t.indices
|
|
existenceMap = t.ColAndIdxExistenceMap
|
|
case AllDataWritable:
|
|
// For deep copy, create new maps and deep copy all content
|
|
columns = make(map[int64]*Column, len(t.columns))
|
|
for id, col := range t.columns {
|
|
columns[id] = col.Copy()
|
|
}
|
|
indices = make(map[int64]*Index, len(t.indices))
|
|
for id, idx := range t.indices {
|
|
indices[id] = idx.Copy()
|
|
}
|
|
if t.ColAndIdxExistenceMap != nil {
|
|
existenceMap = t.ColAndIdxExistenceMap.Clone()
|
|
}
|
|
}
|
|
|
|
newHistColl := HistColl{
|
|
PhysicalID: t.PhysicalID,
|
|
RealtimeCount: t.RealtimeCount,
|
|
columns: columns,
|
|
indices: indices,
|
|
Pseudo: t.Pseudo,
|
|
ModifyCount: t.ModifyCount,
|
|
StatsVer: t.StatsVer,
|
|
}
|
|
nt := &Table{
|
|
HistColl: newHistColl,
|
|
Version: t.Version,
|
|
TblInfoUpdateTS: t.TblInfoUpdateTS,
|
|
ColAndIdxExistenceMap: existenceMap,
|
|
LastAnalyzeVersion: t.LastAnalyzeVersion,
|
|
LastStatsHistVersion: t.LastStatsHistVersion,
|
|
}
|
|
|
|
// Handle ExtendedStats for deep copy vs shallow copy
|
|
if (intent == AllDataWritable || intent == ExtendedStatsWritable) && t.ExtendedStats != nil {
|
|
newExtStatsColl := &ExtendedStatsColl{
|
|
Stats: make(map[string]*ExtendedStatsItem),
|
|
LastUpdateVersion: t.ExtendedStats.LastUpdateVersion,
|
|
}
|
|
maps.Copy(newExtStatsColl.Stats, t.ExtendedStats.Stats)
|
|
nt.ExtendedStats = newExtStatsColl
|
|
} else {
|
|
nt.ExtendedStats = t.ExtendedStats
|
|
}
|
|
|
|
return nt
|
|
}
|
|
|
|
// String implements Stringer interface.
|
|
func (t *Table) String() string {
|
|
strs := make([]string, 0, len(t.columns)+1)
|
|
strs = append(strs, fmt.Sprintf("Table:%d RealtimeCount:%d", t.PhysicalID, t.RealtimeCount))
|
|
cols := make([]*Column, 0, len(t.columns))
|
|
for _, col := range t.columns {
|
|
cols = append(cols, col)
|
|
}
|
|
slices.SortFunc(cols, func(i, j *Column) int { return cmp.Compare(i.ID, j.ID) })
|
|
for _, col := range cols {
|
|
strs = append(strs, col.String())
|
|
}
|
|
idxs := make([]*Index, 0, len(t.indices))
|
|
for _, idx := range t.indices {
|
|
idxs = append(idxs, idx)
|
|
}
|
|
slices.SortFunc(idxs, func(i, j *Index) int { return cmp.Compare(i.ID, j.ID) })
|
|
for _, idx := range idxs {
|
|
strs = append(strs, idx.String())
|
|
}
|
|
// TODO: concat content of ExtendedStatsColl
|
|
return strings.Join(strs, "\n")
|
|
}
|
|
|
|
// IndexStartWithColumn finds the first index whose first column is the given column.
|
|
func (t *Table) IndexStartWithColumn(colName string) *Index {
|
|
for _, index := range t.indices {
|
|
if index.Info.Columns[0].Name.L == colName {
|
|
return index
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ColumnByName finds the statistics.Column for the given column.
|
|
func (t *Table) ColumnByName(colName string) *Column {
|
|
for _, c := range t.columns {
|
|
if c.Info.Name.L == colName {
|
|
return c
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// GetStatsInfo returns their statistics according to the ID of the column or index, including histogram, CMSketch, TopN and FMSketch.
|
|
//
|
|
// needCopy: In order to protect the item in the cache from being damaged, we need to copy the item.
|
|
func (t *Table) GetStatsInfo(id int64, isIndex bool, needCopy bool) (*Histogram, *CMSketch, *TopN, *FMSketch, bool) {
|
|
if isIndex {
|
|
if idxStatsInfo, ok := t.indices[id]; ok {
|
|
if needCopy {
|
|
return idxStatsInfo.Histogram.Copy(),
|
|
idxStatsInfo.CMSketch.Copy(), idxStatsInfo.TopN.Copy(), idxStatsInfo.FMSketch.Copy(), true
|
|
}
|
|
return &idxStatsInfo.Histogram,
|
|
idxStatsInfo.CMSketch, idxStatsInfo.TopN, idxStatsInfo.FMSketch, true
|
|
}
|
|
// newly added index which is not analyzed yet
|
|
return nil, nil, nil, nil, false
|
|
}
|
|
if colStatsInfo, ok := t.columns[id]; ok {
|
|
if needCopy {
|
|
return colStatsInfo.Histogram.Copy(), colStatsInfo.CMSketch.Copy(),
|
|
colStatsInfo.TopN.Copy(), colStatsInfo.FMSketch.Copy(), true
|
|
}
|
|
return &colStatsInfo.Histogram, colStatsInfo.CMSketch,
|
|
colStatsInfo.TopN, colStatsInfo.FMSketch, true
|
|
}
|
|
// newly added column which is not analyzed yet
|
|
return nil, nil, nil, nil, false
|
|
}
|
|
|
|
// IsAnalyzed checks whether the table is analyzed or not by checking its last analyze's timestamp value.
|
|
// A valid timestamp must be greater than 0.
|
|
func (t *Table) IsAnalyzed() bool {
|
|
return t.LastAnalyzeVersion > 0
|
|
}
|
|
|
|
// IsEligibleForAnalysis checks whether the table is eligible for analysis.
|
|
func (t *Table) IsEligibleForAnalysis() bool {
|
|
// 1. If the statistics are either not loaded or are classified as pseudo, there is no need for analyze.
|
|
// Pseudo statistics can be created by the optimizer, so we need to double check it.
|
|
// 2. If the table is too small, we don't want to waste time to analyze it.
|
|
// Leave the opportunity to other bigger tables.
|
|
if !t.MeetAutoAnalyzeMinCnt() || t.Pseudo {
|
|
return false
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// MeetAutoAnalyzeMinCnt checks whether the table meets the minimum count required for auto-analyze.
|
|
func (t *Table) MeetAutoAnalyzeMinCnt() bool {
|
|
return t != nil && t.RealtimeCount >= AutoAnalyzeMinCnt
|
|
}
|
|
|
|
// GetAnalyzeRowCount tries to get the row count of a column or an index if possible.
|
|
// This method is useful because this row count doesn't consider the modify count.
|
|
func (coll *HistColl) GetAnalyzeRowCount() float64 {
|
|
ids := slices.Collect(maps.Keys(coll.columns))
|
|
slices.Sort(ids)
|
|
for _, id := range ids {
|
|
col := coll.columns[id]
|
|
if col != nil && col.IsFullLoad() {
|
|
return col.TotalRowCount()
|
|
}
|
|
}
|
|
clear(ids)
|
|
ids = slices.Grow(ids, len(coll.indices))
|
|
ids = slices.AppendSeq(ids, maps.Keys(coll.indices))
|
|
slices.Sort(ids)
|
|
for _, id := range ids {
|
|
idx := coll.indices[id]
|
|
if idx == nil {
|
|
continue
|
|
}
|
|
if idx.Info != nil && idx.Info.MVIndex {
|
|
continue
|
|
}
|
|
if idx.IsFullLoad() {
|
|
return idx.TotalRowCount()
|
|
}
|
|
}
|
|
return -1
|
|
}
|
|
|
|
// GetScaledRealtimeAndModifyCnt scale the RealtimeCount and ModifyCount for some special indexes where the total row
|
|
// count is different from the total row count of the table. Currently, only the mv index is this case.
|
|
// Because we will use the RealtimeCount and ModifyCount during the estimation for ranges on this index (like the upper
|
|
// bound for the out-of-range estimation logic and the IncreaseFactor logic), we can't directly use the RealtimeCount and
|
|
// ModifyCount of the table. Instead, we should scale them before using.
|
|
// For example, if the table analyze row count is 1000 and realtime row count is 1500, and the mv index total count is 5000,
|
|
// when calculating the IncreaseFactor, it should be 1500/1000 = 1.5 for normal columns/indexes, and we should use the
|
|
// same 1.5 for mv index. But obviously, use 1500/5000 would be wrong, the correct calculation should be 7500/5000 = 1.5.
|
|
// So we add this function to get this 7500.
|
|
func (coll *HistColl) GetScaledRealtimeAndModifyCnt(idxStats *Index) (realtimeCnt, modifyCnt int64) {
|
|
// In theory, we can apply this scale logic on all indexes. But currently, we only apply it on the mv index to avoid
|
|
// any unexpected changes caused by factors like precision difference.
|
|
if idxStats == nil || idxStats.Info == nil || !idxStats.Info.MVIndex || !idxStats.IsFullLoad() {
|
|
return coll.RealtimeCount, coll.ModifyCount
|
|
}
|
|
analyzeRowCount := coll.GetAnalyzeRowCount()
|
|
if analyzeRowCount <= 0 {
|
|
return coll.RealtimeCount, coll.ModifyCount
|
|
}
|
|
idxTotalRowCount := idxStats.TotalRowCount()
|
|
if idxTotalRowCount <= 0 {
|
|
return coll.RealtimeCount, coll.ModifyCount
|
|
}
|
|
scale := idxTotalRowCount / analyzeRowCount
|
|
return int64(float64(coll.RealtimeCount) * scale), int64(float64(coll.ModifyCount) * scale)
|
|
}
|
|
|
|
// GetStatsHealthy calculates stats healthy if the table stats is not pseudo.
|
|
// If the table stats is pseudo, it returns 0, false, otherwise it returns stats healthy, true.
|
|
func (t *Table) GetStatsHealthy() (int64, bool) {
|
|
if t == nil || t.Pseudo {
|
|
return 0, false
|
|
}
|
|
if !t.IsAnalyzed() {
|
|
return 0, true
|
|
}
|
|
var healthy int64
|
|
count := float64(t.RealtimeCount)
|
|
if histCount := t.GetAnalyzeRowCount(); histCount > 0 {
|
|
count = histCount
|
|
}
|
|
if float64(t.ModifyCount) < count {
|
|
healthy = int64((1.0 - float64(t.ModifyCount)/count) * 100.0)
|
|
} else if t.ModifyCount == 0 {
|
|
healthy = 100
|
|
}
|
|
return healthy, true
|
|
}
|
|
|
|
// ColumnIsLoadNeeded checks whether the column needs trigger the async/sync load.
|
|
// The Column should be visible in the table and really has analyzed statistics in the storage.
|
|
// Also, if the stats has been loaded into the memory, we also don't need to load it.
|
|
// We return the Column together with the checking result, to avoid accessing the map multiple times.
|
|
// The first bool is whether we need to load it into memory. The second bool is whether this column has stats in the system table or not.
|
|
func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (col *Column, loadNeeded, hasAnalyzed bool) {
|
|
if t.Pseudo {
|
|
return nil, false, false
|
|
}
|
|
hasAnalyzed = t.ColAndIdxExistenceMap.HasAnalyzed(id, false)
|
|
col, ok := t.columns[id]
|
|
if !ok {
|
|
// If The column have no stats object in memory. We need to check it by existence map.
|
|
// If existence map says it even has no unitialized record in storage, we don't need to do anything. => Has=false, HasAnalyzed=false
|
|
// If existence map says it has analyzed stats, we need to load it from storage. => Has=true, HasAnalyzed=true
|
|
// If existence map says it has no analyzed stats but have a uninitialized record in storage, we need to also create a fake object. => Has=true, HasAnalyzed=false
|
|
return nil, t.ColAndIdxExistenceMap.Has(id, false), hasAnalyzed
|
|
}
|
|
|
|
// If it's not analyzed yet.
|
|
// The real check condition: !ok && !hashAnalyzed.(Has must be true since we've have the memory object so we should have the storage object)
|
|
// After this check, we will always have ok && hasAnalyzed.
|
|
if !hasAnalyzed {
|
|
return nil, false, false
|
|
}
|
|
|
|
// Restore the condition from the simplified form:
|
|
// 1. ok && hasAnalyzed && fullLoad && !col.IsFullLoad => need load
|
|
// 2. ok && hasAnalyzed && !fullLoad && !col.statsInitialized => need load
|
|
if (fullLoad && !col.IsFullLoad()) || (!fullLoad && !col.statsInitialized) {
|
|
return col, true, true
|
|
}
|
|
|
|
// Otherwise don't need load it.
|
|
return col, false, true
|
|
}
|
|
|
|
// IndexIsLoadNeeded checks whether the index needs trigger the async/sync load.
|
|
// The Index should be visible in the table and really has analyzed statistics in the storage.
|
|
// Also, if the stats has been loaded into the memory, we also don't need to load it.
|
|
// We return the Index together with the checking result, to avoid accessing the map multiple times.
|
|
func (t *Table) IndexIsLoadNeeded(id int64) (*Index, bool) {
|
|
idx, ok := t.indices[id]
|
|
// If the index is not in the memory, and we have its stats in the storage. We need to trigger the load.
|
|
if !ok && t.ColAndIdxExistenceMap.HasAnalyzed(id, true) {
|
|
return nil, true
|
|
}
|
|
// If the index is in the memory, we check its embedded func.
|
|
if ok && idx.IsAnalyzed() && !idx.IsFullLoad() {
|
|
return idx, true
|
|
}
|
|
return idx, false
|
|
}
|
|
|
|
// RatioOfPseudoEstimate means if modifyCount / statsTblCount is greater than this ratio, we think the stats is invalid
|
|
// and use pseudo estimation.
|
|
var RatioOfPseudoEstimate = atomic.NewFloat64(0.7)
|
|
|
|
// IsInitialized returns true if any column/index stats of the table is initialized.
|
|
func (t *Table) IsInitialized() bool {
|
|
for _, col := range t.columns {
|
|
if col != nil && col.IsStatsInitialized() {
|
|
return true
|
|
}
|
|
}
|
|
for _, idx := range t.indices {
|
|
if idx != nil && idx.IsStatsInitialized() {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// IsOutdated returns true if the table stats is outdated.
|
|
func (t *Table) IsOutdated() bool {
|
|
rowcount := t.GetAnalyzeRowCount()
|
|
if rowcount < 0 {
|
|
rowcount = float64(t.RealtimeCount)
|
|
}
|
|
if rowcount > 0 && float64(t.ModifyCount)/rowcount > RatioOfPseudoEstimate.Load() {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// ReleaseAndPutToPool releases data structures of Table and put itself back to pool.
|
|
func (t *Table) ReleaseAndPutToPool() {
|
|
for _, col := range t.columns {
|
|
col.FMSketch.DestroyAndPutToPool()
|
|
}
|
|
clear(t.columns)
|
|
for _, idx := range t.indices {
|
|
idx.FMSketch.DestroyAndPutToPool()
|
|
}
|
|
clear(t.indices)
|
|
}
|
|
|
|
// ID2UniqueID generates a new HistColl whose `Columns` is built from UniqueID of given columns.
|
|
func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl {
|
|
cols := make(map[int64]*Column)
|
|
for _, col := range columns {
|
|
colHist, ok := coll.columns[col.ID]
|
|
if ok {
|
|
cols[col.UniqueID] = colHist
|
|
}
|
|
}
|
|
newColl := &HistColl{
|
|
PhysicalID: coll.PhysicalID,
|
|
Pseudo: coll.Pseudo,
|
|
RealtimeCount: coll.RealtimeCount,
|
|
ModifyCount: coll.ModifyCount,
|
|
columns: cols,
|
|
}
|
|
return newColl
|
|
}
|
|
|
|
// GenerateHistCollFromColumnInfo generates a new HistColl whose ColUniqueID2IdxIDs and Idx2ColUniqueIDs is built from the given parameter.
|
|
func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, columns []*expression.Column) *HistColl {
|
|
newColHistMap := make(map[int64]*Column)
|
|
colInfoID2Col := make(map[int64]*expression.Column, len(columns))
|
|
colInfoID2UniqueID := make(map[int64]int64, len(columns))
|
|
uniqueID2colInfoID := make(map[int64]int64, len(columns))
|
|
idxID2idxInfo := make(map[int64]*model.IndexInfo)
|
|
for _, col := range columns {
|
|
colInfoID2Col[col.ID] = col
|
|
colInfoID2UniqueID[col.ID] = col.UniqueID
|
|
uniqueID2colInfoID[col.UniqueID] = col.ID
|
|
}
|
|
for id, colHist := range coll.columns {
|
|
uniqueID, ok := colInfoID2UniqueID[id]
|
|
// Collect the statistics by the given columns.
|
|
if ok {
|
|
newColHistMap[uniqueID] = colHist
|
|
}
|
|
}
|
|
for _, idxInfo := range tblInfo.Indices {
|
|
idxID2idxInfo[idxInfo.ID] = idxInfo
|
|
}
|
|
newIdxHistMap := make(map[int64]*Index)
|
|
idx2Columns := make(map[int64][]int64)
|
|
colID2IdxIDs := make(map[int64][]int64)
|
|
mvIdx2Columns := make(map[int64][]*expression.Column)
|
|
for id, idxHist := range coll.indices {
|
|
idxInfo := idxID2idxInfo[id]
|
|
if idxInfo == nil {
|
|
continue
|
|
}
|
|
ids := make([]int64, 0, len(idxInfo.Columns))
|
|
for _, idxCol := range idxInfo.Columns {
|
|
uniqueID, ok := colInfoID2UniqueID[tblInfo.Columns[idxCol.Offset].ID]
|
|
if !ok {
|
|
break
|
|
}
|
|
ids = append(ids, uniqueID)
|
|
}
|
|
// If the length of the id list is 0, this index won't be used in this query.
|
|
if len(ids) == 0 {
|
|
continue
|
|
}
|
|
colID2IdxIDs[ids[0]] = append(colID2IdxIDs[ids[0]], idxHist.ID)
|
|
newIdxHistMap[idxHist.ID] = idxHist
|
|
idx2Columns[idxHist.ID] = ids
|
|
if idxInfo.MVIndex {
|
|
cols, ok := PrepareCols4MVIndex(tblInfo, idxInfo, colInfoID2Col, true)
|
|
if ok {
|
|
mvIdx2Columns[id] = cols
|
|
}
|
|
}
|
|
}
|
|
for _, idxIDs := range colID2IdxIDs {
|
|
slices.Sort(idxIDs)
|
|
}
|
|
newColl := &HistColl{
|
|
PhysicalID: coll.PhysicalID,
|
|
Pseudo: coll.Pseudo,
|
|
RealtimeCount: coll.RealtimeCount,
|
|
ModifyCount: coll.ModifyCount,
|
|
columns: newColHistMap,
|
|
indices: newIdxHistMap,
|
|
ColUniqueID2IdxIDs: colID2IdxIDs,
|
|
Idx2ColUniqueIDs: idx2Columns,
|
|
UniqueID2colInfoID: uniqueID2colInfoID,
|
|
MVIdx2Columns: mvIdx2Columns,
|
|
}
|
|
return newColl
|
|
}
|
|
|
|
// PseudoHistColl creates a lightweight pseudo HistColl for cost calculation.
|
|
// This is optimized for cases where only HistColl is needed, avoiding the overhead
|
|
// of creating a full pseudo table with ColAndIdxExistenceMap and other structures.
|
|
func PseudoHistColl(physicalID int64, allowTriggerLoading bool) HistColl {
|
|
return HistColl{
|
|
RealtimeCount: PseudoRowCount,
|
|
PhysicalID: physicalID,
|
|
columns: nil,
|
|
indices: nil,
|
|
Pseudo: true,
|
|
CanNotTriggerLoad: !allowTriggerLoading,
|
|
ModifyCount: 0,
|
|
StatsVer: 0,
|
|
}
|
|
}
|
|
|
|
// PseudoTable creates a pseudo table statistics.
|
|
// Usually, we don't want to trigger stats loading for pseudo table.
|
|
// But there are exceptional cases. In such cases, we should pass allowTriggerLoading as true.
|
|
// Such case could possibly happen in getStatsTable().
|
|
func PseudoTable(tblInfo *model.TableInfo, allowTriggerLoading bool, allowFillHistMeta bool) *Table {
|
|
t := &Table{
|
|
HistColl: PseudoHistColl(tblInfo.ID, allowTriggerLoading),
|
|
Version: PseudoVersion,
|
|
ColAndIdxExistenceMap: NewColAndIndexExistenceMap(len(tblInfo.Columns), len(tblInfo.Indices)),
|
|
}
|
|
|
|
// Initialize columns and indices maps only when allowFillHistMeta is true
|
|
if allowFillHistMeta {
|
|
t.columns = make(map[int64]*Column, len(tblInfo.Columns))
|
|
t.indices = make(map[int64]*Index, len(tblInfo.Indices))
|
|
}
|
|
|
|
for _, col := range tblInfo.Columns {
|
|
// The column is public to use. Also we should check the column is not hidden since hidden means that it's used by expression index.
|
|
// We would not collect stats for the hidden column and we won't use the hidden column to estimate.
|
|
// Thus we don't create pseudo stats for it.
|
|
if col.State == model.StatePublic && !col.Hidden {
|
|
t.ColAndIdxExistenceMap.InsertCol(col.ID, false)
|
|
if allowFillHistMeta {
|
|
t.columns[col.ID] = &Column{
|
|
PhysicalID: tblInfo.ID,
|
|
Info: col,
|
|
IsHandle: tblInfo.PKIsHandle && mysql.HasPriKeyFlag(col.GetFlag()),
|
|
Histogram: *NewPseudoHistogram(col.ID, &col.FieldType),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
for _, idx := range tblInfo.Indices {
|
|
if idx.State == model.StatePublic {
|
|
t.ColAndIdxExistenceMap.InsertIndex(idx.ID, false)
|
|
if allowFillHistMeta {
|
|
t.indices[idx.ID] = &Index{
|
|
PhysicalID: tblInfo.ID,
|
|
Info: idx,
|
|
Histogram: *NewPseudoHistogram(idx.ID, types.NewFieldType(mysql.TypeBlob)),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return t
|
|
}
|
|
|
|
// CheckAnalyzeVerOnTable checks whether the given version is the one from the tbl.
|
|
// If not, it will return false and set the version to the tbl's.
|
|
// We use this check to make sure all the statistics of the table are in the same version.
|
|
func CheckAnalyzeVerOnTable(tbl *Table, version *int) bool {
|
|
if IsAnalyzed(int64(tbl.StatsVer)) && tbl.StatsVer != *version {
|
|
*version = tbl.StatsVer
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
// PrepareCols4MVIndex helps to identify the columns of an MV index. We need this information for estimation.
|
|
// This logic is shared between the estimation logic and the access path generation logic. We'd like to put the mv index
|
|
// related functions together in the planner/core package. So we use this trick here to avoid the import cycle.
|
|
var PrepareCols4MVIndex func(
|
|
tableInfo *model.TableInfo,
|
|
mvIndex *model.IndexInfo,
|
|
tblColsByID map[int64]*expression.Column,
|
|
checkOnly1ArrayTypeCol bool,
|
|
) (idxCols []*expression.Column, ok bool)
|