// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package statistics
|
|
|
|
import (
|
|
"fmt"
|
|
)
|
|
|
|
// NonPartitionTableID is the partition id for non-partition table.
const NonPartitionTableID = -1

// AnalyzeTableID is the hybrid table id used to analyze a table.
type AnalyzeTableID struct {
	TableID int64
	// PartitionID is used for the construction of partition table statistics. It indicates the ID of the partition.
	// If the table is not a partition table, the PartitionID will be equal to NonPartitionTableID.
	PartitionID int64
}

// GetStatisticsID returns the ID used to build statistics.
// When PartitionID equals NonPartitionTableID the TableID is returned (non-partition table);
// otherwise the PartitionID is returned (a partition of a partition table).
func (h *AnalyzeTableID) GetStatisticsID() int64 {
	if h.IsPartitionTable() {
		return h.PartitionID
	}
	return h.TableID
}

// IsPartitionTable reports whether the ID refers to a partition, i.e. whether
// PartitionID differs from NonPartitionTableID.
func (h *AnalyzeTableID) IsPartitionTable() bool {
	return h.PartitionID != NonPartitionTableID
}

// String implements fmt.Stringer. The result has the form "<PartitionID> => <TableID>".
// A nil receiver yields "<nil>" instead of panicking, matching what the fmt package
// prints for a nil-receiver Stringer and keeping String as nil-tolerant as Equals.
func (h *AnalyzeTableID) String() string {
	if h == nil {
		return "<nil>"
	}
	return fmt.Sprintf("%d => %v", h.PartitionID, h.TableID)
}

// Equals reports whether h and t identify the same table and partition.
// Two nil pointers are equal; a nil pointer never equals a non-nil one.
func (h *AnalyzeTableID) Equals(t *AnalyzeTableID) bool {
	if h == nil || t == nil {
		// Equal only when both sides are nil.
		return h == t
	}
	return h.TableID == t.TableID && h.PartitionID == t.PartitionID
}
|
|
|
|
// AnalyzeResult is used to represent analyze result.
|
|
// In version2 analyze, we use the following structure to represent the analyze result.
|
|
// It represents the list of analyze result for all columns when isIndex is 0.
|
|
// Also represents the list of analyze result for all indexes when idIndex is 1.
|
|
type AnalyzeResult struct {
|
|
Hist []*Histogram
|
|
Cms []*CMSketch
|
|
TopNs []*TopN
|
|
Fms []*FMSketch
|
|
IsIndex int
|
|
}
|
|
|
|
// DestroyAndPutToPool destroys the result and put it to the pool.
|
|
func (a *AnalyzeResult) DestroyAndPutToPool() {
|
|
for _, f := range a.Fms {
|
|
f.DestroyAndPutToPool()
|
|
}
|
|
for _, h := range a.Hist {
|
|
h.DestroyAndPutToPool()
|
|
}
|
|
}
|
|
|
|
// AnalyzeResults represents the analyze results of a task.
|
|
type AnalyzeResults struct {
|
|
Err error
|
|
ExtStats *ExtendedStatsColl
|
|
Job *AnalyzeJob
|
|
// Ars: combine the analyze result of all columns and the analyze result of indexes.
|
|
// (In stats version2)
|
|
// For example:
|
|
// If the tableA (c1, c2, c3) has indexes (c1, c2), (c2, c3), the result will be:
|
|
// Ars: [AnalyzeResult1[c1, c2, c3], AnalyzeResult2[c1_c2, c2_c3]]
|
|
Ars []*AnalyzeResult
|
|
TableID AnalyzeTableID
|
|
Count int64
|
|
StatsVer int
|
|
// Snapshot is the snapshot timestamp when we start the analysis job.
|
|
Snapshot uint64
|
|
// BaseCount is the original count in mysql.stats_meta at the beginning of analyze.
|
|
BaseCount int64
|
|
// BaseModifyCnt is the original modify_count in mysql.stats_meta at the beginning of analyze.
|
|
BaseModifyCnt int64
|
|
// For multi-valued index analyze, there are some very different behaviors, so we add this field to indicate it.
|
|
//
|
|
// Analyze result of multi-valued index come from an independent v2 analyze index task (AnalyzeIndexExec), and it's
|
|
// done by a scan on the index data and building stats. According to the original design rational of v2 stats, we
|
|
// should use the same samples to build stats for all columns/indexes. We created an exceptional case here to avoid
|
|
// loading the samples of JSON columns to tidb, which may cost too much memory, and we can't handle such case very
|
|
// well now.
|
|
//
|
|
// As the definition of multi-valued index, the row count and NDV of this index may be higher than the table row
|
|
// count. So we can't use this result to update the table-level row count.
|
|
// The snapshot field is used by v2 analyze to check if there are concurrent analyze, so we also can't update it.
|
|
// The multi-valued index analyze task is always together with another normal v2 analyze table task, which will
|
|
// take care of those table-level fields.
|
|
// In conclusion, when saving the analyze result for mv index, we need to store the index stats, as for the
|
|
// table-level fields, we only need to update the version.
|
|
//
|
|
// The global index has only one key range, so an independent task is used to process it.
|
|
// Global index needs to update only the version at the table-level fields, just like mv index.
|
|
ForMVIndexOrGlobalIndex bool
|
|
}
|
|
|
|
// DestroyAndPutToPool destroys the result and put it to the pool.
|
|
func (a *AnalyzeResults) DestroyAndPutToPool() {
|
|
for _, f := range a.Ars {
|
|
f.DestroyAndPutToPool()
|
|
}
|
|
}
|