// Copyright 2021 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package statistics import ( "fmt" ) // NonPartitionTableID is the partition id for non-partition table. const NonPartitionTableID = -1 // AnalyzeTableID is hybrid table id used to analyze table. type AnalyzeTableID struct { TableID int64 // PartitionID is used for the construction of partition table statistics. It indicate the ID of the partition. // If the table is not the partition table, the PartitionID will be equal to NonPartitionTableID. PartitionID int64 } // GetStatisticsID is used to obtain the table ID to build statistics. // If the 'PartitionID == NonPartitionTableID', we use the TableID to build the statistics for non-partition tables. // Otherwise, we use the PartitionID to build the statistics of the partitions in the partition tables. func (h *AnalyzeTableID) GetStatisticsID() int64 { statisticsID := h.TableID if h.PartitionID != NonPartitionTableID { statisticsID = h.PartitionID } return statisticsID } // IsPartitionTable indicates whether the table is partition table. func (h *AnalyzeTableID) IsPartitionTable() bool { return h.PartitionID != NonPartitionTableID } func (h *AnalyzeTableID) String() string { return fmt.Sprintf("%d => %v", h.PartitionID, h.TableID) } // Equals indicates whether two table id is equal. func (h *AnalyzeTableID) Equals(t *AnalyzeTableID) bool { if h == t { return true } if h == nil || t == nil { return false } return h.TableID == t.TableID && h.PartitionID == t.PartitionID } // AnalyzeResult is used to represent analyze result. // In version2 analyze, we use the following structure to represent the analyze result. // It represents the list of analyze result for all columns when isIndex is 0. // Also represents the list of analyze result for all indexes when idIndex is 1. type AnalyzeResult struct { Hist []*Histogram Cms []*CMSketch TopNs []*TopN Fms []*FMSketch IsIndex int } // DestroyAndPutToPool destroys the result and put it to the pool. func (a *AnalyzeResult) DestroyAndPutToPool() { for _, f := range a.Fms { f.DestroyAndPutToPool() } for _, h := range a.Hist { h.DestroyAndPutToPool() } } // AnalyzeResults represents the analyze results of a task. type AnalyzeResults struct { Err error ExtStats *ExtendedStatsColl Job *AnalyzeJob // Ars: combine the analyze result of all columns and the analyze result of indexes. // (In stats version2) // For example: // If the tableA (c1, c2, c3) has indexes (c1, c2), (c2, c3), the result will be: // Ars: [AnalyzeResult1[c1, c2, c3], AnalyzeResult2[c1_c2, c2_c3]] Ars []*AnalyzeResult TableID AnalyzeTableID Count int64 StatsVer int // Snapshot is the snapshot timestamp when we start the analysis job. Snapshot uint64 // BaseCount is the original count in mysql.stats_meta at the beginning of analyze. BaseCount int64 // BaseModifyCnt is the original modify_count in mysql.stats_meta at the beginning of analyze. BaseModifyCnt int64 // For multi-valued index analyze, there are some very different behaviors, so we add this field to indicate it. // // Analyze result of multi-valued index come from an independent v2 analyze index task (AnalyzeIndexExec), and it's // done by a scan on the index data and building stats. According to the original design rational of v2 stats, we // should use the same samples to build stats for all columns/indexes. We created an exceptional case here to avoid // loading the samples of JSON columns to tidb, which may cost too much memory, and we can't handle such case very // well now. // // As the definition of multi-valued index, the row count and NDV of this index may be higher than the table row // count. So we can't use this result to update the table-level row count. // The snapshot field is used by v2 analyze to check if there are concurrent analyze, so we also can't update it. // The multi-valued index analyze task is always together with another normal v2 analyze table task, which will // take care of those table-level fields. // In conclusion, when saving the analyze result for mv index, we need to store the index stats, as for the // table-level fields, we only need to update the version. // // The global index has only one key range, so an independent task is used to process it. // Global index needs to update only the version at the table-level fields, just like mv index. ForMVIndexOrGlobalIndex bool } // DestroyAndPutToPool destroys the result and put it to the pool. func (a *AnalyzeResults) DestroyAndPutToPool() { for _, f := range a.Ars { f.DestroyAndPutToPool() } }