Files
tidb/pkg/distsql/select_result.go

1167 lines
36 KiB
Go

// Copyright 2018 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package distsql
import (
"bytes"
"container/heap"
"context"
"fmt"
"strconv"
"sync/atomic"
"time"
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
rmpb "github.com/pingcap/kvproto/pkg/resource_manager"
"github.com/pingcap/tidb/pkg/config"
dcontext "github.com/pingcap/tidb/pkg/distsql/context"
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/kv"
"github.com/pingcap/tidb/pkg/metrics"
"github.com/pingcap/tidb/pkg/parser/terror"
"github.com/pingcap/tidb/pkg/planner/util"
"github.com/pingcap/tidb/pkg/sessionctx/vardef"
"github.com/pingcap/tidb/pkg/store/copr"
"github.com/pingcap/tidb/pkg/telemetry"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/chunk"
"github.com/pingcap/tidb/pkg/util/codec"
"github.com/pingcap/tidb/pkg/util/dbterror"
"github.com/pingcap/tidb/pkg/util/execdetails"
"github.com/pingcap/tidb/pkg/util/intest"
"github.com/pingcap/tidb/pkg/util/logutil"
"github.com/pingcap/tidb/pkg/util/memory"
"github.com/pingcap/tipb/go-tipb"
tikvmetrics "github.com/tikv/client-go/v2/metrics"
"github.com/tikv/client-go/v2/tikv"
clientutil "github.com/tikv/client-go/v2/util"
"go.uber.org/zap"
)
// Package-local aliases of the store-batch telemetry counters exported by the
// metrics package. They are updated in (*selectResult).close when the response
// reports store-batch information.
var (
telemetryBatchedQueryTaskCnt = metrics.TelemetryBatchedQueryTaskCnt
telemetryStoreBatchedCnt = metrics.TelemetryStoreBatchedCnt
telemetryStoreBatchedFallbackCnt = metrics.TelemetryStoreBatchedFallbackCnt
)
// Compile-time checks that every result implementation in this file satisfies
// the SelectResult interface.
var (
_ SelectResult = (*selectResult)(nil)
_ SelectResult = (*serialSelectResults)(nil)
_ SelectResult = (*sortedSelectResults)(nil)
)
// SelectResult is an iterator of coprocessor partial results.
type SelectResult interface {
// NextRaw gets the next raw result as undecoded bytes.
NextRaw(context.Context) ([]byte, error)
// Next reads the next batch of decoded rows into the given chunk.
Next(context.Context, *chunk.Chunk) error
// IntoIter converts the SelectResult into a row iterator. The argument
// carries the field types of each intermediate output.
IntoIter([][]*types.FieldType) (SelectResultIter, error)
// Close closes the iterator.
Close() error
}
// SelectResultRow is the row returned by the SelectResultIter.
type SelectResultRow struct {
// ChannelIndex is the index of the channel this row was read from.
// When ChannelIndex < len(IntermediateChannels), it means this row is from intermediate result.
// Otherwise, if ChannelIndex == len(IntermediateChannels), it means this row is from final result.
ChannelIndex int
// Row is the actual data of this row.
chunk.Row
}
// SelectResultIter is an iterator that is used to iterate the rows from SelectResult.
type SelectResultIter interface {
// Next returns the next row.
// Once the iterator is drained, `SelectResultRow.IsEmpty()` of the returned row is true.
Next(ctx context.Context) (SelectResultRow, error)
// Close closes the iterator.
Close() error
}
// chunkRowHeap exposes a sortedSelectResults as a heap.Interface. The heap
// elements are the row pointers kept in sortedSelectResults.rowPtrs, and they
// are ordered by sortedSelectResults.lessRow.
type chunkRowHeap struct {
	*sortedSelectResults
}

// Len reports the number of row pointers currently held by the heap.
func (h chunkRowHeap) Len() int {
	return len(h.rowPtrs)
}

// Less reports whether the row referenced by element i sorts before the row
// referenced by element j.
func (h chunkRowHeap) Less(i, j int) bool {
	left, right := h.rowPtrs[i], h.rowPtrs[j]
	leftRow := h.cachedChunks[left.ChkIdx].GetRow(int(left.RowIdx))
	rightRow := h.cachedChunks[right.ChkIdx].GetRow(int(right.RowIdx))
	return h.lessRow(leftRow, rightRow)
}

// Swap exchanges elements i and j.
func (h chunkRowHeap) Swap(i, j int) {
	h.rowPtrs[i], h.rowPtrs[j] = h.rowPtrs[j], h.rowPtrs[i]
}

// Push appends x, which must be a chunk.RowPtr, to the underlying slice.
func (h *chunkRowHeap) Push(x any) {
	ptr := x.(chunk.RowPtr)
	h.rowPtrs = append(h.rowPtrs, ptr)
}

// Pop removes the last element of the underlying slice and returns it.
func (h *chunkRowHeap) Pop() any {
	last := len(h.rowPtrs) - 1
	popped := h.rowPtrs[last]
	h.rowPtrs = h.rowPtrs[:last]
	return popped
}
// NewSortedSelectResults wraps several SelectResults into a single one whose
// Next returns rows ordered by byitems. It is only for partition table.
// If schema == nil, sort by first few columns.
func NewSortedSelectResults(ectx expression.EvalContext, selectResult []SelectResult, schema *expression.Schema, byitems []*util.ByItems, memTracker *memory.Tracker) SelectResult {
	sorted := &sortedSelectResults{
		schema:       schema,
		selectResult: selectResult,
		byItems:      byitems,
		memTracker:   memTracker,
		// One cache slot per underlying result; the chunks are created lazily in Next.
		cachedChunks: make([]*chunk.Chunk, len(selectResult)),
	}
	sorted.heap = &chunkRowHeap{sorted}
	sorted.initCompareFuncs(ectx)
	sorted.buildKeyColumns()
	return sorted
}
// sortedSelectResults merges the rows of several SelectResults, returning them
// in the order defined by byItems.
type sortedSelectResults struct {
// schema is used to resolve the column index of each by-item; it may be nil.
schema *expression.Schema
// selectResult holds the underlying results being merged.
selectResult []SelectResult
// compareFuncs[i] compares the key column of byItems[i].
compareFuncs []chunk.CompareFunc
byItems []*util.ByItems
// keyColumns[i] is the column index in a row compared for byItems[i].
keyColumns []int
// cachedChunks[i] buffers the most recent chunk read from selectResult[i].
cachedChunks []*chunk.Chunk
// rowPtrs is the backing storage of heap.
rowPtrs []chunk.RowPtr
heap *chunkRowHeap
// memTracker is charged with the memory usage of cachedChunks.
memTracker *memory.Tracker
}
// updateCachedChunk reads the next batch of the idx-th SelectResult into its
// cached chunk, charges the memory tracker with the change in the chunk's
// memory usage, and pushes the first row of the batch onto the heap. Nothing
// is pushed when the refreshed chunk has no rows.
func (ssr *sortedSelectResults) updateCachedChunk(ctx context.Context, idx uint32) error {
	cached := ssr.cachedChunks[idx]
	usageBefore := cached.MemoryUsage()
	if err := ssr.selectResult[idx].Next(ctx, cached); err != nil {
		return err
	}
	ssr.memTracker.Consume(cached.MemoryUsage() - usageBefore)
	if cached.NumRows() > 0 {
		heap.Push(ssr.heap, chunk.RowPtr{ChkIdx: idx, RowIdx: 0})
	}
	return nil
}
// initCompareFuncs builds one comparison function per by-item, chosen from the
// type of the by-item's expression.
func (ssr *sortedSelectResults) initCompareFuncs(ectx expression.EvalContext) {
	funcs := make([]chunk.CompareFunc, 0, len(ssr.byItems))
	for _, item := range ssr.byItems {
		funcs = append(funcs, chunk.GetCompareFunc(item.Expr.GetType(ectx)))
	}
	ssr.compareFuncs = funcs
}
// buildKeyColumns records, for every by-item, which column of a row is used as
// the sort key. Without a schema the i-th by-item maps to the i-th column;
// otherwise the column is looked up in the schema.
func (ssr *sortedSelectResults) buildKeyColumns() {
	ssr.keyColumns = make([]int, 0, len(ssr.byItems))
	for pos, item := range ssr.byItems {
		// The assertion runs for every by-item, so an expression that is not a
		// column panics here even when no schema is set.
		col := item.Expr.(*expression.Column)
		keyCol := pos
		if ssr.schema != nil {
			keyCol = ssr.schema.ColumnIndex(col)
		}
		ssr.keyColumns = append(ssr.keyColumns, keyCol)
	}
}
// lessRow reports whether rowI sorts strictly before rowJ. Key columns are
// compared in order, with the comparison inverted for descending by-items;
// rows that are equal on every key column are not "less".
func (ssr *sortedSelectResults) lessRow(rowI, rowJ chunk.Row) bool {
	for pos, col := range ssr.keyColumns {
		order := ssr.compareFuncs[pos](rowI, col, rowJ, col)
		if ssr.byItems[pos].Desc {
			order = -order
		}
		switch {
		case order < 0:
			return true
		case order > 0:
			return false
		}
	}
	return false
}
// NextRaw is not supported by sortedSelectResults and always panics.
func (*sortedSelectResults) NextRaw(_ context.Context) ([]byte, error) {
	panic("Not support NextRaw for sortedSelectResults")
}
// Next fills c with the next rows in sorted order. On each call it resets c,
// lazily creates any missing cached chunk as a copy-construct of c, and, when
// the heap is empty, refills every cached chunk. It then repeatedly pops the
// smallest row, appends it to c, and either pushes the following row of the
// same cached chunk or refills that chunk once its last row was consumed.
func (ssr *sortedSelectResults) Next(ctx context.Context, c *chunk.Chunk) (err error) {
	c.Reset()
	for i, cached := range ssr.cachedChunks {
		if cached != nil {
			continue
		}
		fresh := c.CopyConstruct()
		ssr.cachedChunks[i] = fresh
		ssr.memTracker.Consume(fresh.MemoryUsage())
	}

	if ssr.heap.Len() == 0 {
		for i := range ssr.cachedChunks {
			if err = ssr.updateCachedChunk(ctx, uint32(i)); err != nil {
				return err
			}
		}
	}

	for c.NumRows() < c.RequiredRows() && ssr.heap.Len() > 0 {
		ptr := heap.Pop(ssr.heap).(chunk.RowPtr)
		src := ssr.cachedChunks[ptr.ChkIdx]
		c.AppendRow(src.GetRow(int(ptr.RowIdx)))
		if int(ptr.RowIdx) < src.NumRows()-1 {
			// More rows remain in this cached chunk: queue the next one.
			heap.Push(ssr.heap, chunk.RowPtr{ChkIdx: ptr.ChkIdx, RowIdx: ptr.RowIdx + 1})
			continue
		}
		// The cached chunk is exhausted: fetch the next batch from its source.
		if err = ssr.updateCachedChunk(ctx, ptr.ChkIdx); err != nil {
			return err
		}
	}
	return nil
}
// IntoIter is not implemented for sortedSelectResults and always returns an error.
func (*sortedSelectResults) IntoIter(_ [][]*types.FieldType) (SelectResultIter, error) {
	var noIter SelectResultIter
	return noIter, errors.New("not implemented")
}
// Close closes every underlying SelectResult and releases the memory tracked
// for the cached chunks.
//
// All results are closed even if one of them fails, so that a single failure
// does not leak the remaining results or leave their cached chunks charged to
// the memory tracker. The first error encountered is returned.
func (ssr *sortedSelectResults) Close() (err error) {
	for i, sr := range ssr.selectResult {
		if closeErr := sr.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
		// The cached chunk is created lazily in Next, so it may still be nil
		// (Next never called) or already released by an earlier Close.
		if cached := ssr.cachedChunks[i]; cached != nil {
			ssr.memTracker.Consume(-cached.MemoryUsage())
			ssr.cachedChunks[i] = nil
		}
	}
	return err
}
// NewSerialSelectResults creates a SelectResult which reads the given
// SelectResults one after another, starting from the first.
func NewSerialSelectResults(selectResults []SelectResult) SelectResult {
	serial := new(serialSelectResults)
	serial.selectResults = selectResults
	serial.cur = 0
	return serial
}
// serialSelectResults reads each SelectResult serially.
type serialSelectResults struct {
selectResults []SelectResult
// cur is the index of the SelectResult currently being read.
cur int
}
// NextRaw returns the next non-empty raw result, advancing to the following
// SelectResult whenever the current one yields an empty result. It returns
// (nil, nil) once every SelectResult is exhausted.
func (ssr *serialSelectResults) NextRaw(ctx context.Context) ([]byte, error) {
	// The post statement only runs when the current result produced nothing.
	for ; ssr.cur < len(ssr.selectResults); ssr.cur++ {
		data, err := ssr.selectResults[ssr.cur].NextRaw(ctx)
		if err != nil {
			return nil, err
		}
		if len(data) > 0 {
			return data, nil
		}
	}
	return nil, nil
}
// Next reads rows into chk from the current SelectResult, moving on to the
// following one whenever the current one yields no rows. chk is left without
// rows once every SelectResult is exhausted.
func (ssr *serialSelectResults) Next(ctx context.Context, chk *chunk.Chunk) error {
	// The post statement only runs when the current result produced no rows.
	for ; ssr.cur < len(ssr.selectResults); ssr.cur++ {
		err := ssr.selectResults[ssr.cur].Next(ctx, chk)
		if err != nil {
			return err
		}
		if chk.NumRows() > 0 {
			return nil
		}
	}
	return nil
}
// IntoIter is not implemented for serialSelectResults and always returns an error.
func (*serialSelectResults) IntoIter(_ [][]*types.FieldType) (SelectResultIter, error) {
	var noIter SelectResultIter
	return noIter, errors.New("not implemented")
}
// Close closes every underlying SelectResult. All of them are closed even when
// some fail; the error of the last failing one is returned.
func (ssr *serialSelectResults) Close() (err error) {
	for i := range ssr.selectResults {
		closeErr := ssr.selectResults[i].Close()
		if closeErr == nil {
			continue
		}
		err = closeErr
	}
	return err
}
// selectResult is the SelectResult backed by a single kv.Response. It decodes
// each partial response into a tipb.SelectResponse and serves rows from it.
type selectResult struct {
// label and sqlType are used as label values of the DistSQL query histogram.
label string
// resp is the source of partial results.
resp kv.Response
// rowLen is the number of columns decoded per row of the main output.
rowLen int
// fieldTypes are the column types of the main output.
fieldTypes []*types.FieldType
// intermediateOutputTypes are the column types of each intermediate output,
// as passed to the most recent fetch.
intermediateOutputTypes [][]*types.FieldType
ctx *dcontext.DistSQLContext
// selectResp is the response currently being consumed; nil when none is loaded.
selectResp *tipb.SelectResponse
selectRespSize int64 // record the selectResp.Size() when it is initialized.
// respChkIdx is the index in selectResp.Chunks of the chunk being read.
respChkIdx int
// respChunkDecoder is created lazily by readFromChunk.
respChunkDecoder *chunk.Decoder
partialCount int64 // number of partial results.
sqlType string
// copPlanIDs contains all copTasks' planIDs,
// which help to collect copTasks' runtime stats.
copPlanIDs []int
rootPlanID int
storeType kv.StoreType
// fetchDuration accumulates the time spent waiting on resp.Next.
fetchDuration time.Duration
// durationReported is set once fetchDuration has been observed in the histogram.
durationReported bool
// memTracker may be nil, in which case memory is not tracked.
memTracker *memory.Tracker
// stats is created lazily by updateCopRuntimeStats.
stats *selectResultRuntimeStats
// distSQLConcurrency and paging are only for collecting information, and they don't affect the process of execution.
distSQLConcurrency int
paging bool
// iter is set by IntoIter; once set, Next, NextRaw, IntoIter and Close return an error.
iter *selectResultIter
}
// fetchResp fetches the next response, expecting it to carry no intermediate
// outputs.
func (r *selectResult) fetchResp(ctx context.Context) error {
	var noIntermediateTypes [][]*types.FieldType
	return r.fetchRespWithIntermediateResults(ctx, noIntermediateTypes)
}
// fetchRespWithIntermediateResults pulls responses from r.resp until it gets one
// that has at least one main-output chunk or at least one non-empty intermediate
// output, or until the response stream is drained (r.selectResp is then nil).
//
// intermediateOutputTypes must have exactly as many entries as each response has
// intermediate outputs; otherwise an error is returned.
//
// Every fetched response is unmarshalled, accounted for in the memory tracker,
// checked for an embedded error, and has its warnings and cop runtime stats
// merged into r.ctx.
func (r *selectResult) fetchRespWithIntermediateResults(ctx context.Context, intermediateOutputTypes [][]*types.FieldType) error {
// On every exit, bump the telemetry counter of each cache-hit-ratio bucket
// that the current ratio reaches.
defer func() {
if r.stats != nil {
// Ignore internal sql.
if !r.ctx.InRestrictedSQL && r.stats.copRespTime.Size() > 0 {
ratio := r.stats.calcCacheHit()
if ratio >= 1 {
telemetry.CurrentCoprCacheHitRatioGTE100Count.Inc()
}
if ratio >= 0.8 {
telemetry.CurrentCoprCacheHitRatioGTE80Count.Inc()
}
if ratio >= 0.4 {
telemetry.CurrentCoprCacheHitRatioGTE40Count.Inc()
}
if ratio >= 0.2 {
telemetry.CurrentCoprCacheHitRatioGTE20Count.Inc()
}
if ratio >= 0.1 {
telemetry.CurrentCoprCacheHitRatioGTE10Count.Inc()
}
if ratio >= 0.01 {
telemetry.CurrentCoprCacheHitRatioGTE1Count.Inc()
}
if ratio >= 0 {
telemetry.CurrentCoprCacheHitRatioGTE0Count.Inc()
}
}
}
}()
for {
r.respChkIdx = 0
// Only the time spent inside resp.Next counts towards fetchDuration.
startTime := time.Now()
resultSubset, err := r.resp.Next(ctx)
duration := time.Since(startTime)
r.fetchDuration += duration
if err != nil {
return errors.Trace(err)
}
// Release the memory accounted for the previous response before replacing it.
if r.selectResp != nil {
r.memConsume(-atomic.LoadInt64(&r.selectRespSize))
}
// A nil result means the response stream is drained.
if resultSubset == nil {
r.selectResp = nil
atomic.StoreInt64(&r.selectRespSize, 0)
if !r.durationReported {
// final round of fetch
// TODO: Add a label to distinguish between success or failure.
// https://github.com/pingcap/tidb/issues/11397
if r.paging {
metrics.DistSQLQueryHistogram.WithLabelValues(r.label, r.sqlType, "paging").Observe(r.fetchDuration.Seconds())
} else {
metrics.DistSQLQueryHistogram.WithLabelValues(r.label, r.sqlType, "common").Observe(r.fetchDuration.Seconds())
}
r.durationReported = true
}
return nil
}
r.selectResp = new(tipb.SelectResponse)
err = r.selectResp.Unmarshal(resultSubset.GetData())
if err != nil {
return errors.Trace(err)
}
// Account for the decoded response in the memory tracker.
respSize := int64(r.selectResp.Size())
atomic.StoreInt64(&r.selectRespSize, respSize)
r.memConsume(respSize)
// Convert an error embedded in the response into a TiKV-class error.
if err := r.selectResp.Error; err != nil {
return dbterror.ClassTiKV.Synthesize(terror.ErrCode(err.Code), err.Msg)
}
if len(r.selectResp.IntermediateOutputs) != len(intermediateOutputTypes) {
return errors.Errorf(
"The length of intermediate output types %d mismatches the length of got intermediate outputs %d."+
" If a response contains intermediate outputs, you should use the SelectResultIter to read the data.",
len(intermediateOutputTypes), len(r.selectResp.IntermediateOutputs),
)
}
r.intermediateOutputTypes = intermediateOutputTypes
if err = r.ctx.SQLKiller.HandleSignal(); err != nil {
return err
}
for _, warning := range r.selectResp.Warnings {
r.ctx.AppendWarning(dbterror.ClassTiKV.Synthesize(terror.ErrCode(warning.Code), warning.Msg))
}
r.partialCount++
// Merge the cop runtime stats when the result subset carries them.
hasStats, ok := resultSubset.(CopRuntimeStats)
if ok {
copStats := hasStats.GetCopRuntimeStats()
if copStats != nil {
if err := r.updateCopRuntimeStats(ctx, copStats, resultSubset.RespTime(), false); err != nil {
return err
}
r.ctx.ExecDetails.MergeCopExecDetails(&copStats.CopExecDetails, duration)
}
}
// Stop as soon as the main output has chunks to read.
if len(r.selectResp.Chunks) != 0 {
break
}
if intermediate := r.selectResp.IntermediateOutputs; len(intermediate) > 0 {
for _, output := range intermediate {
if len(output.Chunks) != 0 {
// some intermediate output contains data,
// we should return the response even if the main output is empty.
return nil
}
}
}
// Neither the main output nor any intermediate output has data: fetch again.
}
return nil
}
// Next reads the next batch of rows into chk. A new response is fetched when
// none is loaded or when every chunk of the current one has been consumed; chk
// is left empty once the response stream is drained. It returns an error after
// IntoIter() has been called or when the response uses an unknown encoding.
func (r *selectResult) Next(ctx context.Context, chk *chunk.Chunk) error {
	if r.iter != nil {
		return errors.New("selectResult is invalid after IntoIter()")
	}
	chk.Reset()
	needFetch := r.selectResp == nil || r.respChkIdx == len(r.selectResp.Chunks)
	if needFetch {
		if err := r.fetchResp(ctx); err != nil {
			return err
		}
		if r.selectResp == nil {
			return nil
		}
		failpoint.Inject("mockConsumeSelectRespSlow", func(val failpoint.Value) {
			time.Sleep(time.Duration(val.(int) * int(time.Millisecond)))
		})
	}
	// TODO(Shenghui Wu): add metrics
	switch encodeType := r.selectResp.GetEncodeType(); encodeType {
	case tipb.EncodeType_TypeDefault:
		return r.readFromDefault(ctx, chk)
	case tipb.EncodeType_TypeChunk:
		return r.readFromChunk(ctx, chk)
	default:
		return errors.Errorf("unsupported encode type:%v", encodeType)
	}
}
// IntoIter turns the selectResult into a row iterator that also reads the
// intermediate outputs described by intermediateFieldTypes. It can be called
// only once; later calls return an error.
func (r *selectResult) IntoIter(intermediateFieldTypes [][]*types.FieldType) (SelectResultIter, error) {
	if r.iter == nil {
		return newSelectResultIter(r, intermediateFieldTypes), nil
	}
	return nil, errors.New("selectResult is invalid after IntoIter()")
}
// NextRaw returns the undecoded data of the next partial result. The data is
// nil when the underlying response yields no result or fails. The partial
// count is incremented on every call that reaches the underlying response.
func (r *selectResult) NextRaw(ctx context.Context) (data []byte, err error) {
	failpoint.Inject("mockNextRawError", func(val failpoint.Value) {
		if val.(bool) {
			failpoint.Return(nil, errors.New("mockNextRawError"))
		}
	})
	if r.iter != nil {
		return nil, errors.New("selectResult is invalid after IntoIter()")
	}
	resultSubset, err := r.resp.Next(ctx)
	r.partialCount++
	if err != nil || resultSubset == nil {
		return nil, err
	}
	return resultSubset.GetData(), nil
}
// readFromDefault decodes default-encoded rows into chk until chk is full or
// the response stream is drained, fetching a new response whenever all chunks
// of the current one have been consumed.
func (r *selectResult) readFromDefault(ctx context.Context, chk *chunk.Chunk) error {
	for !chk.IsFull() {
		if r.respChkIdx == len(r.selectResp.Chunks) {
			if err := r.fetchResp(ctx); err != nil || r.selectResp == nil {
				return err
			}
		}
		if err := r.readRowsData(chk); err != nil {
			return err
		}
		// Advance to the next response chunk once the current one is fully decoded.
		if remaining := r.selectResp.Chunks[r.respChkIdx].RowsData; len(remaining) == 0 {
			r.respChkIdx++
		}
	}
	return nil
}
// readFromChunk fills chk from chunk-encoded response data. It loops until chk
// is full or the response stream is drained, fetching a new response whenever
// all chunks of the current one have been consumed.
func (r *selectResult) readFromChunk(ctx context.Context, chk *chunk.Chunk) error {
// The decoder is created on first use and reused across calls.
if r.respChunkDecoder == nil {
r.respChunkDecoder = chunk.NewDecoder(
chunk.NewChunkWithCapacity(r.fieldTypes, 0),
r.fieldTypes,
)
}
for !chk.IsFull() {
if r.respChkIdx == len(r.selectResp.Chunks) {
err := r.fetchResp(ctx)
if err != nil || r.selectResp == nil {
return err
}
}
// Point the decoder at the current response chunk once the previous one is fully decoded.
if r.respChunkDecoder.IsFinished() {
r.respChunkDecoder.Reset(r.selectResp.Chunks[r.respChkIdx].RowsData)
}
// If the next chunk size is greater than required rows * 0.8, reuse the memory of the next chunk and return
// immediately. Otherwise, splice the data to one chunk and wait the next chunk.
if r.respChunkDecoder.RemainedRows() > int(float64(chk.RequiredRows())*0.8) {
// Rows already spliced into chk are returned first; the large chunk is
// handed over on the next call.
if chk.NumRows() > 0 {
return nil
}
r.respChunkDecoder.ReuseIntermChk(chk)
r.respChkIdx++
return nil
}
r.respChunkDecoder.Decode(chk)
if r.respChunkDecoder.IsFinished() {
r.respChkIdx++
}
}
return nil
}
// FillDummySummariesForTiFlashTasks records an all-zero execution summary for
// every plan ID in allPlanIDs that is not a key of recordedPlanIDs, so that mpp
// tasks which lack summaries still get an entry.
func FillDummySummariesForTiFlashTasks(runtimeStatsColl *execdetails.RuntimeStatsColl, storeType kv.StoreType, allPlanIDs []int, recordedPlanIDs map[int]int) {
	var zero uint64
	// The same placeholder (and the same zero counter) is shared by all missing plans.
	placeholder := &tipb.ExecutorExecutionSummary{
		TimeProcessedNs: &zero,
		NumProducedRows: &zero,
		NumIterations:   &zero,
		ExecutorId:      nil,
	}
	for _, planID := range allPlanIDs {
		if _, recorded := recordedPlanIDs[planID]; recorded {
			continue
		}
		runtimeStatsColl.RecordOneCopTask(planID, storeType, placeholder)
	}
}
// recordExecutionSummariesForTiFlashTasks records every complete mpp task
// execution summary, then fills dummy summaries for the plan IDs in allPlanIDs
// that received none. A summary is complete when it and its TimeProcessedNs,
// NumProducedRows and NumIterations fields are all non-nil.
func recordExecutionSummariesForTiFlashTasks(runtimeStatsColl *execdetails.RuntimeStatsColl, executionSummaries []*tipb.ExecutorExecutionSummary, storeType kv.StoreType, allPlanIDs []int) {
	recorded := make(map[int]int)
	for _, summary := range executionSummaries {
		if summary == nil || summary.TimeProcessedNs == nil ||
			summary.NumProducedRows == nil || summary.NumIterations == nil {
			continue
		}
		planID := runtimeStatsColl.RecordOneCopTask(-1, storeType, summary)
		recorded[planID] = 0
	}
	FillDummySummariesForTiFlashTasks(runtimeStatsColl, storeType, allPlanIDs, recorded)
}
// updateCopRuntimeStats merges the runtime stats of one cop response into the
// select result's own stats and into the statement-level collectors in r.ctx.
//
// respTime is the response time recorded for this response. forUnconsumedStats
// suppresses the warning logged when the number of execution summaries does
// not match the number of cop plan IDs.
//
// It is a no-op when there is no root plan ID, no runtime stats collector, or
// the stats carry neither a callee address nor any RPC stats.
func (r *selectResult) updateCopRuntimeStats(ctx context.Context, copStats *copr.CopRuntimeStats, respTime time.Duration, forUnconsumedStats bool) (err error) {
callee := copStats.CalleeAddress
if r.rootPlanID <= 0 || r.ctx.RuntimeStatsColl == nil || (callee == "" && (copStats.ReqStats == nil || copStats.ReqStats.GetRPCStatsCount() == 0)) {
return
}
// Report the read SLI when keys were processed or KV read time was spent.
if (copStats.ScanDetail != nil && copStats.ScanDetail.ProcessedKeys > 0) || copStats.TimeDetail.KvReadWallTime > 0 {
var readKeys int64
var readSize float64
if copStats.ScanDetail != nil {
readKeys = copStats.ScanDetail.ProcessedKeys
readSize = float64(copStats.ScanDetail.ProcessedKeysSize)
}
readTime := copStats.TimeDetail.KvReadWallTime.Seconds()
tikvmetrics.ObserveReadSLI(uint64(readKeys), readTime, readSize)
}
// Create the stats lazily; concurrency reported by the response, when
// available, overrides the configured value.
if r.stats == nil {
r.stats = &selectResultRuntimeStats{
distSQLConcurrency: r.distSQLConcurrency,
}
if ci, ok := r.resp.(copr.CopInfo); ok {
conc, extraConc := ci.GetConcurrency()
r.stats.distSQLConcurrency = conc
r.stats.extraConcurrency = extraConc
}
}
r.stats.mergeCopRuntimeStats(copStats, respTime)
// If hasExecutor is true, it means the summary is returned from TiFlash.
// Only the first complete summary is inspected.
hasExecutor := false
for _, detail := range r.selectResp.GetExecutionSummaries() {
if detail != nil && detail.TimeProcessedNs != nil &&
detail.NumProducedRows != nil && detail.NumIterations != nil {
if detail.ExecutorId != nil {
hasExecutor = true
}
break
}
}
if ruDetailsRaw := ctx.Value(clientutil.RUDetailsCtxKey); ruDetailsRaw != nil && r.storeType == kv.TiFlash {
if err = execdetails.MergeTiFlashRUConsumption(r.selectResp.GetExecutionSummaries(), ruDetailsRaw.(*clientutil.RUDetails)); err != nil {
return err
}
}
if copStats.TimeDetail.ProcessTime > 0 {
r.ctx.CPUUsage.MergeTikvCPUTime(copStats.TimeDetail.ProcessTime)
}
if hasExecutor {
// Scan and time details are recorded against the last cop plan ID.
if len(r.copPlanIDs) > 0 {
r.ctx.RuntimeStatsColl.RecordCopStats(r.copPlanIDs[len(r.copPlanIDs)-1], r.storeType, copStats.ScanDetail, copStats.TimeDetail, nil)
}
recordExecutionSummariesForTiFlashTasks(r.ctx.RuntimeStatsColl, r.selectResp.GetExecutionSummaries(), r.storeType, r.copPlanIDs)
// report MPP cross AZ network traffic bytes to resource control manager.
interZoneBytes := r.ctx.RuntimeStatsColl.GetStmtCopRuntimeStats().TiflashNetworkStats.GetInterZoneTrafficBytes()
if interZoneBytes > 0 {
consumption := &rmpb.Consumption{
ReadCrossAzTrafficBytes: interZoneBytes,
}
if r.ctx.RUConsumptionReporter != nil {
r.ctx.RUConsumptionReporter.ReportConsumption(r.ctx.ResourceGroupName, consumption)
}
}
} else {
// For cop task cases, we still need this protection.
if len(r.selectResp.GetExecutionSummaries()) != len(r.copPlanIDs) {
// for TiFlash streaming call(BatchCop and MPP), it is by design that only the last response will
// carry the execution summaries, so it is ok if some responses have no execution summaries, should
// not trigger an error log in this case.
if !forUnconsumedStats && !(r.storeType == kv.TiFlash && len(r.selectResp.GetExecutionSummaries()) == 0) {
logutil.Logger(ctx).Warn("invalid cop task execution summaries length",
zap.Int("expected", len(r.copPlanIDs)),
zap.Int("received", len(r.selectResp.GetExecutionSummaries())))
}
return
}
// The i-th summary belongs to the i-th cop plan ID. Incomplete summaries are
// dropped, except that the last plan always gets its scan/time details recorded.
for i, detail := range r.selectResp.GetExecutionSummaries() {
var summary *tipb.ExecutorExecutionSummary
if detail != nil && detail.TimeProcessedNs != nil &&
detail.NumProducedRows != nil && detail.NumIterations != nil {
summary = detail
}
planID := r.copPlanIDs[i]
if i == len(r.copPlanIDs)-1 {
r.ctx.RuntimeStatsColl.RecordCopStats(planID, r.storeType, copStats.ScanDetail, copStats.TimeDetail, summary)
} else if summary != nil {
r.ctx.RuntimeStatsColl.RecordOneCopTask(planID, r.storeType, summary)
}
}
}
return
}
// readRowsData decodes rows from the current response chunk into chk until chk
// is full or the chunk's data is exhausted, then stores the undecoded remainder
// back into the response chunk. On a decode error the remainder is not stored.
func (r *selectResult) readRowsData(chk *chunk.Chunk) (err error) {
	current := &r.selectResp.Chunks[r.respChkIdx]
	remaining := current.RowsData
	decoder := codec.NewDecoder(chk, r.ctx.Location)
	for !chk.IsFull() && len(remaining) > 0 {
		// Decode one full row, column by column.
		for col := 0; col < r.rowLen; col++ {
			if remaining, err = decoder.DecodeOne(remaining, col, r.fieldTypes[col]); err != nil {
				return err
			}
		}
	}
	current.RowsData = remaining
	return nil
}
// memConsume charges bytes (which may be negative) to the memory tracker. It
// does nothing when no tracker is attached.
func (r *selectResult) memConsume(bytes int64) {
	tracker := r.memTracker
	if tracker == nil {
		return
	}
	tracker.Consume(bytes)
}
// Close closes selectResult. After IntoIter() has been called it returns an
// error without closing anything.
func (r *selectResult) Close() error {
	if r.iter == nil {
		return r.close()
	}
	return errors.New("selectResult is invalid after IntoIter()")
}
// close releases the resources held by the select result: it observes the
// partial-count metric, returns the memory accounted for the current response,
// merges runtime stats of responses that were never consumed, registers the
// collected stats, and closes the underlying response. It returns the error of
// closing the underlying response.
func (r *selectResult) close() error {
metrics.DistSQLPartialCountHistogram.Observe(float64(r.partialCount))
// Swap to zero so the size of the current response is released only once.
respSize := atomic.SwapInt64(&r.selectRespSize, 0)
if respSize > 0 {
r.memConsume(-respSize)
}
if r.ctx != nil {
if unconsumed, ok := r.resp.(copr.HasUnconsumedCopRuntimeStats); ok && unconsumed != nil {
unconsumedCopStats := unconsumed.CollectUnconsumedCopRuntimeStats()
for _, copStats := range unconsumedCopStats {
// The error is deliberately ignored: collecting stats must not fail the close.
_ = r.updateCopRuntimeStats(context.Background(), copStats, time.Duration(0), true)
r.ctx.ExecDetails.MergeCopExecDetails(&copStats.CopExecDetails, 0)
}
}
}
if r.stats != nil && r.ctx != nil {
// Deferred so that the stats are finalized and registered after r.resp.Close() returns.
defer func() {
if ci, ok := r.resp.(copr.CopInfo); ok {
r.stats.buildTaskDuration = ci.GetBuildTaskElapsed()
batched, fallback := ci.GetStoreBatchInfo()
if batched != 0 || fallback != 0 {
r.stats.storeBatchedNum, r.stats.storeBatchedFallbackNum = batched, fallback
telemetryStoreBatchedCnt.Add(float64(r.stats.storeBatchedNum))
telemetryStoreBatchedFallbackCnt.Add(float64(r.stats.storeBatchedFallbackNum))
telemetryBatchedQueryTaskCnt.Add(float64(r.stats.copRespTime.Size()))
}
}
r.stats.fetchRspDuration = r.fetchDuration
r.ctx.RuntimeStatsColl.RegisterStats(r.rootPlanID, r.stats)
}()
}
return r.resp.Close()
}
// selRespChannelIter iterates the rows of one channel (an intermediate output
// or the final output) of a single select response.
type selRespChannelIter struct {
// channel is the index reported as SelectResultRow.ChannelIndex.
channel int
// loc is the time location passed to the default-encoding row decoder.
loc *time.Location
// rowLen is the number of columns decoded per row.
rowLen int
fieldTypes []*types.FieldType
encodeType tipb.EncodeType
// chkData is the encoded chunk data of this channel.
chkData []tipb.Chunk
// reserveChkSize indicates the reserved size for each chunk. (Only for default encoding)
reserveChkSize int
// curChkIdx indicates the index of the current chunk in chkData read currently.
curChkIdx int
// chk buffers the rows read from the current response
chk *chunk.Chunk
// chkOffset indicates the read offset in chk.
chkOffset int
}
// newSelRespChannelIter creates an iterator over one channel of the response
// currently loaded in result. Channels below the number of intermediate outputs
// select that intermediate output; the channel equal to that number selects the
// final output; any other channel is an error.
func newSelRespChannelIter(result *selectResult, channel int) (*selRespChannelIter, error) {
	intest.Assert(result != nil && result.selectResp != nil && len(result.selectResp.IntermediateOutputs) == len(result.intermediateOutputTypes))
	outputs := result.selectResp.IntermediateOutputs
	outputCnt := len(outputs)
	iter := &selRespChannelIter{
		channel:        channel,
		reserveChkSize: vardef.DefInitChunkSize,
	}
	switch {
	case channel < outputCnt:
		iter.fieldTypes = result.intermediateOutputTypes[channel]
		iter.rowLen = len(iter.fieldTypes)
		iter.encodeType = outputs[channel].GetEncodeType()
		iter.chkData = outputs[channel].GetChunks()
	case channel == outputCnt:
		iter.rowLen = result.rowLen
		iter.fieldTypes = result.fieldTypes
		iter.encodeType = result.selectResp.GetEncodeType()
		iter.chkData = result.selectResp.GetChunks()
	default:
		return nil, errors.Errorf(
			"invalid channel %d for selectResp with %d intermediate outputs",
			channel, outputCnt,
		)
	}
	iter.loc = result.ctx.Location
	return iter, nil
}
// Channel returns the index of the channel this iterator reads from.
func (iter *selRespChannelIter) Channel() int {
	idx := iter.channel
	return idx
}
// Next returns the next row of the channel. Rows are served from the buffered
// chunk; when it is used up the next chunk is decoded first. An empty
// SelectResultRow is returned when decoding fails or leaves no buffered chunk.
func (iter *selRespChannelIter) Next() (SelectResultRow, error) {
	if iter.chk == nil || iter.chkOffset >= iter.chk.NumRows() {
		if err := iter.nextChunk(); err != nil || iter.chk == nil {
			return SelectResultRow{}, err
		}
		iter.chkOffset = 0
	}
	row := iter.chk.GetRow(iter.chkOffset)
	iter.chkOffset++
	return SelectResultRow{ChannelIndex: iter.channel, Row: row}, nil
}
// nextChunk decodes the next buffered chunk of rows into iter.chk.
//
// For the default encoding, rows of consecutive response chunks are accumulated
// into one buffer until it holds the required number of rows or the data runs
// out. For the chunk encoding, each non-empty response chunk is decoded into
// its own buffer.
//
// On return iter.chk is nil when no rows are left in the channel; otherwise it
// holds at least one row. An error is returned when decoding fails or the
// encode type is unsupported.
func (iter *selRespChannelIter) nextChunk() error {
	iter.chk = nil
	for iter.curChkIdx < len(iter.chkData) {
		curData := &iter.chkData[iter.curChkIdx]
		if len(curData.RowsData) == 0 {
			iter.curChkIdx++
			continue
		}
		switch iter.encodeType {
		case tipb.EncodeType_TypeDefault:
			newChk, leftRowsData, err := iter.fillChunkFromDefault(iter.chk, curData.RowsData)
			if err != nil {
				return err
			}
			iter.chk = newChk
			curData.RowsData = leftRowsData
			if newChk.NumRows() < newChk.RequiredRows() {
				// The buffer is not full yet: keep filling from the following data.
				continue
			}
		case tipb.EncodeType_TypeChunk:
			iter.chk = chunk.NewChunkWithCapacity(iter.fieldTypes, 0)
			chunk.NewDecoder(iter.chk, iter.fieldTypes).Reset(curData.RowsData)
			// Mark the data as consumed so the next iteration skips it.
			curData.RowsData = nil
		default:
			return errors.Errorf("unsupported encode type: %v", iter.encodeType)
		}
		if iter.chk.NumRows() > 0 {
			return nil
		}
	}
	// All data is consumed. A payload that decoded to zero rows may have left
	// an empty buffer behind; drop it so callers see the channel as drained
	// instead of reading a row out of an empty chunk.
	if iter.chk != nil && iter.chk.NumRows() == 0 {
		iter.chk = nil
	}
	return nil
}
// fillChunkFromDefault decodes default-encoded rows from rowsData into chk,
// allocating chk with the reserved capacity when it is nil. Decoding stops when
// rowsData is exhausted or chk holds the required number of rows. It returns
// the chunk and the undecoded remainder of rowsData, or an error with nil
// results when a column fails to decode.
func (iter *selRespChannelIter) fillChunkFromDefault(chk *chunk.Chunk, rowsData []byte) (*chunk.Chunk, []byte, error) {
	if chk == nil {
		chk = chunk.NewChunkWithCapacity(iter.fieldTypes, iter.reserveChkSize)
	}
	var err error
	decoder := codec.NewDecoder(chk, iter.loc)
	for len(rowsData) > 0 && chk.NumRows() < chk.RequiredRows() {
		// Decode one full row, column by column.
		for col := 0; col < iter.rowLen; col++ {
			if rowsData, err = decoder.DecodeOne(rowsData, col, iter.fieldTypes[col]); err != nil {
				return nil, nil, err
			}
		}
	}
	return chk, rowsData, nil
}
// selectResultIter is the SelectResultIter implementation backed by a
// selectResult. It reads rows from every channel of each fetched response.
type selectResultIter struct {
result *selectResult
// channels holds the not-yet-drained channel iterators of the current
// response; they are consumed from the last element backwards.
channels []*selRespChannelIter
// intermediateOutputTypes are the field types of each intermediate output.
intermediateOutputTypes [][]*types.FieldType
}
// newSelectResultIter creates the iterator for result and attaches it to
// result.iter, which marks the selectResult as converted.
func newSelectResultIter(result *selectResult, intermediateOutputTypes [][]*types.FieldType) *selectResultIter {
	intest.Assert(result != nil && result.iter == nil)
	it := &selectResultIter{
		result:                  result,
		intermediateOutputTypes: intermediateOutputTypes,
	}
	result.iter = it
	return it
}
// Next implements the SelectResultIter interface.
// It returns the next row of the current response, fetching a new response and
// building one channel iterator per output whenever none is loaded. An empty
// row is returned once the response stream is drained.
func (iter *selectResultIter) Next(ctx context.Context) (SelectResultRow, error) {
	r := iter.result
	for {
		if r.selectResp == nil {
			if err := r.fetchRespWithIntermediateResults(ctx, iter.intermediateOutputTypes); err != nil {
				return SelectResultRow{}, err
			}
			if r.selectResp == nil {
				return SelectResultRow{}, nil
			}
			// One channel per intermediate output plus one for the final output.
			channelCnt := len(iter.intermediateOutputTypes) + 1
			if iter.channels == nil {
				iter.channels = make([]*selRespChannelIter, 0, channelCnt)
			}
			for idx := 0; idx < channelCnt; idx++ {
				ch, err := newSelRespChannelIter(r, idx)
				if err != nil {
					return SelectResultRow{}, err
				}
				iter.channels = append(iter.channels, ch)
			}
		}
		// here we read the channel in reverse order to make sure the "more complete" data should be read first.
		// For example, if a cop-request contains IndexLookUp, we should read the final rows first (with the biggest channel index),
		// and then read the index rows (with smaller channel index) that have not been looked up.
		for last := len(iter.channels) - 1; last >= 0; last-- {
			row, err := iter.channels[last].Next()
			if err != nil || !row.IsEmpty() {
				return row, err
			}
			// This channel is drained: drop it and move on to the previous one.
			iter.channels = iter.channels[:last]
		}
		// Every channel is drained: discard the response so the next one is fetched.
		r.selectResp = nil
	}
}
// Close implements the SelectResultIter interface.
// It closes the underlying selectResult.
func (iter *selectResultIter) Close() error {
	underlying := iter.result
	return underlying.close()
}
// CopRuntimeStats is an interface used to check whether the result has cop runtime stats.
type CopRuntimeStats interface {
// GetCopRuntimeStats gets the cop runtime stats information.
GetCopRuntimeStats() *copr.CopRuntimeStats
}
// selectResultRuntimeStats aggregates the runtime statistics of the cop
// responses consumed by one select result.
type selectResultRuntimeStats struct {
// copRespTime and procKeys get one sample per merged cop response.
copRespTime execdetails.Percentile[execdetails.Duration]
procKeys execdetails.Percentile[execdetails.Int64]
// backoffSleep sums the backoff sleep time per backoff name; nil until needed.
backoffSleep map[string]time.Duration
totalProcessTime time.Duration
totalWaitTime time.Duration
// reqStat stays nil until a merged response carries request stats.
reqStat *tikv.RegionRequestRuntimeStats
distSQLConcurrency int
extraConcurrency int
// CoprCacheHitNum counts the merged responses flagged as coprocessor cache hits.
CoprCacheHitNum int64
storeBatchedNum uint64
storeBatchedFallbackNum uint64
buildTaskDuration time.Duration
fetchRspDuration time.Duration
}
// mergeCopRuntimeStats folds the stats of one cop response into s: it adds a
// response-time and processed-keys sample, accumulates backoff, process and
// wait time, merges the request stats, and counts coprocessor cache hits.
func (s *selectResultRuntimeStats) mergeCopRuntimeStats(copStats *copr.CopRuntimeStats, respTime time.Duration) {
	s.copRespTime.Add(execdetails.Duration(respTime))

	// A response without scan detail contributes a zero sample.
	var keys execdetails.Int64
	if detail := copStats.ScanDetail; detail != nil {
		keys = execdetails.Int64(detail.ProcessedKeys)
	}
	s.procKeys.Add(keys)

	if len(copStats.BackoffSleep) > 0 && s.backoffSleep == nil {
		s.backoffSleep = make(map[string]time.Duration)
	}
	for name, slept := range copStats.BackoffSleep {
		s.backoffSleep[name] += slept
	}

	s.totalProcessTime += copStats.TimeDetail.ProcessTime
	s.totalWaitTime += copStats.TimeDetail.WaitTime

	switch {
	case copStats.ReqStats == nil:
		// Nothing to merge.
	case s.reqStat == nil:
		s.reqStat = copStats.ReqStats
	default:
		s.reqStat.Merge(copStats.ReqStats)
	}

	if copStats.CoprCacheHit {
		s.CoprCacheHitNum++
	}
}
// Clone implements the RuntimeStats interface.
// It returns a copy of s that shares no percentile data, backoff map or request
// stats with the original.
func (s *selectResultRuntimeStats) Clone() execdetails.RuntimeStats {
	newRs := selectResultRuntimeStats{
		copRespTime:             execdetails.Percentile[execdetails.Duration]{},
		procKeys:                execdetails.Percentile[execdetails.Int64]{},
		backoffSleep:            make(map[string]time.Duration, len(s.backoffSleep)),
		totalProcessTime:        s.totalProcessTime,
		totalWaitTime:           s.totalWaitTime,
		distSQLConcurrency:      s.distSQLConcurrency,
		extraConcurrency:        s.extraConcurrency,
		CoprCacheHitNum:         s.CoprCacheHitNum,
		storeBatchedNum:         s.storeBatchedNum,
		storeBatchedFallbackNum: s.storeBatchedFallbackNum,
		buildTaskDuration:       s.buildTaskDuration,
		fetchRspDuration:        s.fetchRspDuration,
	}
	newRs.copRespTime.MergePercentile(&s.copRespTime)
	newRs.procKeys.MergePercentile(&s.procKeys)
	for k, v := range s.backoffSleep {
		newRs.backoffSleep[k] = v
	}
	// reqStat is only set once a merged response carried request stats, so it
	// may be nil here. Clone it when present; otherwise give the copy a fresh,
	// empty one instead of allocating one that is immediately discarded.
	if s.reqStat != nil {
		newRs.reqStat = s.reqStat.Clone()
	} else {
		newRs.reqStat = tikv.NewRegionRequestRuntimeStats()
	}
	return &newRs
}
// Merge implements the RuntimeStats interface.
// It folds rs into s when rs is a *selectResultRuntimeStats and is a no-op
// otherwise. Samples, durations and counters are added up; the concurrency
// values keep the maximum of both sides.
func (s *selectResultRuntimeStats) Merge(rs execdetails.RuntimeStats) {
	other, ok := rs.(*selectResultRuntimeStats)
	if !ok {
		return
	}
	s.copRespTime.MergePercentile(&other.copRespTime)
	s.procKeys.MergePercentile(&other.procKeys)
	if len(other.backoffSleep) > 0 {
		if s.backoffSleep == nil {
			s.backoffSleep = make(map[string]time.Duration)
		}
		for k, v := range other.backoffSleep {
			s.backoffSleep[k] += v
		}
	}
	s.totalProcessTime += other.totalProcessTime
	s.totalWaitTime += other.totalWaitTime
	// reqStat is initialized lazily (see mergeCopRuntimeStats), so either side
	// may be nil. Adopt a private copy when the receiver has none, rather than
	// calling Merge on a nil receiver or aliasing other's stats.
	if other.reqStat != nil {
		if s.reqStat == nil {
			s.reqStat = other.reqStat.Clone()
		} else {
			s.reqStat.Merge(other.reqStat)
		}
	}
	s.CoprCacheHitNum += other.CoprCacheHitNum
	if other.distSQLConcurrency > s.distSQLConcurrency {
		s.distSQLConcurrency = other.distSQLConcurrency
	}
	if other.extraConcurrency > s.extraConcurrency {
		s.extraConcurrency = other.extraConcurrency
	}
	s.storeBatchedNum += other.storeBatchedNum
	s.storeBatchedFallbackNum += other.storeBatchedFallbackNum
	s.buildTaskDuration += other.buildTaskDuration
	s.fetchRspDuration += other.fetchRspDuration
}
// String implements the RuntimeStats interface.
// It renders the stats as a comma-separated summary: a "cop_task: {...}" section
// (only when at least one response was recorded) followed by the fetch
// duration, the RPC info and the backoff times when they are present.
func (s *selectResultRuntimeStats) String() string {
buf := bytes.NewBuffer(nil)
reqStat := s.reqStat
if s.copRespTime.Size() > 0 {
size := s.copRespTime.Size()
// A single task prints its own time and processed keys; several tasks
// print aggregate figures instead.
if size == 1 {
fmt.Fprintf(buf, "cop_task: {num: 1, max: %v, proc_keys: %v", execdetails.FormatDuration(time.Duration(s.copRespTime.GetPercentile(0))), s.procKeys.GetPercentile(0))
} else {
vMax, vMin := s.copRespTime.GetMax(), s.copRespTime.GetMin()
vP95 := s.copRespTime.GetPercentile(0.95)
sum := s.copRespTime.Sum()
vAvg := time.Duration(sum / float64(size))
keyMax := s.procKeys.GetMax()
keyP95 := s.procKeys.GetPercentile(0.95)
fmt.Fprintf(buf, "cop_task: {num: %v, max: %v, min: %v, avg: %v, p95: %v", size,
execdetails.FormatDuration(time.Duration(vMax.GetFloat64())), execdetails.FormatDuration(time.Duration(vMin.GetFloat64())),
execdetails.FormatDuration(vAvg), execdetails.FormatDuration(time.Duration(vP95)))
if keyMax > 0 {
buf.WriteString(", max_proc_keys: ")
buf.WriteString(strconv.FormatInt(int64(keyMax), 10))
buf.WriteString(", p95_proc_keys: ")
buf.WriteString(strconv.FormatInt(int64(keyP95), 10))
}
}
// tot_wait is only printed together with tot_proc.
if s.totalProcessTime > 0 {
buf.WriteString(", tot_proc: ")
buf.WriteString(execdetails.FormatDuration(s.totalProcessTime))
if s.totalWaitTime > 0 {
buf.WriteString(", tot_wait: ")
buf.WriteString(execdetails.FormatDuration(s.totalWaitTime))
}
}
if config.GetGlobalConfig().TiKVClient.CoprCache.CapacityMB > 0 {
fmt.Fprintf(buf, ", copr_cache_hit_ratio: %v",
strconv.FormatFloat(s.calcCacheHit(), 'f', 2, 64))
} else {
buf.WriteString(", copr_cache: disabled")
}
if s.buildTaskDuration > 0 {
buf.WriteString(", build_task_duration: ")
buf.WriteString(execdetails.FormatDuration(s.buildTaskDuration))
}
if s.distSQLConcurrency > 0 {
buf.WriteString(", max_distsql_concurrency: ")
buf.WriteString(strconv.FormatInt(int64(s.distSQLConcurrency), 10))
}
if s.extraConcurrency > 0 {
buf.WriteString(", max_extra_concurrency: ")
buf.WriteString(strconv.FormatInt(int64(s.extraConcurrency), 10))
}
if s.storeBatchedNum > 0 {
buf.WriteString(", store_batch_num: ")
buf.WriteString(strconv.FormatInt(int64(s.storeBatchedNum), 10))
}
if s.storeBatchedFallbackNum > 0 {
buf.WriteString(", store_batch_fallback_num: ")
buf.WriteString(strconv.FormatInt(int64(s.storeBatchedFallbackNum), 10))
}
// Close the cop_task section; the fetch duration is printed outside of it.
buf.WriteString("}")
if s.fetchRspDuration > 0 {
buf.WriteString(", fetch_resp_duration: ")
buf.WriteString(execdetails.FormatDuration(s.fetchRspDuration))
}
}
// NOTE(review): reqStat may be nil here; this relies on String being safe to
// call on a nil *RegionRequestRuntimeStats — confirm against client-go.
rpcStatsStr := reqStat.String()
if len(rpcStatsStr) > 0 {
buf.WriteString(", rpc_info:{")
buf.WriteString(rpcStatsStr)
buf.WriteString("}")
}
if len(s.backoffSleep) > 0 {
buf.WriteString(", backoff{")
idx := 0
// Map iteration order is unspecified, so the order of the entries may vary between calls.
for k, d := range s.backoffSleep {
if idx > 0 {
buf.WriteString(", ")
}
idx++
fmt.Fprintf(buf, "%s: %s", k, execdetails.FormatDuration(d))
}
buf.WriteString("}")
}
return buf.String()
}
// Tp implements the RuntimeStats interface.
// It identifies these stats as select-result runtime stats.
func (*selectResultRuntimeStats) Tp() int {
	const tp = execdetails.TpSelectResultRuntimeStats
	return tp
}
// calcCacheHit returns the coprocessor cache hit ratio: the number of cache
// hits divided by the number of recorded responses plus the number of
// store-batched tasks. It returns 0 when that total is zero.
func (s *selectResultRuntimeStats) calcCacheHit() float64 {
	total := s.copRespTime.Size() + int(s.storeBatchedNum)
	if total == 0 {
		return 0
	}
	return float64(s.CoprCacheHitNum) / float64(total)
}