463 lines
13 KiB
Go
463 lines
13 KiB
Go
// Copyright 2021 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package executor
|
|
|
|
import (
|
|
"context"
|
|
|
|
"github.com/pingcap/errors"
|
|
"github.com/pingcap/tidb/pkg/executor/aggfuncs"
|
|
"github.com/pingcap/tidb/pkg/executor/internal/exec"
|
|
"github.com/pingcap/tidb/pkg/executor/internal/vecgroupchecker"
|
|
"github.com/pingcap/tidb/pkg/expression"
|
|
"github.com/pingcap/tidb/pkg/parser/ast"
|
|
"github.com/pingcap/tidb/pkg/planner/core"
|
|
"github.com/pingcap/tidb/pkg/sessionctx"
|
|
"github.com/pingcap/tidb/pkg/util/chunk"
|
|
)
|
|
|
|
type dataInfo struct {
|
|
chk *chunk.Chunk
|
|
remaining uint64
|
|
accumulated uint64
|
|
}
|
|
|
|
// PipelinedWindowExec is the executor for window functions.
|
|
type PipelinedWindowExec struct {
|
|
exec.BaseExecutor
|
|
numWindowFuncs int
|
|
windowFuncs []aggfuncs.AggFunc
|
|
slidingWindowFuncs []aggfuncs.SlidingWindowAggFunc
|
|
partialResults []aggfuncs.PartialResult
|
|
start *core.FrameBound
|
|
end *core.FrameBound
|
|
groupChecker *vecgroupchecker.VecGroupChecker
|
|
|
|
// childResult stores the child chunk. Note that even if remaining is 0, e.rows might still references rows in data[0].chk after returned it to upper executor, since there is no guarantee what the upper executor will do to the returned chunk, it might destroy the data (as in the benchmark test, it reused the chunk to pull data, and it will be chk.Reset(), causing panicking). So dataIdx, accumulated and dropped are added to ensure that chunk will only be returned if there is no row reference.
|
|
childResult *chunk.Chunk
|
|
data []dataInfo
|
|
dataIdx int
|
|
|
|
// done indicates the child executor is drained or something unexpected happened.
|
|
done bool
|
|
accumulated uint64
|
|
dropped uint64
|
|
rowToConsume uint64
|
|
newPartition bool
|
|
|
|
curRowIdx uint64
|
|
// curStartRow and curEndRow defines the current frame range
|
|
lastStartRow uint64
|
|
lastEndRow uint64
|
|
stagedStartRow uint64
|
|
stagedEndRow uint64
|
|
rowStart uint64
|
|
orderByCols []*expression.Column
|
|
// expectedCmpResult is used to decide if one value is included in the frame.
|
|
expectedCmpResult int64
|
|
|
|
// rows keeps rows starting from curStartRow
|
|
rows []chunk.Row
|
|
rowCnt uint64
|
|
whole bool
|
|
isRangeFrame bool
|
|
emptyFrame bool
|
|
initializedSlidingWindow bool
|
|
}
|
|
|
|
// Close implements the Executor Close interface.
|
|
func (e *PipelinedWindowExec) Close() error {
|
|
return errors.Trace(e.BaseExecutor.Close())
|
|
}
|
|
|
|
// Open implements the Executor Open interface
|
|
func (e *PipelinedWindowExec) Open(ctx context.Context) (err error) {
|
|
e.rowToConsume = 0
|
|
e.done = false
|
|
e.accumulated = 0
|
|
e.dropped = 0
|
|
e.data = make([]dataInfo, 0)
|
|
e.dataIdx = 0
|
|
e.slidingWindowFuncs = make([]aggfuncs.SlidingWindowAggFunc, len(e.windowFuncs))
|
|
for i, windowFunc := range e.windowFuncs {
|
|
if slidingWindowAggFunc, ok := windowFunc.(aggfuncs.SlidingWindowAggFunc); ok {
|
|
e.slidingWindowFuncs[i] = slidingWindowAggFunc
|
|
}
|
|
}
|
|
e.rows = make([]chunk.Row, 0)
|
|
return e.BaseExecutor.Open(ctx)
|
|
}
|
|
|
|
func (e *PipelinedWindowExec) firstResultChunkNotReady() bool {
|
|
if !e.done && len(e.data) == 0 {
|
|
return true
|
|
}
|
|
// chunk can't be ready unless, 1. all of the rows in the chunk is filled, 2. e.rows doesn't contain rows in the chunk
|
|
return len(e.data) > 0 && (e.data[0].remaining != 0 || e.data[0].accumulated > e.dropped)
|
|
}
|
|
|
|
// Next implements the Executor Next interface.
|
|
func (e *PipelinedWindowExec) Next(ctx context.Context, chk *chunk.Chunk) (err error) {
|
|
chk.Reset()
|
|
|
|
for e.firstResultChunkNotReady() {
|
|
// we firstly gathering enough rows and consume them, until we are able to produce.
|
|
// for unbounded frame, it needs consume the whole partition before being able to produce, in this case
|
|
// e.p.enoughToProduce will be false until so.
|
|
var enough bool
|
|
enough, err = e.enoughToProduce(e.Ctx())
|
|
if err != nil {
|
|
return
|
|
}
|
|
if !enough {
|
|
if !e.done && e.rowToConsume == 0 {
|
|
err = e.getRowsInPartition(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if e.done || e.newPartition {
|
|
e.finish()
|
|
// if we continued, the rows will not be consumed, so next time we should consume it instead of calling e.getRowsInPartition
|
|
enough, err = e.enoughToProduce(e.Ctx())
|
|
if err != nil {
|
|
return
|
|
}
|
|
if enough {
|
|
continue
|
|
}
|
|
e.newPartition = false
|
|
e.reset()
|
|
if e.rowToConsume == 0 {
|
|
// no more data
|
|
break
|
|
}
|
|
}
|
|
e.rowCnt += e.rowToConsume
|
|
e.rowToConsume = 0
|
|
}
|
|
|
|
// e.p is ready to produce data
|
|
if len(e.data) > e.dataIdx && e.data[e.dataIdx].remaining != 0 {
|
|
produced, err := e.produce(e.Ctx(), e.data[e.dataIdx].chk, e.data[e.dataIdx].remaining)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
e.data[e.dataIdx].remaining -= produced
|
|
if e.data[e.dataIdx].remaining == 0 {
|
|
e.dataIdx++
|
|
}
|
|
}
|
|
}
|
|
if len(e.data) > 0 {
|
|
chk.SwapColumns(e.data[0].chk)
|
|
e.data = e.data[1:]
|
|
e.dataIdx--
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (e *PipelinedWindowExec) getRowsInPartition(ctx context.Context) (err error) {
|
|
e.newPartition = true
|
|
if len(e.rows) == 0 {
|
|
// if getRowsInPartition is called for the first time, we ignore it as a new partition
|
|
e.newPartition = false
|
|
}
|
|
|
|
if e.groupChecker.IsExhausted() {
|
|
var drained, samePartition bool
|
|
drained, err = e.fetchChild(ctx)
|
|
if err != nil {
|
|
return errors.Trace(err)
|
|
}
|
|
// we return immediately to use a combination of true newPartition but 0 in e.rowToConsume to indicate the data source is drained,
|
|
if drained {
|
|
e.done = true
|
|
return nil
|
|
}
|
|
samePartition, err = e.groupChecker.SplitIntoGroups(e.childResult)
|
|
if samePartition {
|
|
// the only case that when getRowsInPartition gets called, it is not a new partition.
|
|
e.newPartition = false
|
|
}
|
|
if err != nil {
|
|
return errors.Trace(err)
|
|
}
|
|
}
|
|
begin, end := e.groupChecker.GetNextGroup()
|
|
e.rowToConsume += uint64(end - begin)
|
|
for i := begin; i < end; i++ {
|
|
e.rows = append(e.rows, e.childResult.GetRow(i))
|
|
}
|
|
return
|
|
}
|
|
|
|
func (e *PipelinedWindowExec) fetchChild(ctx context.Context) (eof bool, err error) {
|
|
// TODO: reuse chunks
|
|
childResult := exec.TryNewCacheChunk(e.Children(0))
|
|
err = exec.Next(ctx, e.Children(0), childResult)
|
|
if err != nil {
|
|
return false, errors.Trace(err)
|
|
}
|
|
// No more data.
|
|
numRows := childResult.NumRows()
|
|
if numRows == 0 {
|
|
return true, nil
|
|
}
|
|
|
|
// TODO: reuse chunks
|
|
resultChk := e.AllocPool.Alloc(e.RetFieldTypes(), 0, numRows)
|
|
err = e.copyChk(childResult, resultChk)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
e.accumulated += uint64(numRows)
|
|
e.data = append(e.data, dataInfo{chk: resultChk, remaining: uint64(numRows), accumulated: e.accumulated})
|
|
|
|
e.childResult = childResult
|
|
return false, nil
|
|
}
|
|
|
|
func (e *PipelinedWindowExec) copyChk(src, dst *chunk.Chunk) error {
|
|
columns := e.Schema().Columns[:len(e.Schema().Columns)-e.numWindowFuncs]
|
|
for i, col := range columns {
|
|
if err := dst.MakeRefTo(i, src, col.Index); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (e *PipelinedWindowExec) getRow(i uint64) chunk.Row {
|
|
return e.rows[i-e.rowStart]
|
|
}
|
|
|
|
func (e *PipelinedWindowExec) getRows(start, end uint64) []chunk.Row {
|
|
return e.rows[start-e.rowStart : end-e.rowStart]
|
|
}
|
|
|
|
// finish is called upon a whole partition is consumed
|
|
func (e *PipelinedWindowExec) finish() {
|
|
e.whole = true
|
|
}
|
|
|
|
func (e *PipelinedWindowExec) getStart(ctx sessionctx.Context) (uint64, error) {
|
|
if e.start.UnBounded {
|
|
return 0, nil
|
|
}
|
|
if e.isRangeFrame {
|
|
var start uint64
|
|
for start = max(e.lastStartRow, e.stagedStartRow); start < e.rowCnt; start++ {
|
|
var res int64
|
|
var err error
|
|
for i := range e.orderByCols {
|
|
res, _, err = e.start.CmpFuncs[i](ctx.GetExprCtx().GetEvalCtx(), e.start.CompareCols[i], e.start.CalcFuncs[i], e.getRow(start), e.getRow(e.curRowIdx))
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
if res != 0 {
|
|
break
|
|
}
|
|
}
|
|
// For asc, break when the calculated result is greater than the current value.
|
|
// For desc, break when the calculated result is less than the current value.
|
|
if res != e.expectedCmpResult {
|
|
break
|
|
}
|
|
}
|
|
e.stagedStartRow = start
|
|
return start, nil
|
|
}
|
|
switch e.start.Type {
|
|
case ast.Preceding:
|
|
if e.curRowIdx > e.start.Num {
|
|
return e.curRowIdx - e.start.Num, nil
|
|
}
|
|
return 0, nil
|
|
case ast.Following:
|
|
return e.curRowIdx + e.start.Num, nil
|
|
default: // ast.CurrentRow
|
|
return e.curRowIdx, nil
|
|
}
|
|
}
|
|
|
|
func (e *PipelinedWindowExec) getEnd(ctx sessionctx.Context) (uint64, error) {
|
|
if e.end.UnBounded {
|
|
return e.rowCnt, nil
|
|
}
|
|
if e.isRangeFrame {
|
|
var end uint64
|
|
for end = max(e.lastEndRow, e.stagedEndRow); end < e.rowCnt; end++ {
|
|
var res int64
|
|
var err error
|
|
for i := range e.orderByCols {
|
|
res, _, err = e.end.CmpFuncs[i](ctx.GetExprCtx().GetEvalCtx(), e.end.CalcFuncs[i], e.end.CompareCols[i], e.getRow(e.curRowIdx), e.getRow(end))
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
if res != 0 {
|
|
break
|
|
}
|
|
}
|
|
// For asc, break when the calculated result is greater than the current value.
|
|
// For desc, break when the calculated result is less than the current value.
|
|
if res == e.expectedCmpResult {
|
|
break
|
|
}
|
|
}
|
|
e.stagedEndRow = end
|
|
return end, nil
|
|
}
|
|
switch e.end.Type {
|
|
case ast.Preceding:
|
|
if e.curRowIdx >= e.end.Num {
|
|
return e.curRowIdx - e.end.Num + 1, nil
|
|
}
|
|
return 0, nil
|
|
case ast.Following:
|
|
return e.curRowIdx + e.end.Num + 1, nil
|
|
default: // ast.CurrentRow:
|
|
return e.curRowIdx + 1, nil
|
|
}
|
|
}
|
|
|
|
// produce produces rows and append it to chk, return produced means number of rows appended into chunk, available means
|
|
// number of rows processed but not fetched
|
|
func (e *PipelinedWindowExec) produce(ctx sessionctx.Context, chk *chunk.Chunk, remained uint64) (produced uint64, err error) {
|
|
var (
|
|
start uint64
|
|
end uint64
|
|
enough bool
|
|
)
|
|
for remained > 0 {
|
|
enough, err = e.enoughToProduce(ctx)
|
|
if err != nil {
|
|
return
|
|
}
|
|
if !enough {
|
|
break
|
|
}
|
|
start, err = e.getStart(ctx)
|
|
if err != nil {
|
|
return
|
|
}
|
|
end, err = e.getEnd(ctx)
|
|
if err != nil {
|
|
return
|
|
}
|
|
if end > e.rowCnt {
|
|
end = e.rowCnt
|
|
}
|
|
if start >= e.rowCnt {
|
|
start = e.rowCnt
|
|
}
|
|
// if start >= end, we should return a default value, and we reset the frame to empty.
|
|
if start >= end {
|
|
for i, wf := range e.windowFuncs {
|
|
if !e.emptyFrame {
|
|
wf.ResetPartialResult(e.partialResults[i])
|
|
}
|
|
err = wf.AppendFinalResult2Chunk(ctx.GetExprCtx().GetEvalCtx(), e.partialResults[i], chk)
|
|
if err != nil {
|
|
return
|
|
}
|
|
}
|
|
if !e.emptyFrame {
|
|
e.emptyFrame = true
|
|
e.initializedSlidingWindow = false
|
|
}
|
|
} else {
|
|
e.emptyFrame = false
|
|
for i, wf := range e.windowFuncs {
|
|
slidingWindowAggFunc := e.slidingWindowFuncs[i]
|
|
if e.lastStartRow != start || e.lastEndRow != end {
|
|
if slidingWindowAggFunc != nil && e.initializedSlidingWindow {
|
|
err = slidingWindowAggFunc.Slide(ctx.GetExprCtx().GetEvalCtx(), e.getRow, e.lastStartRow, e.lastEndRow, start-e.lastStartRow, end-e.lastEndRow, e.partialResults[i])
|
|
} else {
|
|
// For MinMaxSlidingWindowAggFuncs, it needs the absolute value of each start of window, to compare
|
|
// whether elements inside deque are out of current window.
|
|
if minMaxSlidingWindowAggFunc, ok := wf.(aggfuncs.MaxMinSlidingWindowAggFunc); ok {
|
|
// Store start inside MaxMinSlidingWindowAggFunc.windowInfo
|
|
minMaxSlidingWindowAggFunc.SetWindowStart(start)
|
|
}
|
|
// TODO(zhifeng): track memory usage here
|
|
wf.ResetPartialResult(e.partialResults[i])
|
|
_, err = wf.UpdatePartialResult(ctx.GetExprCtx().GetEvalCtx(), e.getRows(start, end), e.partialResults[i])
|
|
}
|
|
}
|
|
if err != nil {
|
|
return
|
|
}
|
|
err = wf.AppendFinalResult2Chunk(ctx.GetExprCtx().GetEvalCtx(), e.partialResults[i], chk)
|
|
if err != nil {
|
|
return
|
|
}
|
|
}
|
|
e.initializedSlidingWindow = true
|
|
}
|
|
e.curRowIdx++
|
|
e.lastStartRow, e.lastEndRow = start, end
|
|
|
|
produced++
|
|
remained--
|
|
}
|
|
extend := min(e.curRowIdx, e.lastEndRow, e.lastStartRow)
|
|
if extend > e.rowStart {
|
|
numDrop := extend - e.rowStart
|
|
e.dropped += numDrop
|
|
e.rows = e.rows[numDrop:]
|
|
e.rowStart = extend
|
|
}
|
|
return
|
|
}
|
|
|
|
func (e *PipelinedWindowExec) enoughToProduce(ctx sessionctx.Context) (enough bool, err error) {
|
|
if e.curRowIdx >= e.rowCnt {
|
|
return false, nil
|
|
}
|
|
if e.whole {
|
|
return true, nil
|
|
}
|
|
start, err := e.getStart(ctx)
|
|
if err != nil {
|
|
return
|
|
}
|
|
end, err := e.getEnd(ctx)
|
|
if err != nil {
|
|
return
|
|
}
|
|
return end < e.rowCnt && start < e.rowCnt, nil
|
|
}
|
|
|
|
// reset resets the processor
|
|
func (e *PipelinedWindowExec) reset() {
|
|
e.lastStartRow = 0
|
|
e.lastEndRow = 0
|
|
e.stagedStartRow = 0
|
|
e.stagedEndRow = 0
|
|
e.emptyFrame = false
|
|
e.curRowIdx = 0
|
|
e.whole = false
|
|
numDrop := e.rowCnt - e.rowStart
|
|
e.dropped += numDrop
|
|
e.rows = e.rows[numDrop:]
|
|
e.rowStart = 0
|
|
e.rowCnt = 0
|
|
e.initializedSlidingWindow = false
|
|
for i, windowFunc := range e.windowFuncs {
|
|
windowFunc.ResetPartialResult(e.partialResults[i])
|
|
}
|
|
}
|