Files
tidb/pkg/planner/core/logical_window.go

468 lines
16 KiB
Go

// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package core
import (
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/expression/aggregation"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/planner/core/base"
"github.com/pingcap/tidb/pkg/planner/core/operator/logicalop"
"github.com/pingcap/tidb/pkg/planner/property"
"github.com/pingcap/tidb/pkg/planner/util/optimizetrace"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/plancodec"
"github.com/pingcap/tipb/go-tipb"
)
// LogicalWindow represents a logical window function plan.
type LogicalWindow struct {
logicalop.LogicalSchemaProducer
WindowFuncDescs []*aggregation.WindowFuncDesc
PartitionBy []property.SortItem
OrderBy []property.SortItem
Frame *WindowFrame
}
// WindowFrame represents a window function frame.
type WindowFrame struct {
Type ast.FrameType
Start *FrameBound
End *FrameBound
}
// Clone copies a window frame totally.
func (wf *WindowFrame) Clone() *WindowFrame {
cloned := new(WindowFrame)
*cloned = *wf
cloned.Start = wf.Start.Clone()
cloned.End = wf.End.Clone()
return cloned
}
// FrameBound is the boundary of a frame.
type FrameBound struct {
Type ast.BoundType
UnBounded bool
Num uint64
// CalcFuncs is used for range framed windows.
// We will build the date_add or date_sub functions for frames like `INTERVAL '2:30' MINUTE_SECOND FOLLOWING`,
// and plus or minus for frames like `1 preceding`.
CalcFuncs []expression.Expression
// Sometimes we need to cast order by column to a specific type when frame type is range
CompareCols []expression.Expression
// CmpFuncs is used to decide whether one row is included in the current frame.
CmpFuncs []expression.CompareFunc
// This field is used for passing information to tiflash
CmpDataType tipb.RangeCmpDataType
// IsExplicitRange marks if this range explicitly appears in the sql
IsExplicitRange bool
}
// Clone copies a frame bound totally.
func (fb *FrameBound) Clone() *FrameBound {
cloned := new(FrameBound)
*cloned = *fb
cloned.CalcFuncs = make([]expression.Expression, 0, len(fb.CalcFuncs))
for _, it := range fb.CalcFuncs {
cloned.CalcFuncs = append(cloned.CalcFuncs, it.Clone())
}
cloned.CmpFuncs = fb.CmpFuncs
return cloned
}
func (fb *FrameBound) updateCmpFuncsAndCmpDataType(cmpDataType types.EvalType) {
// When cmpDataType can't match to any condition, we can ignore it.
//
// For example:
// `create table test.range_test(p int not null,o text not null,v int not null);`
// `select *, first_value(v) over (partition by p order by o) as a from range_test;`
// The sql's frame type is range, but the cmpDataType is ETString and when the user explicitly use range frame
// the sql will raise error before generating logical plan, so it's ok to ignore it.
switch cmpDataType {
case types.ETInt:
fb.CmpFuncs[0] = expression.CompareInt
fb.CmpDataType = tipb.RangeCmpDataType_Int
case types.ETDatetime, types.ETTimestamp:
fb.CmpFuncs[0] = expression.CompareTime
fb.CmpDataType = tipb.RangeCmpDataType_DateTime
case types.ETDuration:
fb.CmpFuncs[0] = expression.CompareDuration
fb.CmpDataType = tipb.RangeCmpDataType_Duration
case types.ETReal:
fb.CmpFuncs[0] = expression.CompareReal
fb.CmpDataType = tipb.RangeCmpDataType_Float
case types.ETDecimal:
fb.CmpFuncs[0] = expression.CompareDecimal
fb.CmpDataType = tipb.RangeCmpDataType_Decimal
}
}
// UpdateCompareCols will update CompareCols.
func (fb *FrameBound) UpdateCompareCols(ctx sessionctx.Context, orderByCols []*expression.Column) error {
ectx := ctx.GetExprCtx().GetEvalCtx()
if len(fb.CalcFuncs) > 0 {
fb.CompareCols = make([]expression.Expression, len(orderByCols))
if fb.CalcFuncs[0].GetType(ectx).EvalType() != orderByCols[0].GetType(ectx).EvalType() {
var err error
fb.CompareCols[0], err = expression.NewFunctionBase(ctx.GetExprCtx(), ast.Cast, fb.CalcFuncs[0].GetType(ectx), orderByCols[0])
if err != nil {
return err
}
} else {
for i, col := range orderByCols {
fb.CompareCols[i] = col
}
}
cmpDataType := expression.GetAccurateCmpType(ctx.GetExprCtx().GetEvalCtx(), fb.CompareCols[0], fb.CalcFuncs[0])
fb.updateCmpFuncsAndCmpDataType(cmpDataType)
}
return nil
}
// Init initializes LogicalWindow.
func (p LogicalWindow) Init(ctx base.PlanContext, offset int) *LogicalWindow {
p.BaseLogicalPlan = logicalop.NewBaseLogicalPlan(ctx, plancodec.TypeWindow, &p, offset)
return &p
}
// *************************** start implementation of Plan interface ***************************
// ReplaceExprColumns implements base.LogicalPlan interface.
func (p *LogicalWindow) ReplaceExprColumns(replace map[string]*expression.Column) {
for _, desc := range p.WindowFuncDescs {
for _, arg := range desc.Args {
ResolveExprAndReplace(arg, replace)
}
}
for _, item := range p.PartitionBy {
resolveColumnAndReplace(item.Col, replace)
}
for _, item := range p.OrderBy {
resolveColumnAndReplace(item.Col, replace)
}
}
// *************************** end implementation of Plan interface ***************************
// *************************** start implementation of logicalPlan interface ***************************
// HashCode inherits BaseLogicalPlan.LogicalPlan.<0th> implementation.
// PredicatePushDown implements base.LogicalPlan.<1st> interface.
func (p *LogicalWindow) PredicatePushDown(predicates []expression.Expression, opt *optimizetrace.LogicalOptimizeOp) ([]expression.Expression, base.LogicalPlan) {
canBePushed := make([]expression.Expression, 0, len(predicates))
canNotBePushed := make([]expression.Expression, 0, len(predicates))
partitionCols := expression.NewSchema(p.GetPartitionByCols()...)
for _, cond := range predicates {
// We can push predicate beneath Window, only if all of the
// extractedCols are part of partitionBy columns.
if expression.ExprFromSchema(cond, partitionCols) {
canBePushed = append(canBePushed, cond)
} else {
canNotBePushed = append(canNotBePushed, cond)
}
}
p.BaseLogicalPlan.PredicatePushDown(canBePushed, opt)
return canNotBePushed, p
}
// PruneColumns implements base.LogicalPlan.<2nd> interface.
func (p *LogicalWindow) PruneColumns(parentUsedCols []*expression.Column, opt *optimizetrace.LogicalOptimizeOp) (base.LogicalPlan, error) {
windowColumns := p.GetWindowResultColumns()
cnt := 0
for _, col := range parentUsedCols {
used := false
for _, windowColumn := range windowColumns {
if windowColumn.EqualColumn(col) {
used = true
break
}
}
if !used {
parentUsedCols[cnt] = col
cnt++
}
}
parentUsedCols = parentUsedCols[:cnt]
parentUsedCols = p.extractUsedCols(parentUsedCols)
var err error
p.Children()[0], err = p.Children()[0].PruneColumns(parentUsedCols, opt)
if err != nil {
return nil, err
}
p.SetSchema(p.Children()[0].Schema().Clone())
p.Schema().Append(windowColumns...)
return p, nil
}
// FindBestTask inherits BaseLogicalPlan.LogicalPlan.<3rd> implementation.
// BuildKeyInfo inherits BaseLogicalPlan.LogicalPlan.<4th> implementation.
// PushDownTopN inherits BaseLogicalPlan.LogicalPlan.<5th> implementation.
// DeriveTopN inherits BaseLogicalPlan.LogicalPlan.<6th> implementation.
// PredicateSimplification inherits BaseLogicalPlan.LogicalPlan.<7th> implementation.
// ConstantPropagation inherits BaseLogicalPlan.LogicalPlan.<8th> implementation.
// PullUpConstantPredicates inherits BaseLogicalPlan.LogicalPlan.<9th> implementation.
// RecursiveDeriveStats inherits BaseLogicalPlan.LogicalPlan.<10th> implementation.
// DeriveStats implements base.LogicalPlan.<11th> interface.
func (p *LogicalWindow) DeriveStats(childStats []*property.StatsInfo, selfSchema *expression.Schema, _ []*expression.Schema, colGroups [][]*expression.Column) (*property.StatsInfo, error) {
if p.StatsInfo() != nil {
// Reload GroupNDVs since colGroups may have changed.
p.StatsInfo().GroupNDVs = p.GetGroupNDVs(colGroups, childStats)
return p.StatsInfo(), nil
}
childProfile := childStats[0]
p.SetStats(&property.StatsInfo{
RowCount: childProfile.RowCount,
ColNDVs: make(map[int64]float64, selfSchema.Len()),
})
childLen := selfSchema.Len() - len(p.WindowFuncDescs)
for i := 0; i < childLen; i++ {
id := selfSchema.Columns[i].UniqueID
p.StatsInfo().ColNDVs[id] = childProfile.ColNDVs[id]
}
for i := childLen; i < selfSchema.Len(); i++ {
p.StatsInfo().ColNDVs[selfSchema.Columns[i].UniqueID] = childProfile.RowCount
}
p.StatsInfo().GroupNDVs = p.GetGroupNDVs(colGroups, childStats)
return p.StatsInfo(), nil
}
// ExtractColGroups implements base.LogicalPlan.<12th> interface.
func (p *LogicalWindow) ExtractColGroups(colGroups [][]*expression.Column) [][]*expression.Column {
if len(colGroups) == 0 {
return nil
}
childSchema := p.Children()[0].Schema()
_, offsets := childSchema.ExtractColGroups(colGroups)
if len(offsets) == 0 {
return nil
}
extracted := make([][]*expression.Column, len(offsets))
for i, offset := range offsets {
extracted[i] = colGroups[offset]
}
return extracted
}
// PreparePossibleProperties implements base.LogicalPlan.<13th> interface.
func (p *LogicalWindow) PreparePossibleProperties(_ *expression.Schema, _ ...[][]*expression.Column) [][]*expression.Column {
result := make([]*expression.Column, 0, len(p.PartitionBy)+len(p.OrderBy))
for i := range p.PartitionBy {
result = append(result, p.PartitionBy[i].Col)
}
for i := range p.OrderBy {
result = append(result, p.OrderBy[i].Col)
}
return [][]*expression.Column{result}
}
// ExhaustPhysicalPlans implements base.LogicalPlan.<14th> interface.
func (p *LogicalWindow) ExhaustPhysicalPlans(prop *property.PhysicalProperty) ([]base.PhysicalPlan, bool, error) {
return exhaustLogicalWindowPhysicalPlans(p, prop)
}
// ExtractCorrelatedCols implements base.LogicalPlan.<15th> interface.
func (p *LogicalWindow) ExtractCorrelatedCols() []*expression.CorrelatedColumn {
corCols := make([]*expression.CorrelatedColumn, 0, len(p.WindowFuncDescs))
for _, windowFunc := range p.WindowFuncDescs {
for _, arg := range windowFunc.Args {
corCols = append(corCols, expression.ExtractCorColumns(arg)...)
}
}
if p.Frame != nil {
if p.Frame.Start != nil {
for _, expr := range p.Frame.Start.CalcFuncs {
corCols = append(corCols, expression.ExtractCorColumns(expr)...)
}
}
if p.Frame.End != nil {
for _, expr := range p.Frame.End.CalcFuncs {
corCols = append(corCols, expression.ExtractCorColumns(expr)...)
}
}
}
return corCols
}
// MaxOneRow inherits BaseLogicalPlan.LogicalPlan.<16th> implementation.
// Children inherits BaseLogicalPlan.LogicalPlan.<17th> implementation.
// SetChildren inherits BaseLogicalPlan.LogicalPlan.<18th> implementation.
// SetChild inherits BaseLogicalPlan.LogicalPlan.<19th> implementation.
// RollBackTaskMap inherits BaseLogicalPlan.LogicalPlan.<20th> implementation.
// CanPushToCop inherits BaseLogicalPlan.LogicalPlan.<21st> implementation.
// ExtractFD inherits BaseLogicalPlan.LogicalPlan.<22nd> implementation.
// GetBaseLogicalPlan inherits BaseLogicalPlan.LogicalPlan.<23rd> implementation.
// ConvertOuterToInnerJoin inherits BaseLogicalPlan.LogicalPlan.<24th> implementation.
// *************************** end implementation of logicalPlan interface ***************************
// GetPartitionBy returns partition by fields.
func (p *LogicalWindow) GetPartitionBy() []property.SortItem {
return p.PartitionBy
}
// EqualPartitionBy checks whether two LogicalWindow.Partitions are equal.
func (p *LogicalWindow) EqualPartitionBy(newWindow *LogicalWindow) bool {
if len(p.PartitionBy) != len(newWindow.PartitionBy) {
return false
}
partitionByColsMap := make(map[int64]struct{})
for _, item := range p.PartitionBy {
partitionByColsMap[item.Col.UniqueID] = struct{}{}
}
for _, item := range newWindow.PartitionBy {
if _, ok := partitionByColsMap[item.Col.UniqueID]; !ok {
return false
}
}
return true
}
// EqualOrderBy checks whether two LogicalWindow.OrderBys are equal.
func (p *LogicalWindow) EqualOrderBy(ctx expression.EvalContext, newWindow *LogicalWindow) bool {
if len(p.OrderBy) != len(newWindow.OrderBy) {
return false
}
for i, item := range p.OrderBy {
if !item.Col.Equal(ctx, newWindow.OrderBy[i].Col) ||
item.Desc != newWindow.OrderBy[i].Desc {
return false
}
}
return true
}
// EqualFrame checks whether two LogicalWindow.Frames are equal.
func (p *LogicalWindow) EqualFrame(ctx expression.EvalContext, newWindow *LogicalWindow) bool {
if (p.Frame == nil && newWindow.Frame != nil) ||
(p.Frame != nil && newWindow.Frame == nil) {
return false
}
if p.Frame == nil && newWindow.Frame == nil {
return true
}
if p.Frame.Type != newWindow.Frame.Type ||
p.Frame.Start.Type != newWindow.Frame.Start.Type ||
p.Frame.Start.UnBounded != newWindow.Frame.Start.UnBounded ||
p.Frame.Start.Num != newWindow.Frame.Start.Num ||
p.Frame.End.Type != newWindow.Frame.End.Type ||
p.Frame.End.UnBounded != newWindow.Frame.End.UnBounded ||
p.Frame.End.Num != newWindow.Frame.End.Num {
return false
}
for i, expr := range p.Frame.Start.CalcFuncs {
if !expr.Equal(ctx, newWindow.Frame.Start.CalcFuncs[i]) {
return false
}
}
for i, expr := range p.Frame.End.CalcFuncs {
if !expr.Equal(ctx, newWindow.Frame.End.CalcFuncs[i]) {
return false
}
}
return true
}
// GetWindowResultColumns returns the columns storing the result of the window function.
func (p *LogicalWindow) GetWindowResultColumns() []*expression.Column {
return p.Schema().Columns[p.Schema().Len()-len(p.WindowFuncDescs):]
}
// GetPartitionKeys gets partition keys for a logical window, it will assign column id for expressions.
func (p *LogicalWindow) GetPartitionKeys() []*property.MPPPartitionColumn {
partitionByCols := make([]*property.MPPPartitionColumn, 0, len(p.GetPartitionByCols()))
for _, item := range p.PartitionBy {
partitionByCols = append(partitionByCols, &property.MPPPartitionColumn{
Col: item.Col,
CollateID: property.GetCollateIDByNameForPartition(item.Col.GetStaticType().GetCollate()),
})
}
return partitionByCols
}
// CheckComparisonForTiFlash check Duration vs Datetime is invalid comparison as TiFlash can't handle it so far.
func (p *LogicalWindow) CheckComparisonForTiFlash(frameBound *FrameBound) bool {
if len(frameBound.CompareCols) > 0 {
orderByEvalType := p.OrderBy[0].Col.GetStaticType().EvalType()
calFuncEvalType := frameBound.CalcFuncs[0].GetType(p.SCtx().GetExprCtx().GetEvalCtx()).EvalType()
if orderByEvalType == types.ETDuration && (calFuncEvalType == types.ETDatetime || calFuncEvalType == types.ETTimestamp) {
return false
} else if calFuncEvalType == types.ETDuration && (orderByEvalType == types.ETDatetime || orderByEvalType == types.ETTimestamp) {
return false
}
}
return true
}
func (p *LogicalWindow) extractUsedCols(parentUsedCols []*expression.Column) []*expression.Column {
for _, desc := range p.WindowFuncDescs {
for _, arg := range desc.Args {
parentUsedCols = append(parentUsedCols, expression.ExtractColumns(arg)...)
}
}
for _, by := range p.PartitionBy {
parentUsedCols = append(parentUsedCols, by.Col)
}
for _, by := range p.OrderBy {
parentUsedCols = append(parentUsedCols, by.Col)
}
return parentUsedCols
}
// GetPartitionByCols extracts 'partition by' columns from the Window.
func (p *LogicalWindow) GetPartitionByCols() []*expression.Column {
partitionCols := make([]*expression.Column, 0, len(p.PartitionBy))
for _, partitionItem := range p.PartitionBy {
partitionCols = append(partitionCols, partitionItem.Col)
}
return partitionCols
}
// GetGroupNDVs gets the GroupNDVs of the LogicalWindow.
func (*LogicalWindow) GetGroupNDVs(colGroups [][]*expression.Column, childStats []*property.StatsInfo) []property.GroupNDV {
if len(colGroups) > 0 {
return childStats[0].GroupNDVs
}
return nil
}