Files
tidb/pkg/expression/constant_propagation.go
2025-12-22 10:08:23 +00:00

1039 lines
36 KiB
Go

// Copyright 2016 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package expression
import (
"slices"
"sync"
"github.com/pingcap/tidb/pkg/expression/exprctx"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/parser/terror"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/chunk"
"github.com/pingcap/tidb/pkg/util/collate"
"github.com/pingcap/tidb/pkg/util/disjointset"
"github.com/pingcap/tidb/pkg/util/intest"
"github.com/pingcap/tidb/pkg/util/logutil"
"go.uber.org/zap"
)
// MaxPropagateColsCnt means the max number of columns that can participate propagation.
var MaxPropagateColsCnt = 100
// VaildConstantPropagationExpressionFuncType is to filter the unsuitable expression when to propagate the constant.
// Currently, only the `LogicalJoin.isVaildConstantPropagationExpression` has implemented this capability.
// For more information, you can refer to the comments on this.
type VaildConstantPropagationExpressionFuncType func(Expression) bool
type basePropConstSolver struct {
colMapper map[int64]int // colMapper maps column to its index
eqMapper map[int]*Constant // if eqMapper[i] != nil, it means col_i = eqMapper[i]
unionSet *disjointset.SimpleIntSet // unionSet stores the relations like col_i = col_j
columns []*Column // columns stores all columns appearing in the conditions
ctx exprctx.ExprContext
}
func newBasePropConstSolver() basePropConstSolver {
return basePropConstSolver{
colMapper: make(map[int64]int, 4),
eqMapper: make(map[int]*Constant, 4),
columns: make([]*Column, 0, 4),
unionSet: disjointset.NewIntSet(4),
}
}
func (s *basePropConstSolver) Clear() {
clear(s.colMapper)
clear(s.eqMapper)
s.columns = s.columns[:0]
s.unionSet.Clear()
s.ctx = nil
}
func (s *basePropConstSolver) getColID(col *Column) int {
return s.colMapper[col.UniqueID]
}
func (s *basePropConstSolver) insertCols(cols map[int64]*Column) {
for uniqueID, col := range cols {
_, ok := s.colMapper[uniqueID]
if !ok {
s.colMapper[uniqueID] = len(s.colMapper)
s.columns = append(s.columns, col)
}
}
}
// tryToUpdateEQList tries to update the eqMapper. When the eqMapper has store this column with a different constant, like
// a = 1 and a = 2, we set the second return value to false.
func (s *basePropConstSolver) tryToUpdateEQList(col *Column, con *Constant) (bool, bool) {
if con.Value.IsNull() && ConstExprConsiderPlanCache(con, s.ctx.IsUseCache()) {
return false, true
}
id := s.getColID(col)
oldCon, ok := s.eqMapper[id]
if ok {
evalCtx := s.ctx.GetEvalCtx()
res, err := oldCon.Value.Compare(evalCtx.TypeCtx(), &con.Value, collate.GetCollator(col.GetType(s.ctx.GetEvalCtx()).GetCollate()))
return false, res != 0 || err != nil
}
s.eqMapper[id] = con
return true, false
}
// ValidCompareConstantPredicate checks if the predicate is an expression like [column '>'|'>='|'<'|'<='|'=' constant].
// return param1: return true, if the predicate is a compare constant predicate.
// return param2: return the column side of predicate.
func ValidCompareConstantPredicate(ctx EvalContext, candidatePredicate Expression) bool {
scalarFunction, ok := candidatePredicate.(*ScalarFunction)
if !ok {
return false
}
if scalarFunction.FuncName.L != ast.GT && scalarFunction.FuncName.L != ast.GE &&
scalarFunction.FuncName.L != ast.LT && scalarFunction.FuncName.L != ast.LE &&
scalarFunction.FuncName.L != ast.EQ {
return false
}
column, _ := ValidCompareConstantPredicateHelper(ctx, scalarFunction, true)
if column == nil {
column, _ = ValidCompareConstantPredicateHelper(ctx, scalarFunction, false)
}
if column == nil {
return false
}
return true
}
// ValidCompareConstantPredicateHelper checks if the predicate is a compare constant predicate, like "Column xxx Constant"
func ValidCompareConstantPredicateHelper(ctx EvalContext, eq *ScalarFunction, colIsLeft bool) (*Column, *Constant) {
var col *Column
var con *Constant
colOk := false
conOk := false
if colIsLeft {
col, colOk = eq.GetArgs()[0].(*Column)
} else {
col, colOk = eq.GetArgs()[1].(*Column)
}
if !colOk {
return nil, nil
}
if colIsLeft {
con, conOk = eq.GetArgs()[1].(*Constant)
} else {
con, conOk = eq.GetArgs()[0].(*Constant)
}
if !conOk {
return nil, nil
}
if col.GetStaticType().GetCollate() != con.GetType(ctx).GetCollate() {
return nil, nil
}
return col, con
}
// validEqualCond checks if the cond is an expression like [column eq constant].
func validEqualCond(ctx EvalContext, cond Expression) (*Column, *Constant) {
if eq, ok := cond.(*ScalarFunction); ok {
if eq.FuncName.L != ast.EQ {
return nil, nil
}
col, con := ValidCompareConstantPredicateHelper(ctx, eq, true)
if col == nil {
return ValidCompareConstantPredicateHelper(ctx, eq, false)
}
return col, con
}
return nil, nil
}
// replaceEqCondtionWithTrue replaces eq condition in 'cond' by true if both 'src' and 'tgt' appear in 'eq cond'.
func replaceEqCondtionWithTrue(ctx BuildContext, src *Column, tgt *Column, cond Expression) (Expression, bool) {
if src.RetType.GetType() != tgt.RetType.GetType() {
return cond, false
}
sf, ok := cond.(*ScalarFunction)
if !ok {
return cond, false
}
replaced := false
args := sf.GetArgs()
evalCtx := ctx.GetEvalCtx()
switch sf.FuncName.L {
case ast.In:
if src.GetType(ctx.GetEvalCtx()).EvalType() == types.ETString || tgt.GetType(ctx.GetEvalCtx()).EvalType() == types.ETString {
// It is duo to ```CheckAndDeriveCollationFromExprs``` in the ```deriveCollation```.
// If we have an expression a in (b,c,d) with each column which has difference collation, the expression's
// return type is decided by ```CheckAndDeriveCollationFromExprs```. it will get diffence return type.
// So when encountering a string type, we can just return it directly.
return cond, false
}
// for 'a in (b, c, d)', if a = b or a = c or a = d, we can replace it with true
constTrue := false
switch {
case args[0].Equal(ctx.GetEvalCtx(), src):
constTrue = slices.ContainsFunc(args[1:], func(arg Expression) bool {
return arg.Equal(ctx.GetEvalCtx(), tgt)
})
case args[0].Equal(ctx.GetEvalCtx(), tgt):
constTrue = slices.ContainsFunc(args[1:], func(arg Expression) bool {
return arg.Equal(ctx.GetEvalCtx(), src)
})
}
if constTrue {
return &Constant{
Value: types.NewDatum(true),
RetType: types.NewFieldType(mysql.TypeTiny),
}, true
}
case ast.EQ:
// If it has equal condition `a=b`, we meet it again and we can replace it with true
if (args[0].Equal(evalCtx, src) && args[1].Equal(evalCtx, tgt)) || (args[1].Equal(evalCtx, src) && args[0].Equal(evalCtx, tgt)) {
return &Constant{
Value: types.NewDatum(true),
RetType: types.NewFieldType(mysql.TypeTiny),
}, true
}
case ast.LogicOr, ast.LogicAnd:
for idx, expr := range args {
if sf, ok := expr.(*ScalarFunction); ok {
newExpr, subReplaced := replaceEqCondtionWithTrue(ctx, src, tgt, sf)
if subReplaced {
replaced = true
args[idx] = newExpr
}
}
}
if replaced {
return NewFunctionInternal(ctx, sf.FuncName.L, sf.GetType(ctx.GetEvalCtx()), args...), true
}
}
return cond, false
}
// tryToReplaceCond aims to replace all occurrences of column 'src' and try to replace it with 'tgt' in 'cond'
// It returns
//
// bool: if a replacement happened
// bool: if 'cond' contains non-deterministic expression
// Expression: the replaced expression, or original 'cond' if the replacement didn't happen
//
// For example:
//
// for 'a, b, a < 3', it returns 'true, false, b < 3'
// for 'a, b, sin(a) + cos(a) = 5', it returns 'true, false, returns sin(b) + cos(b) = 5'
// for 'a, b, cast(a) < rand()', it returns 'false, true, cast(a) < rand()'
func tryToReplaceCond(ctx BuildContext, src *Column, tgt *Column, cond Expression, nullAware bool) (bool, bool, Expression) {
if src.RetType.GetType() != tgt.RetType.GetType() {
return false, false, cond
}
sf, ok := cond.(*ScalarFunction)
if !ok {
return false, false, cond
}
replaced := false
var args []Expression
if _, ok := unFoldableFunctions[sf.FuncName.L]; ok {
return false, true, cond
}
if _, ok := inequalFunctions[sf.FuncName.L]; ok {
return false, true, cond
}
// See
// https://github.com/pingcap/tidb/issues/15782
// https://github.com/pingcap/tidb/issues/17817
// The null sensitive function's result may rely on the original nullable information of the outer side column.
// Its args cannot be replaced easily.
// A more strict check is that after we replace the arg. We check the nullability of the new expression.
// But we haven't maintained it yet, so don't replace the arg of the control function currently.
if nullAware &&
(sf.FuncName.L == ast.Ifnull ||
sf.FuncName.L == ast.If ||
sf.FuncName.L == ast.Case ||
sf.FuncName.L == ast.NullEQ) {
return false, true, cond
}
evalCtx := ctx.GetEvalCtx()
for idx, expr := range sf.GetArgs() {
if src.EqualColumn(expr) {
_, coll := cond.CharsetAndCollation()
if tgt.GetType(evalCtx).GetCollate() != coll {
continue
}
replaced = true
if args == nil {
args = make([]Expression, len(sf.GetArgs()))
copy(args, sf.GetArgs())
}
args[idx] = tgt
} else {
subReplaced, isNonDeterministic, subExpr := tryToReplaceCond(ctx, src, tgt, expr, nullAware)
if isNonDeterministic {
return false, true, cond
} else if subReplaced {
replaced = true
if args == nil {
args = make([]Expression, len(sf.GetArgs()))
copy(args, sf.GetArgs())
}
args[idx] = subExpr
}
}
}
if replaced {
return true, false, NewFunctionInternal(ctx, sf.FuncName.L, sf.GetType(ctx.GetEvalCtx()), args...)
}
return false, false, cond
}
var propConstSolverPool = sync.Pool{
New: func() any {
solver := &propConstSolver{
basePropConstSolver: newBasePropConstSolver(),
conditions: make([]Expression, 0, 4),
}
return solver
},
}
type propConstSolver struct {
basePropConstSolver
conditions []Expression
// TODO: remove this func pointer for performance
vaildExprFunc VaildConstantPropagationExpressionFuncType
// if schema1 and schema2 are not nil, we're propagating constants for inner joins.
// for outer joins, use propOuterJoinConstSolver instead.
schema1 *Schema
schema2 *Schema
}
// newPropConstSolver returns a PropagateConstantSolver.
func newPropConstSolver() PropagateConstantSolver {
solver := propConstSolverPool.Get().(*propConstSolver)
return solver
}
// PropagateConstant propagate constant values of deterministic predicates in a condition.
func (s *propConstSolver) PropagateConstant(ctx exprctx.ExprContext,
keepJoinKey bool, schema1, schema2 *Schema,
vaildExprFunc VaildConstantPropagationExpressionFuncType, conditions []Expression) []Expression {
s.ctx = ctx
s.vaildExprFunc = vaildExprFunc
s.schema1 = schema1
s.schema2 = schema2
return s.solve(keepJoinKey, conditions)
}
// Clear clears the solver and returns it to the pool.
func (s *propConstSolver) Clear() {
s.basePropConstSolver.Clear()
s.conditions = s.conditions[:0]
s.vaildExprFunc = nil
s.schema1 = nil
s.schema2 = nil
propConstSolverPool.Put(s)
}
// propagateConstantEQ propagates expressions like 'column = constant' by substituting the constant for column, the
// procedure repeats multiple times. An example runs as following:
// a = d & b * 2 = c & c = d + 2 & b = 1 & a = 4, we pick eq cond b = 1 and a = 4
// d = 4 & 2 = c & c = d + 2 & b = 1 & a = 4, we propagate b = 1 and a = 4 and pick eq cond c = 2 and d = 4
// d = 4 & 2 = c & false & b = 1 & a = 4, we propagate c = 2 and d = 4, and do constant folding: c = d + 2 will be folded as false.
func (s *propConstSolver) propagateConstantEQ() {
intest.Assert(len(s.eqMapper) == 0 && s.eqMapper != nil)
visited := make([]bool, len(s.conditions))
cols := make([]*Column, 0, 4)
cons := make([]Expression, 0, 4)
for range MaxPropagateColsCnt {
mapper := s.pickNewEQConds(visited)
if len(mapper) == 0 {
return
}
cols = slices.Grow(cols, len(mapper))
cons = slices.Grow(cons, len(mapper))
for id, con := range mapper {
cols = append(cols, s.columns[id])
cons = append(cons, con)
}
for i, cond := range s.conditions {
if !visited[i] {
s.conditions[i] = ColumnSubstitute(s.ctx, cond, NewSchema(cols...), cons)
}
}
cols = cols[:0]
cons = cons[:0]
}
}
// propagateColumnEQ propagates expressions like 'column A = column B' by adding extra filters
// 'expression(..., column B, ...)' propagated from 'expression(..., column A, ...)' as long as:
//
// 1. The expression is deterministic
// 2. The expression doesn't have any side effect
//
// e.g. For expression a = b and b = c and c = d and c < 1 , we can get extra a < 1 and b < 1 and d < 1.
// However, for a = b and a < rand(), we cannot propagate a < rand() to b < rand() because rand() is non-deterministic
//
// This propagation may bring redundancies that we need to resolve later, for example:
// for a = b and a < 3 and b < 3, we get new a < 3 and b < 3, which are redundant
// for a = b and a < 3 and 3 > b, we get new b < 3 and 3 > a, which are redundant
// for a = b and a < 3 and b < 4, we get new a < 4 and b < 3 but should expect a < 3 and b < 3
// for a = b and a in (3) and b in (4), we get b in (3) and a in (4) but should expect 'false'
//
// TODO: remove redundancies later
//
// We maintain a unionSet representing the equivalent for every two columns.
func (s *propConstSolver) propagateColumnEQ() {
visited := make([]bool, len(s.conditions))
if s.unionSet == nil {
s.unionSet = disjointset.NewIntSet(len(s.columns))
} else {
s.unionSet.GrowNewIntSet(len(s.columns))
}
allVisited := true
for i := range s.conditions {
if fun, ok := s.conditions[i].(*ScalarFunction); ok && fun.FuncName.L == ast.EQ {
lCol, rCol, ok := IsColOpCol(fun)
// TODO: Enable hybrid types in ConstantPropagate.
if ok && lCol.GetType(s.ctx.GetEvalCtx()).GetCollate() == rCol.GetType(s.ctx.GetEvalCtx()).GetCollate() && !lCol.GetType(s.ctx.GetEvalCtx()).Hybrid() && !rCol.GetType(s.ctx.GetEvalCtx()).Hybrid() {
lID := s.getColID(lCol)
rID := s.getColID(rCol)
visited[i] = true
if s.unionSet.FindRoot(lID) != s.unionSet.FindRoot(rID) {
// Add the equality relation to unionSet
// if it has been added, we don't need to process it again.
// It will be deleted in the replaceEqCondtionWithTrueitionsWithConstants
s.unionSet.Union(lID, rID)
} else if lID != rID {
s.conditions[i] = &Constant{
Value: types.NewDatum(true),
RetType: types.NewFieldType(mysql.TypeTiny),
}
}
continue
}
}
allVisited = false
}
if allVisited {
return
}
condsLen := len(s.conditions)
for i, coli := range s.columns {
for j := i + 1; j < len(s.columns); j++ {
// unionSet doesn't have iterate(), we use a two layer loop to iterate col_i = col_j relation
if s.unionSet.FindRoot(i) != s.unionSet.FindRoot(j) {
continue
}
colj := s.columns[j]
for k := range condsLen {
if visited[k] {
// cond_k has been used to retrieve equality relation
continue
}
s.conditions[k], _ = replaceEqCondtionWithTrue(s.ctx, coli, colj, s.conditions[k])
cond := s.conditions[k]
replaced, _, newExpr := tryToReplaceCond(s.ctx, coli, colj, cond, false)
if replaced {
// TODO(hawkingrei): if it is the true expression, we can remvoe it.
if isConstant(newExpr) || s.vaildExprFunc == nil || s.vaildExprFunc(newExpr) {
s.conditions = append(s.conditions, newExpr)
}
}
// Why do we need to replace colj with coli again?
// Consider the case: col1 = col3
// Join Schema: left schema: {col1,col2}, right schema: {col3,col4}
// Conditions: col1 in (col3, col4)
//
// because the order of columns in equaility relation is not guaranteed,
// We may replace col3 with col1 first, it cannot push down the condition to the child.
// because two columns are from different side.
// But if we replace col1 with col3, it can be pushed down.
// So we need to try both directions.
replaced, _, newExpr = tryToReplaceCond(s.ctx, colj, coli, cond, false)
if replaced {
// TODO(hawkingrei): if it is the true expression, we can remvoe it.
if isConstant(newExpr) || s.vaildExprFunc == nil || s.vaildExprFunc(newExpr) {
s.conditions = append(s.conditions, newExpr)
}
}
}
}
}
}
// isConstant is to determine whether the expression is a constant.
func isConstant(cond Expression) bool {
_, ok := cond.(*Constant)
return ok
}
func (s *propConstSolver) setConds2ConstFalse() {
if MaybeOverOptimized4PlanCache(s.ctx, s.conditions...) {
s.ctx.SetSkipPlanCache("some parameters may be overwritten when constant propagation")
}
s.conditions = s.conditions[:0]
s.conditions = append(s.conditions, &Constant{
Value: types.NewDatum(false),
RetType: types.NewFieldType(mysql.TypeTiny),
})
}
// pickNewEQConds tries to pick new equal conds and puts them to retMapper.
func (s *propConstSolver) pickNewEQConds(visited []bool) (retMapper map[int]*Constant) {
retMapper = make(map[int]*Constant)
for i, cond := range s.conditions {
if visited[i] {
continue
}
col, con := validEqualCond(s.ctx.GetEvalCtx(), cond)
// Then we check if this CNF item is a false constant. If so, we will set the whole condition to false.
var ok bool
if col == nil {
con, ok = cond.(*Constant)
if !ok {
continue
}
visited[i] = true
value, _, err := EvalBool(s.ctx.GetEvalCtx(), []Expression{con}, chunk.Row{})
if err != nil {
terror.Log(err)
return nil
}
if !value {
s.setConds2ConstFalse()
return nil
}
continue
}
// TODO: Enable hybrid types in ConstantPropagate.
if col.GetType(s.ctx.GetEvalCtx()).Hybrid() {
continue
}
visited[i] = true
updated, foreverFalse := s.tryToUpdateEQList(col, con)
if foreverFalse {
s.setConds2ConstFalse()
return nil
}
if updated {
colType := col.GetType(s.ctx.GetEvalCtx())
conType := con.GetType(s.ctx.GetEvalCtx())
castedCon := con
if !colType.Equal(conType) {
oriWarningCnt := s.ctx.GetEvalCtx().WarningCount()
newExpr := BuildCastFunction(s.ctx, con, colType.DeepCopy())
s.ctx.GetEvalCtx().TruncateWarnings(oriWarningCnt)
if newCon, ok := newExpr.(*Constant); ok {
castedCon = newCon
}
}
retMapper[s.getColID(col)] = castedCon
}
}
return
}
func (s *propConstSolver) solve(keepJoinKey bool, conditions []Expression) []Expression {
var joinKeys []Expression
if keepJoinKey {
// keep join keys in the results since they are crucial for join optimization like join reorder
// and index join selection. (#63314, #60076)
joinKeys = cloneJoinKeys(conditions, s.schema1, s.schema2)
}
s.conditions = slices.Grow(s.conditions, len(conditions))
s.extractColumns(conditions)
if len(s.columns) > MaxPropagateColsCnt {
logutil.BgLogger().Warn("too many columns in a single CNF",
zap.Int("numCols", len(s.columns)),
zap.Int("maxNumCols", MaxPropagateColsCnt),
)
return conditions
}
s.propagateConstantEQ()
s.propagateColumnEQ()
s.conditions = propagateConstantDNF(s.ctx, s.vaildExprFunc, s.conditions...)
s.conditions = append(s.conditions, joinKeys...)
s.conditions = RemoveDupExprs(s.conditions)
return slices.Clone(s.conditions)
}
func (s *propConstSolver) extractColumns(conditions []Expression) {
mp := GetUniqueIDToColumnMap()
defer PutUniqueIDToColumnMap(mp)
s.conditions = s.extractColumnsInternal(mp, s.conditions, conditions)
}
// PropagateConstantForJoin propagate constants for inner joins.
func PropagateConstantForJoin(ctx exprctx.ExprContext, keepJoinKey bool, schema1, schema2 *Schema,
filter VaildConstantPropagationExpressionFuncType, conditions ...Expression) []Expression {
if len(conditions) == 0 {
return conditions
}
solver := newPropConstSolver()
defer func() {
solver.Clear()
}()
return solver.PropagateConstant(exprctx.WithConstantPropagateCheck(ctx), keepJoinKey, schema1, schema2, filter, conditions)
}
// PropagateConstant propagate constant values of deterministic predicates in a condition.
// This is a constant propagation logic for expression list such as ['a=1', 'a=b']
func PropagateConstant(ctx exprctx.ExprContext, filter VaildConstantPropagationExpressionFuncType, conditions ...Expression) []Expression {
if len(conditions) == 0 {
return conditions
}
solver := newPropConstSolver()
defer func() {
solver.Clear()
}()
return solver.PropagateConstant(exprctx.WithConstantPropagateCheck(ctx), false, nil, nil, filter, conditions)
}
var propOuterJoinConstSolverPool = sync.Pool{
New: func() any {
solver := &propOuterJoinConstSolver{
basePropConstSolver: newBasePropConstSolver(),
joinConds: make([]Expression, 0, 4),
filterConds: make([]Expression, 0, 4),
}
return solver
},
}
// propOuterJoinConstSolver is used to propagate constant values over outer join.
// Outer join predicates need special care since we can only propagate constants from the inner side to
// the outer side, otherwise nullability of the join result could be incorrectly affected.
// For example: `select * from t1 left join t2 on t1.a=t2.a where t1.a=1`, we can't propagate t1.a=1 to t2.a=1,
// since after this propagation, t2.a will never be null, which is incorrect for this outer join query.
type propOuterJoinConstSolver struct {
basePropConstSolver
joinConds []Expression
filterConds []Expression
outerSchema *Schema
innerSchema *Schema
// TODO: remove this func pointer for performance
vaildExprFunc VaildConstantPropagationExpressionFuncType
// nullSensitive indicates if this outer join is null sensitive, if true, we cannot generate
// additional `col is not null` condition from column equal conditions. Specifically, this value
// is true for LeftOuterSemiJoin, AntiLeftOuterSemiJoin and AntiSemiJoin.
nullSensitive bool
}
func newPropOuterJoinConstSolver() *propOuterJoinConstSolver {
solver := propOuterJoinConstSolverPool.Get().(*propOuterJoinConstSolver)
return solver
}
// clear resets the solver.
func (s *propOuterJoinConstSolver) Clear() {
s.basePropConstSolver.Clear()
s.joinConds = s.joinConds[:0]
s.filterConds = s.filterConds[:0]
s.outerSchema = nil
s.innerSchema = nil
s.nullSensitive = false
s.vaildExprFunc = nil
propOuterJoinConstSolverPool.Put(s)
}
func (s *propOuterJoinConstSolver) setConds2ConstFalse(filterConds bool) {
s.joinConds = s.joinConds[:0]
s.joinConds = append(s.joinConds, &Constant{
Value: types.NewDatum(false),
RetType: types.NewFieldType(mysql.TypeTiny),
})
if filterConds {
s.filterConds = s.filterConds[:0]
s.filterConds = append(s.filterConds, &Constant{
Value: types.NewDatum(false),
RetType: types.NewFieldType(mysql.TypeTiny),
})
}
}
func (s *basePropConstSolver) dealWithPossibleHybridType(col *Column, con *Constant) (*Constant, bool) {
if !col.GetType(s.ctx.GetEvalCtx()).Hybrid() {
return con, true
}
if col.GetType(s.ctx.GetEvalCtx()).GetType() == mysql.TypeEnum {
d, err := con.Eval(s.ctx.GetEvalCtx(), chunk.Row{})
if err != nil {
return nil, false
}
if MaybeOverOptimized4PlanCache(s.ctx, con) {
s.ctx.SetSkipPlanCache("Skip plan cache since mutable constant is restored and propagated")
}
switch d.Kind() {
case types.KindInt64:
enum, err := types.ParseEnumValue(col.GetType(s.ctx.GetEvalCtx()).GetElems(), uint64(d.GetInt64()))
if err != nil {
logutil.BgLogger().Debug("Invalid Enum parsed during constant propagation")
return nil, false
}
con = &Constant{
Value: types.NewMysqlEnumDatum(enum),
RetType: col.RetType.Clone(),
collationInfo: col.collationInfo,
}
case types.KindString:
enum, err := types.ParseEnumName(col.GetType(s.ctx.GetEvalCtx()).GetElems(), d.GetString(), d.Collation())
if err != nil {
logutil.BgLogger().Debug("Invalid Enum parsed during constant propagation")
return nil, false
}
con = &Constant{
Value: types.NewMysqlEnumDatum(enum),
RetType: col.RetType.Clone(),
collationInfo: col.collationInfo,
}
case types.KindMysqlEnum, types.KindMysqlSet:
// It's already a hybrid type. Just use it.
default:
// We skip other cases first.
return nil, false
}
return con, true
}
return nil, false
}
func (s *basePropConstSolver) extractColumnsInternal(mp map[int64]*Column, splitConds []Expression, conds []Expression) []Expression {
for _, cond := range conds {
splitConds = append(splitConds, SplitCNFItems(cond)...)
ExtractColumnsMapFromExpressionsWithReusedMap(mp, nil, cond)
s.insertCols(mp)
clear(mp)
}
return splitConds
}
// pickEQCondsOnOuterCol picks constant equal expression from specified conditions.
func (s *propOuterJoinConstSolver) pickEQCondsOnOuterCol(retMapper map[int]*Constant, visited []bool, filterConds bool) map[int]*Constant {
var conds []Expression
var condsOffset int
if filterConds {
conds = s.filterConds
} else {
conds = s.joinConds
condsOffset = len(s.filterConds)
}
for i, cond := range conds {
if visited[i+condsOffset] {
continue
}
col, con := validEqualCond(s.ctx.GetEvalCtx(), cond)
// Then we check if this CNF item is a false constant. If so, we will set the whole condition to false.
var ok bool
if col == nil {
con, ok = cond.(*Constant)
if !ok {
continue
}
visited[i+condsOffset] = true
value, _, err := EvalBool(s.ctx.GetEvalCtx(), []Expression{con}, chunk.Row{})
if err != nil {
terror.Log(err)
return nil
}
if !value {
s.setConds2ConstFalse(filterConds)
return nil
}
continue
}
var valid bool
con, valid = s.dealWithPossibleHybridType(col, con)
if !valid {
continue
}
// Only extract `outerCol = const` expressions.
if !s.outerSchema.Contains(col) {
continue
}
visited[i+condsOffset] = true
updated, foreverFalse := s.tryToUpdateEQList(col, con)
if foreverFalse {
s.setConds2ConstFalse(filterConds)
return nil
}
if updated {
retMapper[s.getColID(col)] = con
}
}
return retMapper
}
// pickNewEQConds picks constant equal expressions from join and filter conditions.
func (s *propOuterJoinConstSolver) pickNewEQConds(visited []bool) map[int]*Constant {
retMapper := make(map[int]*Constant)
retMapper = s.pickEQCondsOnOuterCol(retMapper, visited, true)
if retMapper == nil {
// Filter is constant false or error occurred, enforce early termination.
return nil
}
retMapper = s.pickEQCondsOnOuterCol(retMapper, visited, false)
return retMapper
}
// propagateConstantEQ propagates expressions like `outerCol = const` by substituting `outerCol` in *JOIN* condition
// with `const`, the procedure repeats multiple times.
func (s *propOuterJoinConstSolver) propagateConstantEQ() {
clear(s.eqMapper)
lenFilters := len(s.filterConds)
visited := make([]bool, lenFilters+len(s.joinConds))
for range MaxPropagateColsCnt {
mapper := s.pickNewEQConds(visited)
if len(mapper) == 0 {
return
}
cols := make([]*Column, 0, len(mapper))
cons := make([]Expression, 0, len(mapper))
for id, con := range mapper {
cols = append(cols, s.columns[id])
cons = append(cons, con)
}
for i, cond := range s.joinConds {
if !visited[i+lenFilters] {
s.joinConds[i] = ColumnSubstitute(s.ctx, cond, NewSchema(cols...), cons)
}
}
}
}
func (s *propOuterJoinConstSolver) colsFromOuterAndInner(col1, col2 *Column) (*Column, *Column) {
if s.outerSchema.Contains(col1) && s.innerSchema.Contains(col2) {
return col1, col2
}
if s.outerSchema.Contains(col2) && s.innerSchema.Contains(col1) {
return col2, col1
}
return nil, nil
}
// validColEqualCond checks if expression is column equal condition that we can use for constant
// propagation over outer join. We only use expression like `outerCol = innerCol`, for expressions like
// `outerCol1 = outerCol2` or `innerCol1 = innerCol2`, they do not help deriving new inner table conditions
// which can be pushed down to children plan nodes, so we do not pick them.
func (s *propOuterJoinConstSolver) validColEqualCond(cond Expression) (*Column, *Column) {
if fun, ok := cond.(*ScalarFunction); ok && fun.FuncName.L == ast.EQ {
lCol, lOk := fun.GetArgs()[0].(*Column)
rCol, rOk := fun.GetArgs()[1].(*Column)
if lOk && rOk && lCol.GetType(s.ctx.GetEvalCtx()).GetCollate() == rCol.GetType(s.ctx.GetEvalCtx()).GetCollate() {
return s.colsFromOuterAndInner(lCol, rCol)
}
}
return nil, nil
}
// deriveConds given `outerCol = innerCol`, derive new expression for specified conditions.
func (s *propOuterJoinConstSolver) deriveConds(outerCol, innerCol *Column, schema *Schema, fCondsOffset int, visited []bool, filterConds bool) []bool {
var offset, condsLen int
var conds []Expression
if filterConds {
conds = s.filterConds
offset = fCondsOffset
condsLen = len(s.filterConds)
} else {
conds = s.joinConds
condsLen = fCondsOffset
}
for k := range condsLen {
if visited[k+offset] {
// condition has been used to retrieve equality relation or contains column beyond children schema.
continue
}
cond := conds[k]
if !ExprFromSchema(cond, schema) {
visited[k+offset] = true
continue
}
replaced, _, newExpr := tryToReplaceCond(s.ctx, outerCol, innerCol, cond, true)
if replaced {
// TODO(hawkingrei): if it is the true expression, we can remvoe it.
if !isConstant(newExpr) && s.vaildExprFunc != nil && !s.vaildExprFunc(newExpr) {
continue
}
s.joinConds = append(s.joinConds, newExpr)
}
}
return visited
}
// propagateColumnEQ propagates expressions like 'outerCol = innerCol' by adding extra filters
// 'expression(..., innerCol, ...)' derived from 'expression(..., outerCol, ...)' as long as
// 'expression(..., outerCol, ...)' does not reference columns outside children schemas of join node.
// Derived new expressions must be appended into join condition, not filter condition.
func (s *propOuterJoinConstSolver) propagateColumnEQ() {
if s.nullSensitive {
return
}
visited := make([]bool, 2*len(s.joinConds)+len(s.filterConds))
if s.unionSet == nil {
s.unionSet = disjointset.NewIntSet(len(s.columns))
} else {
s.unionSet.GrowNewIntSet(len(s.columns))
}
var outerCol, innerCol *Column
// Only consider column equal condition in joinConds.
// If we have column equal in filter condition, the outer join should have been simplified already.
for i := range s.joinConds {
outerCol, innerCol = s.validColEqualCond(s.joinConds[i])
if outerCol != nil {
outerID := s.getColID(outerCol)
innerID := s.getColID(innerCol)
s.unionSet.Union(outerID, innerID)
visited[i] = true
// Generate `innerCol is not null` from `outerCol = innerCol`. Note that `outerCol is not null`
// does not hold since we are in outer join.
// For AntiLeftOuterSemiJoin, this does not work, for example:
// `select *, t1.a not in (select t2.b from t t2) from t t1` does not imply `t2.b is not null`.
// For LeftOuterSemiJoin, this does not work either, for example:
// `select *, t1.a in (select t2.b from t t2) from t t1`
// rows with t2.b is null would impact whether LeftOuterSemiJoin should output 0 or null if there
// is no row satisfying t2.b = t1.a
childCol := s.innerSchema.RetrieveColumn(innerCol)
if !mysql.HasNotNullFlag(childCol.RetType.GetFlag()) {
notNullExpr := BuildNotNullExpr(s.ctx, childCol)
s.joinConds = append(s.joinConds, notNullExpr)
}
}
}
lenJoinConds := len(s.joinConds)
mergedSchema := MergeSchema(s.outerSchema, s.innerSchema)
for i, coli := range s.columns {
for j := i + 1; j < len(s.columns); j++ {
// unionSet doesn't have iterate(), we use a two layer loop to iterate col_i = col_j relation.
if s.unionSet.FindRoot(i) != s.unionSet.FindRoot(j) {
continue
}
colj := s.columns[j]
outerCol, innerCol = s.colsFromOuterAndInner(coli, colj)
if outerCol == nil {
continue
}
visited = s.deriveConds(outerCol, innerCol, mergedSchema, lenJoinConds, visited, false)
visited = s.deriveConds(outerCol, innerCol, mergedSchema, lenJoinConds, visited, true)
}
}
}
func (s *propOuterJoinConstSolver) solve(keepJoinKey bool, joinConds, filterConds []Expression) ([]Expression, []Expression) {
var joinKeys []Expression
if keepJoinKey {
// keep join keys in the results since they are crucial for join optimization like join reorder
// and index join selection. (#63314, #60076)
joinKeys = cloneJoinKeys(joinConds, s.outerSchema, s.innerSchema)
}
s.extractColumns(joinConds, filterConds)
if len(s.columns) > MaxPropagateColsCnt {
logutil.BgLogger().Warn("too many columns",
zap.Int("numCols", len(s.columns)),
zap.Int("maxNumCols", MaxPropagateColsCnt),
)
return joinConds, filterConds
}
s.propagateConstantEQ()
s.propagateColumnEQ()
s.joinConds = propagateConstantDNF(s.ctx, s.vaildExprFunc, s.joinConds...)
s.joinConds = RemoveDupExprs(append(s.joinConds, joinKeys...))
s.filterConds = propagateConstantDNF(s.ctx, s.vaildExprFunc, s.filterConds...)
return slices.Clone(s.joinConds), slices.Clone(s.filterConds)
}
func (s *propOuterJoinConstSolver) extractColumns(joinConds, filterConds []Expression) {
mp := GetUniqueIDToColumnMap()
defer PutUniqueIDToColumnMap(mp)
s.joinConds = s.extractColumnsInternal(mp, s.joinConds, joinConds)
s.filterConds = s.extractColumnsInternal(mp, s.filterConds, filterConds)
}
// propagateConstantDNF find DNF item from CNF, and propagate constant inside DNF.
func propagateConstantDNF(ctx exprctx.ExprContext, filter VaildConstantPropagationExpressionFuncType, conds ...Expression) []Expression {
for i, cond := range conds {
if dnf, ok := cond.(*ScalarFunction); ok && dnf.FuncName.L == ast.LogicOr {
dnfItems := SplitDNFItems(cond)
for j, item := range dnfItems {
dnfItems[j] = ComposeCNFCondition(ctx, PropagateConstant(ctx, filter, item)...)
}
conds[i] = ComposeDNFCondition(ctx, dnfItems...)
}
}
return conds
}
// PropConstForOuterJoin propagate constant equal and column equal conditions over outer join or anti semi join.
// First step is to extract `outerCol = const` from join conditions and filter conditions,
// and substitute `outerCol` in join conditions with `const`;
// Second step is to extract `outerCol = innerCol` from join conditions, and derive new join
// conditions based on this column equal condition and `outerCol` related
// expressions in join conditions and filter conditions;
func PropConstForOuterJoin(ctx exprctx.ExprContext, joinConds, filterConds []Expression,
outerSchema, innerSchema *Schema, keepJoinKey, nullSensitive bool,
vaildExprFunc VaildConstantPropagationExpressionFuncType) ([]Expression, []Expression) {
solver := newPropOuterJoinConstSolver()
defer func() {
solver.Clear()
}()
solver.outerSchema = outerSchema
solver.innerSchema = innerSchema
solver.nullSensitive = nullSensitive
solver.ctx = ctx
solver.vaildExprFunc = vaildExprFunc
return solver.solve(keepJoinKey, joinConds, filterConds)
}
// PropagateConstantSolver is a constant propagate solver.
type PropagateConstantSolver interface {
PropagateConstant(ctx exprctx.ExprContext,
keepJoinKey bool, schema1, schema2 *Schema,
filter VaildConstantPropagationExpressionFuncType, conditions []Expression) []Expression
Clear()
}
// cloneJoinKeys clones all join keys like `t1.col = t2.col` in these expressions.
// schema1 and schema2 are used to identify join keys.
func cloneJoinKeys(exprs []Expression, schema1, schema2 *Schema) (joinKeys []Expression) {
if schema1 == nil || schema2 == nil {
return nil
}
for _, expr := range exprs {
if isJoinKey(expr, schema1, schema2) {
joinKeys = append(joinKeys, expr.Clone())
}
}
return
}
// isJoinKey returns true if this expression could be a join key like `t1.col = t2.col`.
func isJoinKey(expr Expression, schema1, schema2 *Schema) bool {
binop, ok := expr.(*ScalarFunction)
if !ok || binop.FuncName.L != ast.EQ {
return false
}
col1, lOK := binop.GetArgs()[0].(*Column)
col2, rOK := binop.GetArgs()[1].(*Column)
if !lOK || !rOK {
return false
}
// from different tables
return (schema1.Contains(col1) && schema2.Contains(col2)) ||
(schema1.Contains(col2) && schema2.Contains(col1))
}