tidb/pkg/executor/join/hash_join_base.go

// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package join

import (
	"context"
	"sync"
	"sync/atomic"
	"time"

	"github.com/pingcap/errors"
	"github.com/pingcap/failpoint"
	"github.com/pingcap/tidb/pkg/executor/internal/exec"
	"github.com/pingcap/tidb/pkg/planner/core/base"
	"github.com/pingcap/tidb/pkg/sessionctx"
	"github.com/pingcap/tidb/pkg/util"
	"github.com/pingcap/tidb/pkg/util/chunk"
	"github.com/pingcap/tidb/pkg/util/disk"
	"github.com/pingcap/tidb/pkg/util/memory"
)

// hashjoinWorkerResult stores the result produced by a join worker.
// `src` is for Chunk reuse: the main goroutine takes the join result chunk `chk`,
// and after processing it pushes `chk` back into `src`; join worker goroutines then
// take the empty chunk from `src` and fill it with new data.
type hashjoinWorkerResult struct {
	chk *chunk.Chunk
	err error
	src chan<- *chunk.Chunk
}
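
// hashJoinCtxBase holds the context shared by the fetcher, build and probe goroutines
// of one hash join executor.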
type hashJoinCtxBase struct {
	SessCtx        sessionctx.Context
	ChunkAllocPool chunk.Allocator
	// Concurrency is the number of partition, build and join workers.
	Concurrency  uint
	joinResultCh chan *hashjoinWorkerResult
	// closeCh is closed when the executor is closed; all worker goroutines watch it to stop.
	closeCh       chan struct{}
	finished      atomic.Bool
	IsNullEQ      []bool
	buildFinished chan error
	JoinType      base.JoinType
	IsNullAware   bool
	memTracker    *memory.Tracker // track memory usage.
	diskTracker   *disk.Tracker   // track disk usage.
}
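
// probeSideTupleFetcherBase fetches chunks from ProbeSideExec and hands them to the join
// workers through probeResultChs, recycling used chunks via probeChkResourceCh.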
type probeSideTupleFetcherBase struct {
	ProbeSideExec      exec.Executor
	probeChkResourceCh chan *probeChkResource
	probeResultChs     []chan *chunk.Chunk
	requiredRows       int64
	joinResultChannel  chan *hashjoinWorkerResult
	buildSuccess       bool
}

func (fetcher *probeSideTupleFetcherBase) initializeForProbeBase(concurrency uint, joinResultChannel chan *hashjoinWorkerResult) {
	// fetcher.probeResultChs is for transmitting the chunks which store the data of
	// ProbeSideExec; they'll be written by the probe side fetcher goroutine and read by
	// the join workers.
	fetcher.probeResultChs = make([]chan *chunk.Chunk, concurrency)
	for i := range concurrency {
		fetcher.probeResultChs[i] = make(chan *chunk.Chunk, 1)
	}
	// fetcher.probeChkResourceCh is for transmitting the used ProbeSideExec chunks from
	// the join workers back to the probe side fetcher.
	fetcher.probeChkResourceCh = make(chan *probeChkResource, concurrency)
	for i := range concurrency {
		fetcher.probeChkResourceCh <- &probeChkResource{
			chk:  exec.NewFirstChunk(fetcher.ProbeSideExec),
			dest: fetcher.probeResultChs[i],
		}
	}
	fetcher.joinResultChannel = joinResultChannel
}
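
// handleProbeSideFetcherPanic closes all probe result channels so the join workers can
// exit, and forwards the recovered panic (if any) as an error to the main goroutine.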
func (fetcher *probeSideTupleFetcherBase) handleProbeSideFetcherPanic(r any) {
	for i := range fetcher.probeResultChs {
		close(fetcher.probeResultChs[i])
	}
	if r != nil {
		fetcher.joinResultChannel <- &hashjoinWorkerResult{err: util.GetRecoverError(r)}
	}
}
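
// isBuildSideEmpty and isSpillTriggered are callbacks supplied by the concrete hash join
// implementation, so the shared fetch logic can stay independent of its internals.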
type isBuildSideEmpty func() bool
type isSpillTriggered func() bool
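
// wait4BuildSide blocks until the build side has finished (or the executor is closed),
// then reports whether the probe phase can be skipped and whether the build succeeded.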
func wait4BuildSide(isBuildEmpty isBuildSideEmpty, checkSpill isSpillTriggered, canSkipIfBuildEmpty, needScanAfterProbeDone bool, hashJoinCtx *hashJoinCtxBase) (skipProbe bool, buildSuccess bool) {
	var err error
	skipProbe = false
	buildSuccess = false
	select {
	case <-hashJoinCtx.closeCh:
		// current executor is closed, no need to probe
		skipProbe = true
	case err = <-hashJoinCtx.buildFinished:
		if err != nil {
			// build meets an error, no need to probe
			skipProbe = true
		} else {
			buildSuccess = true
		}
	}
	// only check whether the build side is empty if the build succeeded
	if buildSuccess && isBuildEmpty() && !checkSpill() && canSkipIfBuildEmpty {
		// if the build side is empty, the probe can be skipped when canSkipIfBuildEmpty is true (e.g. inner join)
		skipProbe = true
	}
	if err != nil {
		// if err is not nil, send out the error
		hashJoinCtx.joinResultCh <- &hashjoinWorkerResult{
			err: err,
		}
	} else if skipProbe {
		// if the probe is skipped and there is no need to scan the hash table after probe,
		// the whole hash join is finished
		if !needScanAfterProbeDone {
			hashJoinCtx.finished.Store(true)
		}
	}
	return skipProbe, buildSuccess
}
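
// getProbeSideResource returns an empty chunk for the next probe side fetch, or nil when
// the join has finished, the executor is closed, or the resource channel is closed.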
func (fetcher *probeSideTupleFetcherBase) getProbeSideResource(shouldLimitProbeFetchSize bool, maxChunkSize int, hashJoinCtx *hashJoinCtxBase) *probeChkResource {
	if hashJoinCtx.finished.Load() {
		return nil
	}
	var probeSideResource *probeChkResource
	var ok bool
	select {
	case <-hashJoinCtx.closeCh:
		return nil
	case probeSideResource, ok = <-fetcher.probeChkResourceCh:
		if !ok {
			return nil
		}
	}
	if shouldLimitProbeFetchSize {
		required := int(atomic.LoadInt64(&fetcher.requiredRows))
		probeSideResource.chk.SetRequiredRows(required, maxChunkSize)
	}
	return probeSideResource
}

// fetchProbeSideChunks fetches chunks from the probe side (the big table) in a background
// goroutine and sends them to multiple channels which will be read by multiple join workers.
func (fetcher *probeSideTupleFetcherBase) fetchProbeSideChunks(ctx context.Context, maxChunkSize int, isBuildEmpty isBuildSideEmpty, checkSpill isSpillTriggered, canSkipIfBuildEmpty, needScanAfterProbeDone, shouldLimitProbeFetchSize bool, hashJoinCtx *hashJoinCtxBase) {
	hasWaitedForBuild := false
	for {
		probeSideResource := fetcher.getProbeSideResource(shouldLimitProbeFetchSize, maxChunkSize, hashJoinCtx)
		if probeSideResource == nil {
			return
		}
		probeSideResult := probeSideResource.chk
		err := exec.Next(ctx, fetcher.ProbeSideExec, probeSideResult)
		failpoint.Inject("ConsumeRandomPanic", nil)
		if err != nil {
			hashJoinCtx.joinResultCh <- &hashjoinWorkerResult{
				err: err,
			}
			return
		}
		err = triggerIntest(2)
		if err != nil {
			hashJoinCtx.joinResultCh <- &hashjoinWorkerResult{
				err: err,
			}
			return
		}
		if !hasWaitedForBuild {
			failpoint.Inject("issue30289", func(val failpoint.Value) {
				if val.(bool) {
					probeSideResult.Reset()
				}
			})
			skipProbe, buildSuccess := wait4BuildSide(isBuildEmpty, checkSpill, canSkipIfBuildEmpty, needScanAfterProbeDone, hashJoinCtx)
			fetcher.buildSuccess = buildSuccess
			if skipProbe {
				// there is no need to probe, so just return
				return
			}
			hasWaitedForBuild = true
		}
		if probeSideResult.NumRows() == 0 {
			return
		}
		probeSideResource.dest <- probeSideResult
	}
}
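
// probeWorkerBase holds the per-worker channels used to recycle probe side chunks and
// join result chunks between a probe worker and the main goroutine.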
type probeWorkerBase struct {
	WorkerID           uint
	probeChkResourceCh chan *probeChkResource
	joinChkResourceCh  chan *chunk.Chunk
	probeResultCh      chan *chunk.Chunk
}
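
// initializeForProbe wires the worker to the shared probe channels and pre-allocates the
// single reusable join result chunk that cycles through joinChkResourceCh.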
func (worker *probeWorkerBase) initializeForProbe(probeChkResourceCh chan *probeChkResource, probeResultCh chan *chunk.Chunk, joinExec exec.Executor) {
	// worker.joinChkResourceCh is for transmitting the reused join result chunks
	// from the main thread to probe worker goroutines.
	worker.joinChkResourceCh = make(chan *chunk.Chunk, 1)
	worker.joinChkResourceCh <- exec.NewFirstChunk(joinExec)
	worker.probeChkResourceCh = probeChkResourceCh
	worker.probeResultCh = probeResultCh
}
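
// buildWorkerBase fetches rows from the build side executor; BuildKeyColIdx records the
// column offsets of the join keys in the build side schema.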
type buildWorkerBase struct {
	BuildSideExec  exec.Executor
	BuildKeyColIdx []int
}
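
// syncerAdd and syncerDone are nil-safe wrappers around sync.WaitGroup: the
// fetcher-and-worker syncer may be nil, in which case they are no-ops.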
func syncerAdd(syncer *sync.WaitGroup) {
	if syncer != nil {
		syncer.Add(1)
	}
}

func syncerDone(syncer *sync.WaitGroup) {
	if syncer != nil {
		syncer.Done()
	}
}
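
// checkAndSpillRowTableIfNeeded waits for all build workers to finish their in-flight
// chunks and spills the in-memory row table when the spill helper requests it; it is a
// no-op when no fetcher-and-worker syncer is supplied.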
func checkAndSpillRowTableIfNeeded(fetcherAndWorkerSyncer *sync.WaitGroup, spillHelper *hashJoinSpillHelper) error {
	if fetcherAndWorkerSyncer == nil {
		return nil
	}
	if spillHelper.isSpillNeeded() {
		// Wait for all workers to stop
		fetcherAndWorkerSyncer.Wait()
		return spillHelper.spillRowTable(nil)
	}
	return nil
}

// fetchBuildSideRows fetches all rows from the build side executor and sends them,
// chunk by chunk, to chkCh for the build workers to consume.
func (w *buildWorkerBase) fetchBuildSideRows(ctx context.Context, hashJoinCtx *hashJoinCtxBase, fetcherAndWorkerSyncer *sync.WaitGroup, spillHelper *hashJoinSpillHelper, chkCh chan<- *chunk.Chunk, errCh chan<- error, doneCh <-chan struct{}) {
	hasError := false
	// Deferred calls run last-in-first-out, so registering close(chkCh) before the defer
	// below guarantees that chkCh is closed only after the remaining rows have been
	// spilled; otherwise there would be a data race.
	defer close(chkCh)
	defer func() {
		if r := recover(); r != nil {
			errCh <- util.GetRecoverError(r)
			return
		}
		if hasError {
			return
		}
		if fetcherAndWorkerSyncer != nil {
			if spillHelper.isSpillTriggered() {
				// Spill remaining rows
				fetcherAndWorkerSyncer.Wait()
				err := spillHelper.spillRemainingRows()
				if err != nil {
					errCh <- errors.Trace(err)
				}
			}
		}
	}()
	var err error
	failpoint.Inject("issue30289", func(val failpoint.Value) {
		if val.(bool) {
			err = errors.Errorf("issue30289 build return error")
			errCh <- errors.Trace(err)
			return
		}
	})
	failpoint.Inject("issue42662_1", func(val failpoint.Value) {
		if val.(bool) {
			if hashJoinCtx.SessCtx.GetSessionVars().ConnectionID != 0 {
				// consume 170MB of memory; this SQL should be tracked by MemoryTop1Tracker
				hashJoinCtx.memTracker.Consume(170 * 1024 * 1024)
			}
			return
		}
	})
	sessVars := hashJoinCtx.SessCtx.GetSessionVars()
	failpoint.Inject("issue51998", func(val failpoint.Value) {
		if val.(bool) {
			time.Sleep(2 * time.Second)
		}
	})
	for {
		err := checkAndSpillRowTableIfNeeded(fetcherAndWorkerSyncer, spillHelper)
		issue59377Intest(&err)
		if err != nil {
			hasError = true
			errCh <- errors.Trace(err)
			return
		}
		err = triggerIntest(2)
		if err != nil {
			hasError = true
			errCh <- errors.Trace(err)
			return
		}
		if hashJoinCtx.finished.Load() {
			return
		}
		chk := hashJoinCtx.ChunkAllocPool.Alloc(w.BuildSideExec.RetFieldTypes(), sessVars.MaxChunkSize, sessVars.MaxChunkSize)
		err = exec.Next(ctx, w.BuildSideExec, chk)
		failpoint.Inject("issue51998", func(val failpoint.Value) {
			if val.(bool) {
				hasError = true
				err = errors.Errorf("issue51998 build return error")
			}
		})
		if err != nil {
			hasError = true
			errCh <- errors.Trace(err)
			return
		}
		failpoint.Inject("errorFetchBuildSideRowsMockOOMPanic", nil)
		failpoint.Inject("ConsumeRandomPanic", nil)
		if chk.NumRows() == 0 {
			return
		}
		syncerAdd(fetcherAndWorkerSyncer)
		select {
		case <-doneCh:
			syncerDone(fetcherAndWorkerSyncer)
			return
		case <-hashJoinCtx.closeCh:
			syncerDone(fetcherAndWorkerSyncer)
			return
		case chkCh <- chk:
		}
	}
}

// probeChkResource stores the result of the join probe side fetch worker.
// `dest` is for Chunk reuse: after a join worker has processed the probe side chunk read
// from `dest`, it stores the used chunk as `chk`; the probe side fetch worker then puts
// new data into `chk` and writes `chk` into `dest`.
type probeChkResource struct {
	chk  *chunk.Chunk
	dest chan<- *chunk.Chunk
}