// Copyright 2016 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package executor

import (
	"sync"
	"sync/atomic"

	"github.com/cznic/mathutil"
	"github.com/juju/errors"
	"github.com/pingcap/tidb/expression"
	"github.com/pingcap/tidb/plan"
	"github.com/pingcap/tidb/tablecodec"
	"github.com/pingcap/tidb/terror"
	"github.com/pingcap/tidb/types"
	"github.com/pingcap/tidb/util/chunk"
	"github.com/pingcap/tidb/util/codec"
	"github.com/pingcap/tidb/util/mvmap"
	goctx "golang.org/x/net/context"
)

var (
	_ Executor = &HashJoinExec{}
	_ Executor = &NestedLoopApplyExec{}
)

// HashJoinExec implements the hash join algorithm.
type HashJoinExec struct {
	baseExecutor

	outerExec   Executor
	innerExec   Executor
	outerFilter expression.CNFExprs
	innerFilter expression.CNFExprs
	outerKeys   []*expression.Column
	innerKeys   []*expression.Column

	prepared bool
	// concurrency is the number of partition channels and join workers.
	concurrency     int
	hashTable       *mvmap.MVMap
	hashJoinBuffers []*hashJoinBuffer
	outerBufferChs  []chan *execResult
	// workerWaitGroup synchronizes the outer-row fetcher and the join workers.
	workerWaitGroup sync.WaitGroup
	finished        atomic.Value
	// closeCh is closed after all join workers finish, so Close can wait for every goroutine to exit.
	closeCh         chan struct{}
	joinType        plan.JoinType
	resultGenerator joinResultGenerator

	// resultBufferCh is the output channel that join workers send their results to.
	resultBufferCh chan *execResult
	resultBuffer   []Row
	resultCursor   int
}

type hashJoinBuffer struct {
	data  []types.Datum
	bytes []byte
}

// Close implements the Executor Close interface.
func (e *HashJoinExec) Close() error {
	e.finished.Store(true)
	if err := e.baseExecutor.Close(); err != nil {
		return errors.Trace(err)
	}

	if e.prepared {
		for range e.resultBufferCh {
		}
		<-e.closeCh
	}
	e.resultBuffer = nil
	return nil
}

// Open implements the Executor Open interface.
func (e *HashJoinExec) Open(goCtx goctx.Context) error {
	if err := e.baseExecutor.Open(goCtx); err != nil {
		return errors.Trace(err)
	}

	e.prepared = false
	e.hashJoinBuffers = make([]*hashJoinBuffer, 0, e.concurrency)
	for i := 0; i < e.concurrency; i++ {
		buffer := &hashJoinBuffer{
			data:  make([]types.Datum, len(e.outerKeys)),
			bytes: make([]byte, 0, 10000),
		}
		e.hashJoinBuffers = append(e.hashJoinBuffers, buffer)
	}

	e.closeCh = make(chan struct{})
	e.finished.Store(false)
	e.workerWaitGroup = sync.WaitGroup{}
	e.resultCursor = 0
	return nil
}

// makeJoinRow simply creates a new row that appends row b to row a.
func makeJoinRow(a Row, b Row) Row {
	ret := make([]types.Datum, 0, len(a)+len(b))
	ret = append(ret, a...)
	ret = append(ret, b...)
	return ret
}

func (e *HashJoinExec) encodeRow(b []byte, row Row) ([]byte, error) {
	loc := e.ctx.GetSessionVars().GetTimeZone()
	for _, datum := range row {
		tmp, err := tablecodec.EncodeValue(datum, loc)
		if err != nil {
			return nil, errors.Trace(err)
		}
		b = append(b, tmp...)
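		// Each datum's encoding is appended back to back; decodeRow later uses
		// codec.SetRawValues to split this flat byte slice into datums again.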
	}
	return b, nil
}

func (e *HashJoinExec) decodeRow(data []byte) (Row, error) {
	values := make([]types.Datum, e.innerExec.Schema().Len())
	err := codec.SetRawValues(data, values)
	if err != nil {
		return nil, errors.Trace(err)
	}
	err = decodeRawValues(values, e.innerExec.Schema(), e.ctx.GetSessionVars().GetTimeZone())
	if err != nil {
		return nil, errors.Trace(err)
	}
	return values, nil
}

// getJoinKey computes the hash key of a row on the given key columns.
// It returns a boolean indicating whether the key contains a null value, and a byte slice
// holding the resulting hash code.
func getJoinKey(cols []*expression.Column, row Row, vals []types.Datum, bytes []byte) (bool, []byte, error) {
	var err error
	for i, col := range cols {
		vals[i], err = col.Eval(row)
		if err != nil {
			return false, nil, errors.Trace(err)
		}
		if vals[i].IsNull() {
			return true, nil, nil
		}
	}
	if len(vals) == 0 {
		return false, nil, nil
	}
	bytes, err = codec.HashValues(bytes, vals...)
	return false, bytes, errors.Trace(err)
}

// fetchOuterRows fetches rows from the big table in a background goroutine
// and sends the rows to multiple channels which will be read by multiple join workers.
func (e *HashJoinExec) fetchOuterRows(goCtx goctx.Context) {
	defer func() {
		for _, outerBufferCh := range e.outerBufferChs {
			close(outerBufferCh)
		}
		e.workerWaitGroup.Done()
	}()

	bufferCapacity, maxBufferCapacity := 1, 128
	for i, noMoreData := 0, false; !noMoreData; i = (i + 1) % e.concurrency {
		outerBuffer := &execResult{rows: make([]Row, 0, bufferCapacity)}

		for !noMoreData && len(outerBuffer.rows) < bufferCapacity {
			if e.finished.Load().(bool) {
				return
			}
			outerRow, err := e.outerExec.Next(goCtx)
			if err != nil || outerRow == nil {
				outerBuffer.err = errors.Trace(err)
				noMoreData = true
				break
			}
			outerBuffer.rows = append(outerBuffer.rows, outerRow)
		}

		if noMoreData && len(outerBuffer.rows) == 0 && outerBuffer.err == nil {
			break
		}

		select {
		// TODO: Recover the code.
		// case <-e.ctx.GoCtx().Done():
		// 	return
		case e.outerBufferChs[i] <- outerBuffer:
			if !noMoreData && bufferCapacity < maxBufferCapacity {
				bufferCapacity <<= 1
			}
		}
	}
}

// prepare runs the first time 'Next' is called. It reads all data from the small table to build a
// hash table, then starts one goroutine to fetch rows from the big table and multiple join worker
// goroutines.
func (e *HashJoinExec) prepare(goCtx goctx.Context) error {
	e.hashTable = mvmap.NewMVMap()
	var buffer []byte
	for {
		innerRow, err := e.innerExec.Next(goCtx)
		if err != nil {
			return errors.Trace(err)
		}
		if innerRow == nil {
			break
		}

		matched, err := expression.EvalBool(e.innerFilter, innerRow, e.ctx)
		if err != nil {
			return errors.Trace(err)
		}
		if !matched {
			continue
		}

		hasNull, joinKey, err := getJoinKey(e.innerKeys, innerRow, e.hashJoinBuffers[0].data, nil)
		if err != nil {
			return errors.Trace(err)
		}
		if hasNull {
			continue
		}

		buffer = buffer[:0]
		buffer, err = e.encodeRow(buffer, innerRow)
		if err != nil {
			return errors.Trace(err)
		}
		e.hashTable.Put(joinKey, buffer)
	}

	e.prepared = true
	e.resultBufferCh = make(chan *execResult, e.concurrency)

	// If it is an inner join and the small table is completely filtered out, there is no need to
	// fetch the big table or start join workers. Otherwise, we start one goroutine to fetch outer
	// rows and e.concurrency goroutines to concatenate the matched inner and outer rows and filter
	// the result.
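	// Goroutine topology: fetchOuterRows partitions outer rows round-robin into e.outerBufferChs,
	// each runJoinWorker drains one of those channels, probes the hash table, and sends joined
	// rows to e.resultBufferCh, which Next consumes on the main goroutine.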
	if !(e.hashTable.Len() == 0 && e.joinType == plan.InnerJoin) {
		e.outerBufferChs = make([]chan *execResult, e.concurrency)
		for i := 0; i < e.concurrency; i++ {
			e.outerBufferChs[i] = make(chan *execResult, e.concurrency)
		}

		// Start a worker to fetch outer rows and partition them to join workers.
		e.workerWaitGroup.Add(1)
		go e.fetchOuterRows(goCtx)

		// Start e.concurrency join workers to probe the hash table and join inner and outer rows.
		for i := 0; i < e.concurrency; i++ {
			e.workerWaitGroup.Add(1)
			go e.runJoinWorker(i)
		}
	}

	// Start a goroutine to wait for the join workers to finish and then close the channels.
	go e.waitJoinWorkersAndCloseResultChan()
	return nil
}

func (e *HashJoinExec) waitJoinWorkersAndCloseResultChan() {
	e.workerWaitGroup.Wait()
	close(e.resultBufferCh)
	close(e.closeCh)
}

// filterOuters filters the outer rows stored in "outerBuffer" and moves all the matched rows ahead
// of the unmatched rows. The number of matched outer rows is returned as the first value.
func (e *HashJoinExec) filterOuters(outerBuffer *execResult, outerFilterResult []bool) (int, error) {
	if e.outerFilter == nil {
		return len(outerBuffer.rows), nil
	}

	outerFilterResult = outerFilterResult[:0]
	for _, outerRow := range outerBuffer.rows {
		matched, err := expression.EvalBool(e.outerFilter, outerRow, e.ctx)
		if err != nil {
			return 0, errors.Trace(err)
		}
		outerFilterResult = append(outerFilterResult, matched)
	}

	// Partition the buffer in place: "i" scans for unmatched rows from the front, "j" scans for
	// matched rows from the back, and mismatched pairs are swapped.
	i, j := 0, len(outerBuffer.rows)-1
	for i <= j {
		for i <= j && outerFilterResult[i] {
			i++
		}
		for i <= j && !outerFilterResult[j] {
			j--
		}
		if i <= j {
			outerFilterResult[i], outerFilterResult[j] = outerFilterResult[j], outerFilterResult[i]
			outerBuffer.rows[i], outerBuffer.rows[j] = outerBuffer.rows[j], outerBuffer.rows[i]
		}
	}
	return i, nil
}

// runJoinWorker does the join job in one goroutine.
func (e *HashJoinExec) runJoinWorker(workerID int) {
	bufferCapacity := 1024
	resultBuffer := &execResult{rows: make([]Row, 0, bufferCapacity)}
	outerFilterResult := make([]bool, 0, bufferCapacity)

	var outerBuffer *execResult
	for ok := true; ok; {
		select {
		// TODO: Recover the code.
		// case <-e.ctx.GoCtx().Done():
		// 	ok = false
		case outerBuffer, ok = <-e.outerBufferChs[workerID]:
		}
		if !ok || e.finished.Load().(bool) {
			break
		}
		if outerBuffer.err != nil {
			resultBuffer.err = errors.Trace(outerBuffer.err)
			break
		}

		numMatchedOuters, err := e.filterOuters(outerBuffer, outerFilterResult)
		if err != nil {
			resultBuffer.err = errors.Trace(err)
			break
		}
		// Process the unmatched outer rows.
		for _, unMatchedOuter := range outerBuffer.rows[numMatchedOuters:] {
			resultBuffer.rows, resultBuffer.err = e.resultGenerator.emit(unMatchedOuter, nil, resultBuffer.rows)
			if resultBuffer.err != nil {
				resultBuffer.err = errors.Trace(resultBuffer.err)
				break
			}
		}
		if resultBuffer.err != nil {
			break
		}
		// Process the matched outer rows.
		for _, outerRow := range outerBuffer.rows[:numMatchedOuters] {
			if len(resultBuffer.rows) >= bufferCapacity {
				e.resultBufferCh <- resultBuffer
				resultBuffer = &execResult{rows: make([]Row, 0, bufferCapacity)}
			}
			ok = e.joinOuterRow(workerID, outerRow, resultBuffer)
			if !ok {
				break
			}
		}
	}
	if len(resultBuffer.rows) > 0 || resultBuffer.err != nil {
		e.resultBufferCh <- resultBuffer
	}
	e.workerWaitGroup.Done()
}

// joinOuterRow creates result rows from an outer row and appends them to "resultBuffer".
// Every matching inner row generates one result row.
// If there is no matching inner row and it is an outer join, a null-filled result row is created.
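// It returns false only when an error has occurred, which tells runJoinWorker to stop processing
// the current batch of outer rows.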
func (e *HashJoinExec) joinOuterRow(workerID int, outerRow Row, resultBuffer *execResult) bool {
	buffer := e.hashJoinBuffers[workerID]
	hasNull, joinKey, err := getJoinKey(e.outerKeys, outerRow, buffer.data, buffer.bytes[:0:cap(buffer.bytes)])
	if err != nil {
		resultBuffer.err = errors.Trace(err)
		return false
	}

	if hasNull {
		resultBuffer.rows, resultBuffer.err = e.resultGenerator.emit(outerRow, nil, resultBuffer.rows)
		resultBuffer.err = errors.Trace(resultBuffer.err)
		return true
	}

	values := e.hashTable.Get(joinKey)
	if len(values) == 0 {
		resultBuffer.rows, resultBuffer.err = e.resultGenerator.emit(outerRow, nil, resultBuffer.rows)
		resultBuffer.err = errors.Trace(resultBuffer.err)
		return true
	}

	innerRows := make([]Row, 0, len(values))
	for _, value := range values {
		innerRow, err1 := e.decodeRow(value)
		if err1 != nil {
			resultBuffer.rows = nil
			resultBuffer.err = errors.Trace(err1)
			return false
		}
		innerRows = append(innerRows, innerRow)
	}

	resultBuffer.rows, resultBuffer.err = e.resultGenerator.emit(outerRow, innerRows, resultBuffer.rows)
	if resultBuffer.err != nil {
		resultBuffer.err = errors.Trace(resultBuffer.err)
		return false
	}
	return true
}

// Next implements the Executor Next interface.
func (e *HashJoinExec) Next(goCtx goctx.Context) (Row, error) {
	if !e.prepared {
		if err := e.prepare(goCtx); err != nil {
			return nil, errors.Trace(err)
		}
	}

	if e.resultCursor >= len(e.resultBuffer) {
		e.resultCursor = 0
		select {
		case resultBuffer, ok := <-e.resultBufferCh:
			if !ok {
				return nil, nil
			}
			if resultBuffer.err != nil {
				e.finished.Store(true)
				return nil, errors.Trace(resultBuffer.err)
			}
			e.resultBuffer = resultBuffer.rows
			// TODO: Recover the code.
			// case <-e.ctx.GoCtx().Done():
			// 	return nil, nil
		}
	}

	// len(e.resultBuffer) > 0 is guaranteed in the above "select".
	result := e.resultBuffer[e.resultCursor]
	e.resultCursor++
	return result, nil
}

// NestedLoopApplyExec is the executor for apply.
type NestedLoopApplyExec struct {
	baseExecutor

	innerRows   []Row
	cursor      int
	resultRows  []Row
	innerExec   Executor
	outerExec   Executor
	innerFilter expression.CNFExprs
	outerFilter expression.CNFExprs
	outer       bool

	resultGenerator joinResultGenerator

	outerSchema []*expression.CorrelatedColumn

	outerChunk       *chunk.Chunk
	outerChunkCursor int
	outerSelected    []bool
	innerList        *chunk.List
	innerChunk       *chunk.Chunk
	innerSelected    []bool
	resultChunk      *chunk.Chunk
}

// Close implements the Executor interface.
func (e *NestedLoopApplyExec) Close() error {
	e.resultRows = nil
	e.innerRows = nil
	return errors.Trace(e.outerExec.Close())
}

// Open implements the Executor interface.
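// Note that only the outer child is opened here; the inner child is re-opened for every outer row
// in prepare (row based) and fetchAllInners (chunk based).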
func (e *NestedLoopApplyExec) Open(goCtx goctx.Context) error {
	e.cursor = 0
	e.resultRows = e.resultRows[:0]
	e.innerRows = e.innerRows[:0]
	return errors.Trace(e.outerExec.Open(goCtx))
}

// fetchOuterRow returns the next outer row together with a flag reporting whether it passes the
// outer filter. For an outer join, rows that fail the filter are still returned so they can be
// null-extended; otherwise they are skipped.
func (e *NestedLoopApplyExec) fetchOuterRow(goCtx goctx.Context) (Row, bool, error) {
	for {
		outerRow, err := e.outerExec.Next(goCtx)
		if err != nil {
			return nil, false, errors.Trace(err)
		}
		if outerRow == nil {
			return nil, false, nil
		}

		matched, err := expression.EvalBool(e.outerFilter, outerRow, e.ctx)
		if err != nil {
			return nil, false, errors.Trace(err)
		}
		if matched {
			return outerRow, true, nil
		} else if e.outer {
			return outerRow, false, nil
		}
	}
}

// fetchSelectedOuterRow returns the next outer row that passes the outer filter. For an outer
// join, rows that fail the filter are directly emitted to e.resultChunk as null-extended rows.
func (e *NestedLoopApplyExec) fetchSelectedOuterRow(goCtx goctx.Context) (*chunk.Row, error) {
	for {
		if e.outerChunkCursor >= e.outerChunk.NumRows() {
			err := e.outerExec.NextChunk(goCtx, e.outerChunk)
			if err != nil {
				return nil, errors.Trace(err)
			}
			if e.outerChunk.NumRows() == 0 {
				return nil, nil
			}
			e.outerSelected, err = expression.VectorizedFilter(e.ctx, e.outerFilter, e.outerChunk, e.outerSelected)
			if err != nil {
				return nil, errors.Trace(err)
			}
			e.outerChunkCursor = 0
		}
		outerRow := e.outerChunk.GetRow(e.outerChunkCursor)
		selected := e.outerSelected[e.outerChunkCursor]
		e.outerChunkCursor++
		if selected {
			return &outerRow, nil
		} else if e.outer {
			err := e.resultGenerator.emitToChunk(outerRow, nil, e.resultChunk)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
	}
}

// prepare reads all data from the inner table and stores them in a slice.
func (e *NestedLoopApplyExec) prepare(goCtx goctx.Context) error {
	err := e.innerExec.Open(goctx.TODO())
	if err != nil {
		return errors.Trace(err)
	}
	defer terror.Call(e.innerExec.Close)
	e.innerRows = e.innerRows[:0]
	for {
		row, err := e.innerExec.Next(goCtx)
		if err != nil {
			return errors.Trace(err)
		}
		if row == nil {
			return nil
		}

		matched, err := expression.EvalBool(e.innerFilter, row, e.ctx)
		if err != nil {
			return errors.Trace(err)
		}
		if matched {
			e.innerRows = append(e.innerRows, row)
		}
	}
}

// fetchAllInners reads all data from the inner table and stores them in a List.
func (e *NestedLoopApplyExec) fetchAllInners(goCtx goctx.Context) error {
	err := e.innerExec.Open(goCtx)
	defer terror.Call(e.innerExec.Close)
	if err != nil {
		return errors.Trace(err)
	}
	e.innerList.Reset()
	for {
		err := e.innerExec.NextChunk(goCtx, e.innerChunk)
		if err != nil {
			return errors.Trace(err)
		}
		if e.innerChunk.NumRows() == 0 {
			return nil
		}

		e.innerSelected, err = expression.VectorizedFilter(e.ctx, e.innerFilter, e.innerChunk, e.innerSelected)
		if err != nil {
			return errors.Trace(err)
		}
		for row := e.innerChunk.Begin(); row != e.innerChunk.End(); row = row.Next() {
			if e.innerSelected[row.Idx()] {
				e.innerList.AppendRow(row)
			}
		}
	}
}

// doJoin joins the outer row with the buffered inner rows. If the outer row did not pass the
// outer filter and this is an outer join, a null-extended row is emitted instead.
func (e *NestedLoopApplyExec) doJoin(outerRow Row, match bool) ([]Row, error) {
	e.resultRows = e.resultRows[0:0]
	var err error
	if !match && e.outer {
		e.resultRows, err = e.resultGenerator.emit(outerRow, nil, e.resultRows)
		if err != nil {
			return nil, errors.Trace(err)
		}
		return e.resultRows, nil
	}
	e.resultRows, err = e.resultGenerator.emit(outerRow, e.innerRows, e.resultRows)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return e.resultRows, nil
}

// Next implements the Executor interface.
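// For every selected outer row, Next binds the correlated columns in e.outerSchema, re-evaluates
// the inner child via prepare, joins the two sides with doJoin, and buffers the produced rows in
// e.resultRows until the caller has consumed them.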
func (e *NestedLoopApplyExec) Next(goCtx goctx.Context) (Row, error) {
	for {
		if e.cursor < len(e.resultRows) {
			row := e.resultRows[e.cursor]
			e.cursor++
			return row, nil
		}
		outerRow, match, err := e.fetchOuterRow(goCtx)
		if outerRow == nil || err != nil {
			return nil, errors.Trace(err)
		}
		for _, col := range e.outerSchema {
			*col.Data = outerRow[col.Index]
		}
		err = e.prepare(goCtx)
		if err != nil {
			return nil, errors.Trace(err)
		}
		e.resultRows, err = e.doJoin(outerRow, match)
		if err != nil {
			return nil, errors.Trace(err)
		}
		e.cursor = 0
	}
}

// NextChunk implements the Executor interface.
func (e *NestedLoopApplyExec) NextChunk(goCtx goctx.Context, chk *chunk.Chunk) error {
	chk.Reset()
	for {
		appendSize := mathutil.Min(e.resultChunk.NumRows()-e.cursor, e.maxChunkSize)
		if appendSize > 0 {
			chk.Append(e.resultChunk, e.cursor, e.cursor+appendSize)
			e.cursor += appendSize
			if chk.NumRows() == e.maxChunkSize {
				return nil
			}
		}
		outerRow, err := e.fetchSelectedOuterRow(goCtx)
		if outerRow == nil || err != nil {
			return errors.Trace(err)
		}
		for _, col := range e.outerSchema {
			*col.Data = outerRow.GetDatum(col.Index, col.RetType)
		}
		err = e.fetchAllInners(goCtx)
		if err != nil {
			return errors.Trace(err)
		}
		e.resultChunk.Reset()
		iter := chunk.NewListIterator(e.innerList)
		err = e.resultGenerator.emitToChunk(*outerRow, iter, e.resultChunk)
		if err != nil {
			return errors.Trace(err)
		}
		e.cursor = 0
	}
}
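
// The sketch below is illustrative only and is not part of the original executor: the function
// name and the literal values are hypothetical. It demonstrates the build/probe pattern that
// prepare() and joinOuterRow() rely on, i.e. hashing the join-key datums with codec.HashValues
// and using the resulting bytes as the key of an mvmap.MVMap that maps to one or more encoded
// inner rows.
func exampleHashJoinBuildProbe() ([][]byte, error) {
	table := mvmap.NewMVMap()

	// Build phase: hash the inner row's join key and store the encoded inner row under it.
	innerKey, err := codec.HashValues(nil, types.NewIntDatum(1))
	if err != nil {
		return nil, errors.Trace(err)
	}
	table.Put(innerKey, []byte("encoded inner row"))

	// Probe phase: hash the outer row's join key the same way and fetch all inner rows that
	// share the key. Equal datums always hash to equal keys.
	outerKey, err := codec.HashValues(nil, types.NewIntDatum(1))
	if err != nil {
		return nil, errors.Trace(err)
	}
	return table.Get(outerKey), nil
}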