Files
tidb/executor/new_executor.go
zimulala 4f840a7754 *: Tiny clean up (#1244)
* *: Tiny clean up

* optimizer: update test
2016-05-20 12:59:01 +08:00

239 lines
5.5 KiB
Go

// Copyright 2016 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package executor
import (
"github.com/juju/errors"
"github.com/pingcap/tidb/ast"
"github.com/pingcap/tidb/context"
"github.com/pingcap/tidb/evaluator"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/types"
)
// HashJoinExec implements the hash join algorithm.
type HashJoinExec struct {
hashTable map[string][]*Row
smallHashKey []ast.ExprNode
bigHashKey []ast.ExprNode
smallExec Executor
bigExec Executor
prepared bool
fields []*ast.ResultField
ctx context.Context
smallFilter ast.ExprNode
bigFilter ast.ExprNode
otherFilter ast.ExprNode
outter bool
leftSmall bool
matchedRows []*Row
cursor int
}
func joinTwoRow(a *Row, b *Row) *Row {
ret := &Row{
RowKeys: make([]*RowKeyEntry, 0, len(a.RowKeys)+len(b.RowKeys)),
Data: make([]types.Datum, 0, len(a.Data)+len(b.Data)),
}
ret.RowKeys = append(ret.RowKeys, a.RowKeys...)
ret.RowKeys = append(ret.RowKeys, b.RowKeys...)
ret.Data = append(ret.Data, a.Data...)
ret.Data = append(ret.Data, b.Data...)
return ret
}
func (e *HashJoinExec) getHashKey(exprs []ast.ExprNode) ([]byte, error) {
vals := make([]types.Datum, 0, len(exprs))
for _, expr := range exprs {
v, err := evaluator.Eval(e.ctx, expr)
if err != nil {
return nil, errors.Trace(err)
}
vals = append(vals, v)
}
if len(vals) == 0 {
return []byte{}, nil
}
return codec.EncodeValue([]byte{}, vals...)
}
// Fields implements Executor Fields interface.
func (e *HashJoinExec) Fields() []*ast.ResultField {
return e.fields
}
// Close implements Executor Close interface.
func (e *HashJoinExec) Close() error {
e.hashTable = nil
e.matchedRows = nil
return nil
}
func (e *HashJoinExec) prepare() error {
e.hashTable = make(map[string][]*Row)
e.cursor = 0
for {
row, err := e.smallExec.Next()
if err != nil {
return errors.Trace(err)
}
if row == nil {
e.smallExec.Close()
break
}
matched := true
if e.smallFilter != nil {
matched, err = evaluator.EvalBool(e.ctx, e.smallFilter)
if err != nil {
return errors.Trace(err)
}
if !matched {
continue
}
}
hashcode, err := e.getHashKey(e.smallHashKey)
if err != nil {
return errors.Trace(err)
}
if rows, ok := e.hashTable[string(hashcode)]; !ok {
e.hashTable[string(hashcode)] = []*Row{row}
} else {
e.hashTable[string(hashcode)] = append(rows, row)
}
}
e.prepared = true
return nil
}
func (e *HashJoinExec) constructMatchedRows(bigRow *Row) (matchedRows []*Row, err error) {
hashcode, err := e.getHashKey(e.bigHashKey)
if err != nil {
return nil, errors.Trace(err)
}
rows, ok := e.hashTable[string(hashcode)]
if !ok {
return
}
// match eq condition
for _, smallRow := range rows {
//TODO: remove result fields in order to reduce memory copy cost.
otherMatched := true
if e.otherFilter != nil {
startKey := 0
if !e.leftSmall {
startKey = len(bigRow.Data)
}
for i, data := range smallRow.Data {
e.fields[i+startKey].Expr.SetValue(data.GetValue())
}
otherMatched, err = evaluator.EvalBool(e.ctx, e.otherFilter)
}
if err != nil {
return nil, errors.Trace(err)
}
if otherMatched {
if e.leftSmall {
matchedRows = append(matchedRows, joinTwoRow(smallRow, bigRow))
} else {
matchedRows = append(matchedRows, joinTwoRow(bigRow, smallRow))
}
}
}
return matchedRows, nil
}
func (e *HashJoinExec) fillNullRow(bigRow *Row) (returnRow *Row, err error) {
smallRow := &Row{
RowKeys: make([]*RowKeyEntry, len(e.smallExec.Fields())),
Data: make([]types.Datum, len(e.smallExec.Fields())),
}
for _, data := range smallRow.Data {
data.SetNull()
}
if e.leftSmall {
returnRow = joinTwoRow(smallRow, bigRow)
} else {
returnRow = joinTwoRow(bigRow, smallRow)
}
for i, data := range returnRow.Data {
e.fields[i].Expr.SetValue(data.GetValue())
}
return returnRow, nil
}
func (e *HashJoinExec) returnRecord() (ret *Row, ok bool) {
if e.cursor >= len(e.matchedRows) {
return nil, false
}
for i, data := range e.matchedRows[e.cursor].Data {
e.fields[i].Expr.SetValue(data.GetValue())
}
e.cursor++
return e.matchedRows[e.cursor-1], true
}
// Next implements Executor Next interface.
func (e *HashJoinExec) Next() (*Row, error) {
if !e.prepared {
if err := e.prepare(); err != nil {
return nil, errors.Trace(err)
}
}
row, ok := e.returnRecord()
if ok {
return row, nil
}
for {
bigRow, err := e.bigExec.Next()
if err != nil {
return nil, errors.Trace(err)
}
if bigRow == nil {
e.bigExec.Close()
return nil, nil
}
var matchedRows []*Row
bigMatched := true
if e.bigFilter != nil {
bigMatched, err = evaluator.EvalBool(e.ctx, e.bigFilter)
if err != nil {
return nil, errors.Trace(err)
}
}
if bigMatched {
matchedRows, err = e.constructMatchedRows(bigRow)
if err != nil {
return nil, errors.Trace(err)
}
}
e.matchedRows = matchedRows
e.cursor = 0
row, ok := e.returnRecord()
if ok {
return row, nil
} else if e.outter {
return e.fillNullRow(bigRow)
}
}
}