executor: reduce memory usage and GC overhead for hash join. (#2957)

Implemented and use MVMap to reduce GC overhead and memory usage for hash join.
This commit is contained in:
Ewan Chou
2017-03-30 23:04:02 +08:00
committed by GitHub
parent 6860e0e6b7
commit a168f413ba
7 changed files with 329 additions and 41 deletions

View File

@ -44,9 +44,13 @@ const (
func resultRowToRow(t table.Table, h int64, data []types.Datum, tableAsName *model.CIStr) *Row {
entry := &RowKeyEntry{
Handle: h,
Tbl: t,
TableAsName: tableAsName,
Handle: h,
Tbl: t,
}
if tableAsName != nil && tableAsName.L != "" {
entry.TableName = tableAsName.L
} else {
entry.TableName = t.Meta().Name.L
}
return &Row{Data: data, RowKeys: []*RowKeyEntry{entry}}
}

View File

@ -100,7 +100,7 @@ type RowKeyEntry struct {
// Row key.
Handle int64
// Table alias name.
TableAsName *model.CIStr
TableName string
}
// Executor executes a query.
@ -691,9 +691,13 @@ func (e *TableScanExec) getRow(handle int64) (*Row, error) {
// Put rowKey to the tail of record row.
rke := &RowKeyEntry{
Tbl: e.t,
Handle: handle,
TableAsName: e.asName,
Tbl: e.t,
Handle: handle,
}
if e.asName != nil && e.asName.L != "" {
rke.TableName = e.asName.L
} else {
rke.TableName = e.t.Meta().Name.L
}
row.RowKeys = append(row.RowKeys, rke)
return row, nil

View File

@ -22,6 +22,7 @@ import (
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/mvmap"
"github.com/pingcap/tidb/util/types"
)
@ -34,7 +35,7 @@ var (
// HashJoinExec implements the hash join algorithm.
type HashJoinExec struct {
hashTable map[string][]*Row
hashTable *mvmap.MVMap
smallHashKey []*expression.Column
bigHashKey []*expression.Column
smallExec Executor
@ -66,6 +67,10 @@ type HashJoinExec struct {
// Channels for output.
resultCh chan *execResult
// rowKeyCache is used to store the table and table name from a row.
// Because every row has the same table name and table, we can use a single row key cache.
rowKeyCache []*RowKeyEntry
}
// hashJoinCtx holds the variables needed to do a hash join in one of many concurrent goroutines.
@ -104,9 +109,9 @@ func makeJoinRow(a *Row, b *Row) *Row {
return ret
}
// getHashKey gets the hash key when given a row and hash columns.
// getJoinKey gets the hash key when given a row and hash columns.
// It will return a boolean value representing if the hash key has null, a byte slice representing the result hash code.
func getHashKey(sc *variable.StatementContext, cols []*expression.Column, row *Row, targetTypes []*types.FieldType,
func getJoinKey(sc *variable.StatementContext, cols []*expression.Column, row *Row, targetTypes []*types.FieldType,
vals []types.Datum, bytes []byte) (bool, []byte, error) {
var err error
for i, col := range cols {
@ -208,9 +213,10 @@ func (e *HashJoinExec) prepare() error {
e.wg.Add(1)
go e.fetchBigExec()
e.hashTable = make(map[string][]*Row)
e.hashTable = mvmap.NewMVMap()
e.cursor = 0
sc := e.ctx.GetSessionVars().StmtCtx
var buffer []byte
for {
row, err := e.smallExec.Next()
if err != nil {
@ -231,18 +237,19 @@ func (e *HashJoinExec) prepare() error {
continue
}
}
hasNull, hashcode, err := getHashKey(sc, e.smallHashKey, row, e.targetTypes, e.hashJoinContexts[0].datumBuffer, nil)
hasNull, joinKey, err := getJoinKey(sc, e.smallHashKey, row, e.targetTypes, e.hashJoinContexts[0].datumBuffer, nil)
if err != nil {
return errors.Trace(err)
}
if hasNull {
continue
}
if rows, ok := e.hashTable[string(hashcode)]; !ok {
e.hashTable[string(hashcode)] = []*Row{row}
} else {
e.hashTable[string(hashcode)] = append(rows, row)
buffer = buffer[:0]
buffer, err = e.encodeRow(buffer, row)
if err != nil {
return errors.Trace(err)
}
e.hashTable.Put(joinKey, buffer)
}
e.resultCh = make(chan *execResult, e.concurrency)
@ -257,6 +264,54 @@ func (e *HashJoinExec) prepare() error {
return nil
}
func (e *HashJoinExec) encodeRow(b []byte, row *Row) ([]byte, error) {
numRowKeys := int64(len(row.RowKeys))
b = codec.EncodeVarint(b, numRowKeys)
for _, rowKey := range row.RowKeys {
b = codec.EncodeVarint(b, rowKey.Handle)
}
if numRowKeys > 0 && e.rowKeyCache == nil {
e.rowKeyCache = make([]*RowKeyEntry, len(row.RowKeys))
for i := 0; i < len(row.RowKeys); i++ {
rk := new(RowKeyEntry)
rk.Tbl = row.RowKeys[i].Tbl
rk.TableName = row.RowKeys[i].TableName
e.rowKeyCache[i] = rk
}
}
b, err := codec.EncodeValue(b, row.Data...)
return b, errors.Trace(err)
}
func (e *HashJoinExec) decodeRow(data []byte) (*Row, error) {
row := new(Row)
data, entryLen, err := codec.DecodeVarint(data)
if err != nil {
return nil, errors.Trace(err)
}
for i := 0; i < int(entryLen); i++ {
entry := new(RowKeyEntry)
data, entry.Handle, err = codec.DecodeVarint(data)
if err != nil {
return nil, errors.Trace(err)
}
entry.Tbl = e.rowKeyCache[i].Tbl
entry.TableName = e.rowKeyCache[i].TableName
row.RowKeys = append(row.RowKeys, entry)
}
values := make([]types.Datum, e.smallExec.Schema().Len())
err = codec.SetRawValues(data, values)
if err != nil {
return nil, errors.Trace(err)
}
err = decodeRawValues(values, e.smallExec.Schema())
if err != nil {
return nil, errors.Trace(err)
}
row.Data = values
return row, nil
}
func (e *HashJoinExec) waitJoinWorkersAndCloseResultChan() {
e.wg.Wait()
close(e.resultCh)
@ -341,7 +396,7 @@ func (e *HashJoinExec) joinOneBigRow(ctx *hashJoinCtx, bigRow *Row, result *exec
// constructMatchedRows creates matching result rows from a row in the big table.
func (e *HashJoinExec) constructMatchedRows(ctx *hashJoinCtx, bigRow *Row) (matchedRows []*Row, err error) {
sc := e.ctx.GetSessionVars().StmtCtx
hasNull, hashcode, err := getHashKey(sc, e.bigHashKey, bigRow, e.targetTypes, ctx.datumBuffer, ctx.hashKeyBuffer[0:0:cap(ctx.hashKeyBuffer)])
hasNull, joinKey, err := getJoinKey(sc, e.bigHashKey, bigRow, e.targetTypes, ctx.datumBuffer, ctx.hashKeyBuffer[0:0:cap(ctx.hashKeyBuffer)])
if err != nil {
return nil, errors.Trace(err)
}
@ -349,12 +404,17 @@ func (e *HashJoinExec) constructMatchedRows(ctx *hashJoinCtx, bigRow *Row) (matc
if hasNull {
return
}
rows, ok := e.hashTable[string(hashcode)]
if !ok {
values := e.hashTable.Get(joinKey)
if len(values) == 0 {
return
}
// match eq condition
for _, smallRow := range rows {
for _, value := range values {
var smallRow *Row
smallRow, err = e.decodeRow(value)
if err != nil {
return nil, errors.Trace(err)
}
otherMatched := true
var matchedRow *Row
if e.leftSmall {
@ -372,7 +432,6 @@ func (e *HashJoinExec) constructMatchedRows(ctx *hashJoinCtx, bigRow *Row) (matc
matchedRows = append(matchedRows, matchedRow)
}
}
return matchedRows, nil
}
@ -662,7 +721,7 @@ func (e *HashSemiJoinExec) prepare() error {
continue
}
}
hasNull, hashcode, err := getHashKey(sc, e.smallHashKey, row, e.targetTypes, make([]types.Datum, len(e.smallHashKey)), nil)
hasNull, hashcode, err := getJoinKey(sc, e.smallHashKey, row, e.targetTypes, make([]types.Datum, len(e.smallHashKey)), nil)
if err != nil {
return errors.Trace(err)
}
@ -683,7 +742,7 @@ func (e *HashSemiJoinExec) prepare() error {
func (e *HashSemiJoinExec) rowIsMatched(bigRow *Row) (matched bool, hasNull bool, err error) {
sc := e.ctx.GetSessionVars().StmtCtx
hasNull, hashcode, err := getHashKey(sc, e.bigHashKey, bigRow, e.targetTypes, make([]types.Datum, len(e.smallHashKey)), nil)
hasNull, hashcode, err := getJoinKey(sc, e.bigHashKey, bigRow, e.targetTypes, make([]types.Datum, len(e.smallHashKey)), nil)
if err != nil {
return false, false, errors.Trace(err)
}

View File

@ -258,7 +258,13 @@ func (us *UnionScanExec) buildAndSortAddedRows(t table.Table, asName *model.CISt
continue
}
}
rowKeyEntry := &RowKeyEntry{Handle: h, Tbl: t, TableAsName: asName}
rowKeyEntry := &RowKeyEntry{Handle: h, Tbl: t}
if asName != nil && asName.L != "" {
rowKeyEntry.TableName = asName.L
} else {
rowKeyEntry.TableName = t.Meta().Name.L
}
row := &Row{Data: newData, RowKeys: []*RowKeyEntry{rowKeyEntry}}
us.addedRows = append(us.addedRows, row)
}

View File

@ -214,20 +214,12 @@ func (e *DeleteExec) deleteMultiTables() error {
}
func isMatchTableName(entry *RowKeyEntry, tblMap map[int64][]string) bool {
var name string
if entry.TableAsName != nil {
name = entry.TableAsName.L
}
if len(name) == 0 {
name = entry.Tbl.Meta().Name.L
}
names, ok := tblMap[entry.Tbl.Meta().ID]
if !ok {
return false
}
for _, n := range names {
if n == name {
if n == entry.TableName {
return true
}
}
@ -1202,16 +1194,9 @@ func (e *UpdateExec) fetchRows() error {
}
func getTableOffset(schema *expression.Schema, entry *RowKeyEntry) int {
t := entry.Tbl
var tblName string
if entry.TableAsName == nil || len(entry.TableAsName.L) == 0 {
tblName = t.Meta().Name.L
} else {
tblName = entry.TableAsName.L
}
for i := 0; i < schema.Len(); i++ {
s := schema.Columns[i]
if s.TblName.L == tblName {
if s.TblName.L == entry.TableName {
return i
}
}

167
util/mvmap/mvmap.go Normal file
View File

@ -0,0 +1,167 @@
// Copyright 2017 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package mvmap
import (
"bytes"
"hash"
"hash/fnv"
)
type entry struct {
addr dataAddr
keyLen uint32
valLen uint32
next entryAddr
}
type entryStore struct {
slices [][]entry
sliceIdx uint32
sliceLen uint32
}
type dataStore struct {
slices [][]byte
sliceIdx uint32
sliceLen uint32
}
type entryAddr struct {
sliceIdx uint32
offset uint32
}
type dataAddr struct {
sliceIdx uint32
offset uint32
}
const (
maxDataSliceLen = 64 * 1024
maxEntrySliceLen = 8 * 1024
)
func (ds *dataStore) put(key, value []byte) dataAddr {
dataLen := uint32(len(key) + len(value))
if ds.sliceLen != 0 && ds.sliceLen+dataLen > maxDataSliceLen {
ds.slices = append(ds.slices, make([]byte, 0, max(maxDataSliceLen, int(dataLen))))
ds.sliceLen = 0
ds.sliceIdx++
}
addr := dataAddr{sliceIdx: ds.sliceIdx, offset: ds.sliceLen}
slice := ds.slices[ds.sliceIdx]
slice = append(slice, key...)
slice = append(slice, value...)
ds.slices[ds.sliceIdx] = slice
ds.sliceLen += dataLen
return addr
}
func max(a, b int) int {
if a > b {
return a
}
return b
}
func (ds *dataStore) get(e entry, key []byte) []byte {
slice := ds.slices[e.addr.sliceIdx]
valOffset := e.addr.offset + e.keyLen
if bytes.Compare(key, slice[e.addr.offset:valOffset]) != 0 {
return nil
}
return slice[valOffset : valOffset+e.valLen]
}
var nullEntryAddr = entryAddr{}
func (es *entryStore) put(e entry) entryAddr {
if es.sliceLen == maxEntrySliceLen {
es.slices = append(es.slices, make([]entry, 0, maxEntrySliceLen))
es.sliceLen = 0
es.sliceIdx++
}
addr := entryAddr{sliceIdx: es.sliceIdx, offset: es.sliceLen}
slice := es.slices[es.sliceIdx]
slice = append(slice, e)
es.slices[es.sliceIdx] = slice
es.sliceLen++
return addr
}
func (es *entryStore) get(addr entryAddr) entry {
return es.slices[addr.sliceIdx][addr.offset]
}
// MVMap stores multiple value for a given key with minimum GC overhead.
// A given key can store multiple values.
// It is not thread-safe, should only be used in one goroutine.
type MVMap struct {
entryStore entryStore
dataStore dataStore
hashTable map[uint64]entryAddr
hashFunc hash.Hash64
}
// NewMVMap creates a new multi-value map.
func NewMVMap() *MVMap {
m := new(MVMap)
m.hashTable = make(map[uint64]entryAddr)
m.hashFunc = fnv.New64()
m.entryStore.slices = [][]entry{make([]entry, 0, 64)}
// append first empty entry so zero entry pointer an represent null.
m.entryStore.put(entry{})
m.dataStore.slices = [][]byte{make([]byte, 0, 1024)}
return m
}
// Put puts the key/value pairs to the MVMap, if the key already exists, old value will not be overwritten,
// values are stored in a list.
func (m *MVMap) Put(key, value []byte) {
hashKey := m.hash(key)
oldEntryAddr := m.hashTable[hashKey]
dataAddr := m.dataStore.put(key, value)
e := entry{
addr: dataAddr,
keyLen: uint32(len(key)),
valLen: uint32(len(value)),
next: oldEntryAddr,
}
newEntryPtr := m.entryStore.put(e)
m.hashTable[hashKey] = newEntryPtr
}
// Get gets the values of the key.
func (m *MVMap) Get(key []byte) [][]byte {
var values [][]byte
hashKey := m.hash(key)
entryAddr := m.hashTable[hashKey]
for entryAddr != nullEntryAddr {
e := m.entryStore.get(entryAddr)
entryAddr = e.next
val := m.dataStore.get(e, key)
if val == nil {
continue
}
values = append(values, val)
}
return values
}
func (m *MVMap) hash(key []byte) uint64 {
m.hashFunc.Reset()
m.hashFunc.Write(key)
return m.hashFunc.Sum64()
}

63
util/mvmap/mvmap_test.go Normal file
View File

@ -0,0 +1,63 @@
// Copyright 2017 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package mvmap
import (
"bytes"
"encoding/binary"
"fmt"
"testing"
)
func TestMVMap(t *testing.T) {
m := NewMVMap()
m.Put([]byte("abc"), []byte("abc1"))
m.Put([]byte("abc"), []byte("abc2"))
m.Put([]byte("def"), []byte("def1"))
m.Put([]byte("def"), []byte("def2"))
vals := m.Get([]byte("abc"))
if fmt.Sprintf("%s", vals) != "[abc2 abc1]" {
t.FailNow()
}
vals = m.Get([]byte("def"))
if fmt.Sprintf("%s", vals) != "[def2 def1]" {
t.FailNow()
}
}
func BenchmarkMVMapPut(b *testing.B) {
m := NewMVMap()
buffer := make([]byte, 8)
for i := 0; i < b.N; i++ {
binary.BigEndian.PutUint64(buffer, uint64(i))
m.Put(buffer, buffer)
}
}
func BenchmarkMVMapGet(b *testing.B) {
m := NewMVMap()
buffer := make([]byte, 8)
for i := 0; i < b.N; i++ {
binary.BigEndian.PutUint64(buffer, uint64(i))
m.Put(buffer, buffer)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
binary.BigEndian.PutUint64(buffer, uint64(i))
val := m.Get(buffer)
if len(val) != 1 || bytes.Compare(val[0], buffer) != 0 {
b.FailNow()
}
}
}