// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package chunk

import (
	"bytes"
	"fmt"
	"io"
	"math/rand"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"testing"

	errors2 "github.com/pingcap/errors"
	"github.com/pingcap/tidb/config"
	"github.com/pingcap/tidb/parser/mysql"
	"github.com/pingcap/tidb/types"
	"github.com/pingcap/tidb/util/mathutil"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)
// initChunks creates numChk chunks of numRow rows each, mixing non-NULL
// strings, ints and JSON values with NULL columns.
func initChunks(numChk, numRow int) ([]*Chunk, []*types.FieldType) {
	fields := []*types.FieldType{
		types.NewFieldType(mysql.TypeVarString),
		types.NewFieldType(mysql.TypeLonglong),
		types.NewFieldType(mysql.TypeVarString),
		types.NewFieldType(mysql.TypeLonglong),
		types.NewFieldType(mysql.TypeJSON),
	}

	chks := make([]*Chunk, 0, numChk)
	for chkIdx := 0; chkIdx < numChk; chkIdx++ {
		chk := NewChunkWithCapacity(fields, numRow)
		for rowIdx := 0; rowIdx < numRow; rowIdx++ {
			data := int64(chkIdx*numRow + rowIdx)
			chk.AppendString(0, fmt.Sprint(data))
			chk.AppendNull(1)
			chk.AppendNull(2)
			chk.AppendInt64(3, data)
			if chkIdx%2 == 0 {
				chk.AppendJSON(4, types.CreateBinaryJSON(fmt.Sprint(data)))
			} else {
				chk.AppendNull(4)
			}
		}
		chks = append(chks, chk)
	}
	return chks, fields
}
func TestListInDisk(t *testing.T) {
	numChk, numRow := 2, 2
	chks, fields := initChunks(numChk, numRow)
	l := NewListInDisk(fields)
	defer func() {
		err := l.Close()
		require.NoError(t, err)
		require.NotNil(t, l.dataFile.disk)
		_, err = os.Stat(l.dataFile.disk.Name())
		require.True(t, os.IsNotExist(err))
	}()
	for _, chk := range chks {
		err := l.Add(chk)
		assert.NoError(t, err)
	}
	require.True(t, strings.HasPrefix(l.dataFile.disk.Name(), filepath.Join(os.TempDir(), "tidb_enable_tmp_storage_on_oom")))
	assert.Equal(t, numChk, l.NumChunks())
	assert.Greater(t, l.GetDiskTracker().BytesConsumed(), int64(0))

	for chkIdx := 0; chkIdx < numChk; chkIdx++ {
		for rowIdx := 0; rowIdx < numRow; rowIdx++ {
			row, err := l.GetRow(RowPtr{ChkIdx: uint32(chkIdx), RowIdx: uint32(rowIdx)})
			assert.NoError(t, err)
			assert.Equal(t, chks[chkIdx].GetRow(rowIdx).GetDatumRow(fields), row.GetDatumRow(fields))
		}
	}
}
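// As a minimal sketch of the round trip exercised above (only the calls that
// appear in this file are assumed: NewListInDisk, Add, GetRow, Close),
// spilling one row and reading it back looks like:
//
//	fields := []*types.FieldType{types.NewFieldType(mysql.TypeLonglong)}
//	chk := NewChunkWithCapacity(fields, 1)
//	chk.AppendInt64(0, 42)
//	l := NewListInDisk(fields)
//	defer l.Close()
//	if err := l.Add(chk); err != nil {
//		// handle err
//	}
//	row, err := l.GetRow(RowPtr{ChkIdx: 0, RowIdx: 0})
//	// on success, row.GetInt64(0) == 42
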
func BenchmarkListInDiskAdd(b *testing.B) {
	numChk, numRow := 1, 2
	chks, fields := initChunks(numChk, numRow)
	chk := chks[0]
	l := NewListInDisk(fields)
	defer l.Close()

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		err := l.Add(chk)
		if err != nil {
			b.Fatal(err)
		}
	}
}
func BenchmarkListInDiskGetRow(b *testing.B) {
	numChk, numRow := 10000, 2
	chks, fields := initChunks(numChk, numRow)
	l := NewListInDisk(fields)
	defer l.Close()
	for _, chk := range chks {
		err := l.Add(chk)
		if err != nil {
			b.Fatal(err)
		}
	}
	rand.Seed(0)
	// Pre-generate the row pointers so random-number generation stays out of
	// the timed loop; beyond 10000 entries the pointers simply repeat.
	ptrs := make([]RowPtr, 0, b.N)
	for i := 0; i < mathutil.Min(b.N, 10000); i++ {
		ptrs = append(ptrs, RowPtr{
			ChkIdx: rand.Uint32() % uint32(numChk),
			RowIdx: rand.Uint32() % uint32(numRow),
		})
	}
	for i := 10000; i < cap(ptrs); i++ {
		ptrs = append(ptrs, ptrs[i%10000])
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, err := l.GetRow(ptrs[i])
		if err != nil {
			b.Fatal(err)
		}
	}
}
// listInDiskWriteDisk embeds ListInDisk but, via newListInDiskWriteDisk below,
// writes raw bytes straight to the temp files (bypassing the checksum writer),
// so tests can compare rows read through the checksum layer against plainly
// written ground truth.
type listInDiskWriteDisk struct {
	ListInDisk
}
func (l *diskFileReaderWriter) flushForTest() (err error) {
	err = l.disk.Close()
	if err != nil {
		return
	}
	l.w = nil
	// l.disk is the underlying file of l.w; closing it above invalidates both,
	// so we need to reopen it before reading rows.
	l.disk, err = os.Open(l.disk.Name())
	if err != nil {
		return errors2.Trace(err)
	}
	return nil
}
func newListInDiskWriteDisk(fieldTypes []*types.FieldType) (*listInDiskWriteDisk, error) {
	l := listInDiskWriteDisk{*NewListInDisk(fieldTypes)}
	disk, err := os.CreateTemp(config.GetGlobalConfig().TempStoragePath, strconv.Itoa(l.diskTracker.Label()))
	if err != nil {
		return nil, err
	}
	l.dataFile.disk = disk
	l.dataFile.w = disk

	disk2, err := os.CreateTemp(config.GetGlobalConfig().TempStoragePath, "offset"+strconv.Itoa(l.diskTracker.Label()))
	if err != nil {
		return nil, err
	}
	l.offsetFile.disk = disk2
	l.offsetFile.w = disk2
	return &l, nil
}
func (l *listInDiskWriteDisk) GetRow(ptr RowPtr) (row Row, err error) {
	err = l.flushForTest()
	if err != nil {
		return
	}
	off, err := l.getOffset(ptr.ChkIdx, ptr.RowIdx)
	if err != nil {
		return
	}

	r := io.NewSectionReader(l.dataFile.disk, off, l.dataFile.offWrite-off)
	format := rowInDisk{numCol: len(l.fieldTypes)}
	_, err = format.ReadFrom(r)
	if err != nil {
		return row, err
	}
	row, _ = format.toRow(l.fieldTypes, nil)
	return row, err
}
func (l *listInDiskWriteDisk) flushForTest() (err error) {
	err = l.dataFile.flushForTest()
	if err != nil {
		return err
	}
	return l.offsetFile.flushForTest()
}
func checkRow(t *testing.T, row1, row2 Row) {
	require.Equal(t, row2.GetString(0), row1.GetString(0))
	require.Equal(t, row2.GetInt64(1), row1.GetInt64(1))
	require.Equal(t, row2.GetString(2), row1.GetString(2))
	require.Equal(t, row2.GetInt64(3), row1.GetInt64(3))
	if !row1.IsNull(4) {
		require.Equal(t, row2.GetJSON(4).String(), row1.GetJSON(4).String())
	}
}
// testListInDisk adds the same chunks to a checksum-protected ListInDisk and
// to a raw-write listInDiskWriteDisk, then verifies from `concurrency`
// goroutines that rows read through the checksum layer match the ground truth.
func testListInDisk(t *testing.T, concurrency int) {
	numChk, numRow := 10, 1000
	chks, fields := initChunks(numChk, numRow)
	lChecksum := NewListInDisk(fields)
	defer lChecksum.Close()
	lDisk, err := newListInDiskWriteDisk(fields)
	require.NoError(t, err)
	defer lDisk.Close()
	for _, chk := range chks {
		err := lChecksum.Add(chk)
		require.NoError(t, err)
		err = lDisk.Add(chk)
		require.NoError(t, err)
	}

	var ptrs []RowPtr
	for i := 0; i < numChk; i++ {
		for j := 0; j < numRow; j++ {
			ptrs = append(ptrs, RowPtr{
				ChkIdx: uint32(i),
				RowIdx: uint32(j),
			})
		}
	}

	expectRows := make([]Row, 0, len(ptrs))
	for _, rowPtr := range ptrs {
		row, err := lDisk.GetRow(rowPtr)
		require.NoError(t, err)
		expectRows = append(expectRows, row)
	}

	wg := sync.WaitGroup{}
	wg.Add(concurrency)
	for con := 0; con < concurrency; con++ {
		go func() {
			for i, rowPtr := range ptrs {
				row, err := lChecksum.GetRow(rowPtr)
				require.NoError(t, err)
				checkRow(t, row, expectRows[i])
			}
			wg.Done()
		}()
	}
	wg.Wait()
}
func BenchmarkListInDisk_GetChunk(b *testing.B) {
	numChk, numRow := 10, 1000
	chks, fields := initChunks(numChk, numRow)
	l := NewListInDisk(fields)
	defer l.Close()
	for _, chk := range chks {
		_ = l.Add(chk)
	}

	for i := 0; i < b.N; i++ {
		v := i % numChk
		_, _ = l.GetChunk(v)
	}
}
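// The tests below run testListInDisk (with 1, 2 and 8 concurrent readers),
// testReaderWithCache and testReaderWithCacheNoFlush under two spill-file
// configurations: plaintext with checksum only, and AES128-CTR encryption
// layered on top of the checksum.
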
func TestListInDiskWithChecksum1(t *testing.T) {
	defer config.RestoreFunc()()
	config.UpdateGlobal(func(conf *config.Config) {
		conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodPlaintext
	})
	testListInDisk(t, 1)
}

func TestListInDiskWithChecksum2(t *testing.T) {
	defer config.RestoreFunc()()
	config.UpdateGlobal(func(conf *config.Config) {
		conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodPlaintext
	})
	testListInDisk(t, 2)
}

func TestListInDiskWithChecksum8(t *testing.T) {
	defer config.RestoreFunc()()
	config.UpdateGlobal(func(conf *config.Config) {
		conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodPlaintext
	})
	testListInDisk(t, 8)
}

func TestListInDiskWithChecksumReaderWithCache(t *testing.T) {
	defer config.RestoreFunc()()
	config.UpdateGlobal(func(conf *config.Config) {
		conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodPlaintext
	})
	testReaderWithCache(t)
}

func TestListInDiskWithChecksumReaderWithCacheNoFlush(t *testing.T) {
	defer config.RestoreFunc()()
	config.UpdateGlobal(func(conf *config.Config) {
		conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodPlaintext
	})
	testReaderWithCacheNoFlush(t)
}

func TestListInDiskWithChecksumAndEncrypt1(t *testing.T) {
	defer config.RestoreFunc()()
	config.UpdateGlobal(func(conf *config.Config) {
		conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodAES128CTR
	})
	testListInDisk(t, 1)
}

func TestListInDiskWithChecksumAndEncrypt2(t *testing.T) {
	defer config.RestoreFunc()()
	config.UpdateGlobal(func(conf *config.Config) {
		conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodAES128CTR
	})
	testListInDisk(t, 2)
}

func TestListInDiskWithChecksumAndEncrypt8(t *testing.T) {
	defer config.RestoreFunc()()
	config.UpdateGlobal(func(conf *config.Config) {
		conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodAES128CTR
	})
	testListInDisk(t, 8)
}

func TestListInDiskWithChecksumAndEncryptReaderWithCache(t *testing.T) {
	defer config.RestoreFunc()()
	config.UpdateGlobal(func(conf *config.Config) {
		conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodAES128CTR
	})
	testReaderWithCache(t)
}

func TestListInDiskWithChecksumAndEncryptReaderWithCacheNoFlush(t *testing.T) {
	defer config.RestoreFunc()()
	config.UpdateGlobal(func(conf *config.Config) {
		conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodAES128CTR
	})
	testReaderWithCacheNoFlush(t)
}
// The following diagram describes the testdata used below:
// 4 B: checksum of this segment.
// 8 B: total length of all columns; this example has only one column.
// 1012 B: user data in the file. Each segment is at most 1024 B, so after the
// 4 B checksum only 1020 B remain for payload, and after the 8 B column
// length 1012 B remain for user data.
/*
	            Data in File                             Data in mem cache
	+--------+------------------------------------------+ +-----------------------------+
	|        |    1020B payload                         | |                             |
	| 4Bytes | +---------+----------------------------+ | |                             |
	|checksum| |8B collen|     1012B user data        | | |   12B remained user data    |
	|        | +---------+----------------------------+ | |                             |
	|        |                                          | |                             |
	+--------+------------------------------------------+ +-----------------------------+
*/
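// Concretely, testReaderWithCache writes 102*10+4 = 1024 B of string data;
// serialization prepends the 8 B column length, giving 1032 B of logical
// payload. The first 1020 B fill one on-disk segment (stored as 4 B checksum
// + 1020 B payload), and the remaining 12 B stay in the writer's memory
// cache. The ReadAt offsets below are logical payload offsets; the checksum
// bytes are transparent to the reader.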
func testReaderWithCache(t *testing.T) {
	testData := "0123456789"
	buf := bytes.NewBuffer(nil)
	for i := 0; i < 102; i++ {
		buf.WriteString(testData)
	}
	buf.WriteString("0123")

	field := []*types.FieldType{types.NewFieldType(mysql.TypeString)}
	chk := NewChunkWithCapacity(field, 1)
	chk.AppendString(0, buf.String())
	l := NewListInDisk(field)
	err := l.Add(chk)
	require.NoError(t, err)

	// Basic test for GetRow().
	row, err := l.GetRow(RowPtr{0, 0})
	require.NoError(t, err)
	require.Equal(t, chk.GetRow(0).GetDatumRow(field), row.GetDatumRow(field))

	checksumReader := l.dataFile.getReader()

	// Read all data.
	data := make([]byte, 1024)
	// Offset is 8 because we want to skip the column-length header.
	readCnt, err := checksumReader.ReadAt(data, 8)
	require.NoError(t, err)
	require.Equal(t, 1024, readCnt)
	require.Equal(t, buf.Bytes(), data)

	// Only read data from the mem cache.
	data = make([]byte, 1024)
	readCnt, err = checksumReader.ReadAt(data, 1020)
	require.Equal(t, io.EOF, err)
	require.Equal(t, 12, readCnt)
	require.Equal(t, buf.Bytes()[1012:], data[:12])

	// Read partial data from the mem cache.
	data = make([]byte, 1024)
	readCnt, err = checksumReader.ReadAt(data, 1025)
	require.Equal(t, io.EOF, err)
	require.Equal(t, 7, readCnt)
	require.Equal(t, buf.Bytes()[1017:], data[:7])

	// Read partial data from both the file and the mem cache.
	data = make([]byte, 1024)
	readCnt, err = checksumReader.ReadAt(data, 1010)
	require.Equal(t, io.EOF, err)
	require.Equal(t, 22, readCnt)
	require.Equal(t, buf.Bytes()[1002:], data[:22])

	// Offset is too large, so no data is read.
	data = make([]byte, 1024)
	readCnt, err = checksumReader.ReadAt(data, 1032)
	require.Equal(t, io.EOF, err)
	require.Equal(t, 0, readCnt)
	require.Equal(t, data, make([]byte, 1024))

	// Only read 1 byte from the mem cache.
	data = make([]byte, 1024)
	readCnt, err = checksumReader.ReadAt(data, 1031)
	require.Equal(t, io.EOF, err)
	require.Equal(t, 1, readCnt)
	require.Equal(t, buf.Bytes()[1023:], data[:1])

	// Test a small user request: only ask for 10 bytes.
	data = make([]byte, 10)
	readCnt, err = checksumReader.ReadAt(data, 1010)
	require.NoError(t, err)
	require.Equal(t, 10, readCnt)
	require.Equal(t, buf.Bytes()[1002:1012], data)
}
// testReaderWithCacheNoFlush tests the case where the data is so small that
// none of it is flushed to disk; everything is served from the mem cache.
func testReaderWithCacheNoFlush(t *testing.T) {
	testData := "0123456789"

	field := []*types.FieldType{types.NewFieldType(mysql.TypeString)}
	chk := NewChunkWithCapacity(field, 1)
	chk.AppendString(0, testData)
	l := NewListInDisk(field)
	err := l.Add(chk)
	require.NoError(t, err)

	// Basic test for GetRow().
	row, err := l.GetRow(RowPtr{0, 0})
	require.NoError(t, err)
	require.Equal(t, chk.GetRow(0).GetDatumRow(field), row.GetDatumRow(field))
	checksumReader := l.dataFile.getReader()

	// Read all data.
	data := make([]byte, 1024)
	// Offset is 8 because we want to skip the column-length header.
	readCnt, err := checksumReader.ReadAt(data, 8)
	require.Equal(t, io.EOF, err)
	require.Len(t, testData, readCnt)
	require.Equal(t, []byte(testData), data[:10])
}