tidb/util/chunk/disk_test.go

// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package chunk

import (
	"bytes"
	"fmt"
	"io"
	"math/rand"
	"os"
	"path/filepath"
	"reflect"
	"strconv"
	"strings"
	"testing"

	"github.com/cznic/mathutil"
	"github.com/pingcap/check"
	"github.com/pingcap/parser/mysql"
	"github.com/pingcap/tidb/config"
	"github.com/pingcap/tidb/types"
	"github.com/pingcap/tidb/types/json"
	"github.com/pingcap/tidb/util/checksum"
	"github.com/pingcap/tidb/util/encrypt"
)
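
// initChunks builds numChk chunks of numRow rows each over a fixed
// five-column schema (varstring, bigint, varstring, bigint, JSON).
// Columns 1 and 2 are always NULL, and the JSON column alternates
// between a value and NULL across chunks, so the spill format is
// exercised with mixed NULL and non-NULL data.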
func initChunks(numChk, numRow int) ([]*Chunk, []*types.FieldType) {
	fields := []*types.FieldType{
		types.NewFieldType(mysql.TypeVarString),
		types.NewFieldType(mysql.TypeLonglong),
		types.NewFieldType(mysql.TypeVarString),
		types.NewFieldType(mysql.TypeLonglong),
		types.NewFieldType(mysql.TypeJSON),
	}

	chks := make([]*Chunk, 0, numChk)
	for chkIdx := 0; chkIdx < numChk; chkIdx++ {
		chk := NewChunkWithCapacity(fields, numRow)
		for rowIdx := 0; rowIdx < numRow; rowIdx++ {
			data := int64(chkIdx*numRow + rowIdx)
			chk.AppendString(0, fmt.Sprint(data))
			chk.AppendNull(1)
			chk.AppendNull(2)
			chk.AppendInt64(3, data)
			if chkIdx%2 == 0 {
				chk.AppendJSON(4, json.CreateBinary(fmt.Sprint(data)))
			} else {
				chk.AppendNull(4)
			}
		}
		chks = append(chks, chk)
	}
	return chks, fields
}

func (s *testChunkSuite) TestListInDisk(c *check.C) {
	numChk, numRow := 2, 2
	chks, fields := initChunks(numChk, numRow)
	l := NewListInDisk(fields)
	defer func() {
		err := l.Close()
		c.Check(err, check.IsNil)
		c.Check(l.disk, check.NotNil)
		_, err = os.Stat(l.disk.Name())
		c.Check(os.IsNotExist(err), check.IsTrue)
	}()
	for _, chk := range chks {
		err := l.Add(chk)
		c.Check(err, check.IsNil)
	}
	c.Assert(strings.HasPrefix(l.disk.Name(), filepath.Join(os.TempDir(), "oom-use-tmp-storage")), check.Equals, true)
	c.Check(l.NumChunks(), check.Equals, numChk)
	c.Check(l.GetDiskTracker().BytesConsumed() > 0, check.IsTrue)
	for chkIdx := 0; chkIdx < numChk; chkIdx++ {
		for rowIdx := 0; rowIdx < numRow; rowIdx++ {
			row, err := l.GetRow(RowPtr{ChkIdx: uint32(chkIdx), RowIdx: uint32(rowIdx)})
			c.Check(err, check.IsNil)
			c.Check(row.GetDatumRow(fields), check.DeepEquals, chks[chkIdx].GetRow(rowIdx).GetDatumRow(fields))
		}
	}
}

func BenchmarkListInDiskAdd(b *testing.B) {
	numChk, numRow := 1, 2
	chks, fields := initChunks(numChk, numRow)
	chk := chks[0]
	l := NewListInDisk(fields)
	defer l.Close()

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		err := l.Add(chk)
		if err != nil {
			b.Fatal(err)
		}
	}
}

func BenchmarkListInDiskGetRow(b *testing.B) {
	numChk, numRow := 10000, 2
	chks, fields := initChunks(numChk, numRow)
	l := NewListInDisk(fields)
	defer l.Close()
	for _, chk := range chks {
		err := l.Add(chk)
		if err != nil {
			b.Fatal(err)
		}
	}

	// Pre-generate up to 10000 random row pointers, then recycle them so
	// that building ptrs stays cheap even for a large b.N.
	rand.Seed(0)
	ptrs := make([]RowPtr, 0, b.N)
	for i := 0; i < mathutil.Min(b.N, 10000); i++ {
		ptrs = append(ptrs, RowPtr{
			ChkIdx: rand.Uint32() % uint32(numChk),
			RowIdx: rand.Uint32() % uint32(numRow),
		})
	}
	for i := 10000; i < cap(ptrs); i++ {
		ptrs = append(ptrs, ptrs[i%10000])
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, err := l.GetRow(ptrs[i])
		if err != nil {
			b.Fatal(err)
		}
	}
}
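
// listInDiskWriteDisk replaces the write path of ListInDisk with a plain
// temporary file, bypassing the checksum (and encryption) writers, so tests
// can compare rows read back through both paths.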
type listInDiskWriteDisk struct {
	ListInDisk
}

func newListInDiskWriteDisk(fieldTypes []*types.FieldType) (*listInDiskWriteDisk, error) {
	l := listInDiskWriteDisk{*NewListInDisk(fieldTypes)}
	disk, err := os.CreateTemp(config.GetGlobalConfig().TempStoragePath, strconv.Itoa(l.diskTracker.Label()))
	if err != nil {
		return nil, err
	}
	l.disk = disk
	l.w = disk
	return &l, nil
}
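
// GetRow reads the row at ptr directly from the raw temporary file, without
// going through the checksum or encryption readers.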
func (l *listInDiskWriteDisk) GetRow(ptr RowPtr) (row Row, err error) {
	err = l.flush()
	if err != nil {
		return
	}
	off := l.offsets[ptr.ChkIdx][ptr.RowIdx]
	r := io.NewSectionReader(l.disk, off, l.offWrite-off)
	format := rowInDisk{numCol: len(l.fieldTypes)}
	_, err = format.ReadFrom(r)
	if err != nil {
		return row, err
	}
	row = format.toMutRow(l.fieldTypes).ToRow()
	return row, err
}
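
// checkRow asserts that row1 and row2 hold the same values, column by column.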
func checkRow(c *check.C, row1, row2 Row) {
	c.Assert(row1.GetString(0), check.Equals, row2.GetString(0))
	c.Assert(row1.GetInt64(1), check.Equals, row2.GetInt64(1))
	c.Assert(row1.GetString(2), check.Equals, row2.GetString(2))
	c.Assert(row1.GetInt64(3), check.Equals, row2.GetInt64(3))
	if !row1.IsNull(4) {
		c.Assert(row1.GetJSON(4).String(), check.Equals, row2.GetJSON(4).String())
	}
}
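
// testListInDisk adds identical chunks to a regular ListInDisk and to a
// listInDiskWriteDisk, then verifies that every row reads back identically
// through both paths.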
func testListInDisk(c *check.C) {
	numChk, numRow := 10, 1000
	chks, fields := initChunks(numChk, numRow)
	lChecksum := NewListInDisk(fields)
	defer lChecksum.Close()
	lDisk, err := newListInDiskWriteDisk(fields)
	c.Assert(err, check.IsNil)
	defer lDisk.Close()
	for _, chk := range chks {
		err := lChecksum.Add(chk)
		c.Assert(err, check.IsNil)
		err = lDisk.Add(chk)
		c.Assert(err, check.IsNil)
	}

	var ptrs []RowPtr
	for i := 0; i < numChk; i++ {
		for j := 0; j < numRow; j++ {
			ptrs = append(ptrs, RowPtr{
				ChkIdx: uint32(i),
				RowIdx: uint32(j),
			})
		}
	}

	for _, rowPtr := range ptrs {
		row1, err := lChecksum.GetRow(rowPtr)
		c.Assert(err, check.IsNil)
		row2, err := lDisk.GetRow(rowPtr)
		c.Assert(err, check.IsNil)
		checkRow(c, row1, row2)
	}
}
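
// TestListInDiskWithChecksum runs the disk-spill tests with checksum
// protection only (spilled files stay in plaintext).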
func (s *testChunkSuite) TestListInDiskWithChecksum(c *check.C) {
	defer config.RestoreFunc()()
	config.UpdateGlobal(func(conf *config.Config) {
		conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodPlaintext
	})
	testListInDisk(c)
	testReaderWithCache(c)
	testReaderWithCacheNoFlush(c)
}
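
// TestListInDiskWithChecksumAndEncrypt runs the same tests with spilled
// files additionally encrypted using AES-128-CTR.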
func (s *testChunkSuite) TestListInDiskWithChecksumAndEncrypt(c *check.C) {
	defer config.RestoreFunc()()
	config.UpdateGlobal(func(conf *config.Config) {
		conf.Security.SpilledFileEncryptionMethod = config.SpilledFileEncryptionMethodAES128CTR
	})
	testListInDisk(c)
	testReaderWithCache(c)
	testReaderWithCacheNoFlush(c)
}

// The following diagram describes the test data we use:
//   4 B: checksum of this segment.
//   8 B: total length of all columns; in this example there is only one column.
//   1012 B: user data in the file. The max length of each segment is 1024 B,
//           so after the 4 B checksum only 1020 B remain for the payload.
//
//              Data in file                               Data in mem cache
// +--------+------------------------------------------+ +-----------------------------+
// |        |              1020 B payload              | |                             |
// |  4 B   | +----------+---------------------------+ | |                             |
// |checksum| |8 B collen|     1012 B user data      | | |  12 B remaining user data   |
// |        | +----------+---------------------------+ | |                             |
// |        |                                          | |                             |
// +--------+------------------------------------------+ +-----------------------------+
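//
// The string appended below is 102*10+4 = 1024 B long. With the 8 B column
// length prefix, 1032 B of payload are written in total: the first 1020 B
// fill the disk segment and the remaining 12 B stay in the writer's memory
// cache.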
func testReaderWithCache(c *check.C) {
	testData := "0123456789"
	buf := bytes.NewBuffer(nil)
	for i := 0; i < 102; i++ {
		buf.WriteString(testData)
	}
	buf.WriteString("0123")
	field := []*types.FieldType{types.NewFieldType(mysql.TypeString)}
	chk := NewChunkWithCapacity(field, 1)
	chk.AppendString(0, buf.String())
	l := NewListInDisk(field)
	err := l.Add(chk)
	c.Assert(err, check.IsNil)

	// Basic test for GetRow().
	row, err := l.GetRow(RowPtr{0, 0})
	c.Assert(err, check.IsNil)
	c.Assert(row.GetDatumRow(field), check.DeepEquals, chk.GetRow(0).GetDatumRow(field))
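
	// Rebuild the reader stack that ListInDisk uses internally: an optional
	// decrypting reader plus a checksum-verifying reader, each overlaid with
	// the corresponding writer's in-memory cache of unflushed data.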
	var underlying io.ReaderAt = l.disk
	if l.ctrCipher != nil {
		underlying = NewReaderWithCache(encrypt.NewReader(l.disk, l.ctrCipher), l.cipherWriter.GetCache(), l.cipherWriter.GetCacheDataOffset())
	}
	checksumReader := NewReaderWithCache(checksum.NewReader(underlying), l.checksumWriter.GetCache(), l.checksumWriter.GetCacheDataOffset())

	// Read all data. Offset is 8 to skip the column length prefix.
	data := make([]byte, 1024)
	readCnt, err := checksumReader.ReadAt(data, 8)
	c.Assert(err, check.IsNil)
	c.Assert(readCnt, check.Equals, 1024)
	c.Assert(reflect.DeepEqual(data, buf.Bytes()), check.IsTrue)

	// Read only the data in the mem cache.
	data = make([]byte, 1024)
	readCnt, err = checksumReader.ReadAt(data, 1020)
	c.Assert(err, check.Equals, io.EOF)
	c.Assert(readCnt, check.Equals, 12)
	c.Assert(reflect.DeepEqual(data[:12], buf.Bytes()[1012:]), check.IsTrue)

	// Read partial data from the mem cache.
	data = make([]byte, 1024)
	readCnt, err = checksumReader.ReadAt(data, 1025)
	c.Assert(err, check.Equals, io.EOF)
	c.Assert(readCnt, check.Equals, 7)
	c.Assert(reflect.DeepEqual(data[:7], buf.Bytes()[1017:]), check.IsTrue)

	// Read partial data from both the file and the mem cache.
	data = make([]byte, 1024)
	readCnt, err = checksumReader.ReadAt(data, 1010)
	c.Assert(err, check.Equals, io.EOF)
	c.Assert(readCnt, check.Equals, 22)
	c.Assert(reflect.DeepEqual(data[:22], buf.Bytes()[1002:]), check.IsTrue)

	// Offset is too large, so no data is read.
	data = make([]byte, 1024)
	readCnt, err = checksumReader.ReadAt(data, 1032)
	c.Assert(err, check.Equals, io.EOF)
	c.Assert(readCnt, check.Equals, 0)
	c.Assert(reflect.DeepEqual(data, make([]byte, 1024)), check.IsTrue)

	// Read only 1 byte from the mem cache.
	data = make([]byte, 1024)
	readCnt, err = checksumReader.ReadAt(data, 1031)
	c.Assert(err, check.Equals, io.EOF)
	c.Assert(readCnt, check.Equals, 1)
	c.Assert(reflect.DeepEqual(data[:1], buf.Bytes()[1023:]), check.IsTrue)

	// The requested buffer is small: only request 10 bytes.
	data = make([]byte, 10)
	readCnt, err = checksumReader.ReadAt(data, 1010)
	c.Assert(err, check.IsNil)
	c.Assert(readCnt, check.Equals, 10)
	c.Assert(reflect.DeepEqual(data, buf.Bytes()[1002:1012]), check.IsTrue)
}

// testReaderWithCacheNoFlush tests the situation where the data is small
// enough that nothing is flushed to disk.
func testReaderWithCacheNoFlush(c *check.C) {
	testData := "0123456789"
	field := []*types.FieldType{types.NewFieldType(mysql.TypeString)}
	chk := NewChunkWithCapacity(field, 1)
	chk.AppendString(0, testData)
	l := NewListInDisk(field)
	err := l.Add(chk)
	c.Assert(err, check.IsNil)

	// Basic test for GetRow().
	row, err := l.GetRow(RowPtr{0, 0})
	c.Assert(err, check.IsNil)
	c.Assert(row.GetDatumRow(field), check.DeepEquals, chk.GetRow(0).GetDatumRow(field))

	var underlying io.ReaderAt = l.disk
	if l.ctrCipher != nil {
		underlying = NewReaderWithCache(encrypt.NewReader(l.disk, l.ctrCipher), l.cipherWriter.GetCache(), l.cipherWriter.GetCacheDataOffset())
	}
	checksumReader := NewReaderWithCache(checksum.NewReader(underlying), l.checksumWriter.GetCache(), l.checksumWriter.GetCacheDataOffset())

	// Read all data. Offset is 8 to skip the column length prefix.
	data := make([]byte, 1024)
	readCnt, err := checksumReader.ReadAt(data, 8)
	c.Assert(err, check.Equals, io.EOF)
	c.Assert(readCnt, check.Equals, len(testData))
	c.Assert(reflect.DeepEqual(data[:10], []byte(testData)), check.IsTrue)
}