// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
package chunk
|
|
|
|
import (
|
|
"io"
|
|
"os"
|
|
"strconv"
|
|
|
|
errors2 "github.com/pingcap/errors"
|
|
"github.com/pingcap/tidb/config"
|
|
"github.com/pingcap/tidb/parser/terror"
|
|
"github.com/pingcap/tidb/types"
|
|
"github.com/pingcap/tidb/util/checksum"
|
|
"github.com/pingcap/tidb/util/disk"
|
|
"github.com/pingcap/tidb/util/encrypt"
|
|
"github.com/pingcap/tidb/util/memory"
|
|
)
|
|
|
|
// ListInDisk represents a slice of chunks storing in temporary disk.
|
|
type ListInDisk struct {
|
|
fieldTypes []*types.FieldType
|
|
numRowsOfEachChunk []int
|
|
rowNumOfEachChunkFirstRow []int
|
|
totalNumRows int
|
|
diskTracker *disk.Tracker // track disk usage.
|
|
|
|
dataFile diskFileReaderWriter
|
|
offsetFile diskFileReaderWriter
|
|
}
|
|
|
|
// diskFileReaderWriter represents a Reader and a Writer for the temporary disk file.
|
|
type diskFileReaderWriter struct {
|
|
disk *os.File
|
|
w io.WriteCloser
|
|
// offWrite is the current offset for writing.
|
|
offWrite int64
|
|
|
|
checksumWriter *checksum.Writer
|
|
cipherWriter *encrypt.Writer // cipherWriter is only enable when config SpilledFileEncryptionMethod is "aes128-ctr"
|
|
|
|
// ctrCipher stores the key and nonce using by aes encrypt io layer
|
|
ctrCipher *encrypt.CtrCipher
|
|
}
|
|
|
|
func (l *diskFileReaderWriter) initWithFileName(fileName string) (err error) {
|
|
l.disk, err = os.CreateTemp(config.GetGlobalConfig().TempStoragePath, fileName)
|
|
if err != nil {
|
|
return errors2.Trace(err)
|
|
}
|
|
var underlying io.WriteCloser = l.disk
|
|
if config.GetGlobalConfig().Security.SpilledFileEncryptionMethod != config.SpilledFileEncryptionMethodPlaintext {
|
|
// The possible values of SpilledFileEncryptionMethod are "plaintext", "aes128-ctr"
|
|
l.ctrCipher, err = encrypt.NewCtrCipher()
|
|
if err != nil {
|
|
return
|
|
}
|
|
l.cipherWriter = encrypt.NewWriter(l.disk, l.ctrCipher)
|
|
underlying = l.cipherWriter
|
|
}
|
|
l.checksumWriter = checksum.NewWriter(underlying)
|
|
l.w = l.checksumWriter
|
|
return
|
|
}
|
|
|
|
func (l *diskFileReaderWriter) getReader() io.ReaderAt {
|
|
var underlying io.ReaderAt = l.disk
|
|
if l.ctrCipher != nil {
|
|
underlying = NewReaderWithCache(encrypt.NewReader(l.disk, l.ctrCipher), l.cipherWriter.GetCache(), l.cipherWriter.GetCacheDataOffset())
|
|
}
|
|
if l.checksumWriter != nil {
|
|
underlying = NewReaderWithCache(checksum.NewReader(underlying), l.checksumWriter.GetCache(), l.checksumWriter.GetCacheDataOffset())
|
|
}
|
|
return underlying
|
|
}
|
|
|
|
func (l *diskFileReaderWriter) getSectionReader(off int64) *io.SectionReader {
|
|
checksumReader := l.getReader()
|
|
r := io.NewSectionReader(checksumReader, off, l.offWrite-off)
|
|
return r
|
|
}
|
|
|
|
func (l *diskFileReaderWriter) getWriter() io.Writer {
|
|
return l.w
|
|
}
|
|
|
|
// Name patterns of the temporary files backing a ListInDisk: one for the row
// data and one for the row offsets. The disk tracker label is appended to
// them before they are handed to os.CreateTemp.
var (
	defaultChunkListInDiskPath       = "chunk.ListInDisk"
	defaultChunkListInDiskOffsetPath = "chunk.ListInDiskOffset"
)
|
|
|
|
// NewListInDisk creates a new ListInDisk with field types.
|
|
func NewListInDisk(fieldTypes []*types.FieldType) *ListInDisk {
|
|
l := &ListInDisk{
|
|
fieldTypes: fieldTypes,
|
|
// TODO(fengliyuan): set the quota of disk usage.
|
|
diskTracker: disk.NewTracker(memory.LabelForChunkListInDisk, -1),
|
|
}
|
|
return l
|
|
}
|
|
|
|
func (l *ListInDisk) initDiskFile() (err error) {
|
|
err = disk.CheckAndInitTempDir()
|
|
if err != nil {
|
|
return
|
|
}
|
|
err = l.dataFile.initWithFileName(defaultChunkListInDiskPath + strconv.Itoa(l.diskTracker.Label()))
|
|
if err != nil {
|
|
return
|
|
}
|
|
err = l.offsetFile.initWithFileName(defaultChunkListInDiskOffsetPath + strconv.Itoa(l.diskTracker.Label()))
|
|
return
|
|
}
|
|
|
|
// Len returns the number of rows in ListInDisk
|
|
func (l *ListInDisk) Len() int {
|
|
return l.totalNumRows
|
|
}
|
|
|
|
// GetDiskTracker returns the memory tracker of this List.
|
|
func (l *ListInDisk) GetDiskTracker() *disk.Tracker {
|
|
return l.diskTracker
|
|
}
|
|
|
|
// Add adds a chunk to the ListInDisk. Caller must make sure the input chk
|
|
// is not empty and not used any more and has the same field types.
|
|
// Warning: Do not use Add concurrently.
|
|
func (l *ListInDisk) Add(chk *Chunk) (err error) {
|
|
if chk.NumRows() == 0 {
|
|
return errors2.New("chunk appended to List should have at least 1 row")
|
|
}
|
|
if l.dataFile.disk == nil {
|
|
err = l.initDiskFile()
|
|
if err != nil {
|
|
return
|
|
}
|
|
}
|
|
// Append data
|
|
chkInDisk := chunkInDisk{Chunk: chk, offWrite: l.dataFile.offWrite}
|
|
n, err := chkInDisk.WriteTo(l.dataFile.getWriter())
|
|
l.dataFile.offWrite += n
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
// Append offsets
|
|
offsetsOfRows := chkInDisk.getOffsetsOfRows()
|
|
l.numRowsOfEachChunk = append(l.numRowsOfEachChunk, len(offsetsOfRows))
|
|
l.rowNumOfEachChunkFirstRow = append(l.rowNumOfEachChunkFirstRow, l.totalNumRows)
|
|
n2, err := offsetsOfRows.WriteTo(l.offsetFile.getWriter())
|
|
l.offsetFile.offWrite += n2
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
l.diskTracker.Consume(n + n2)
|
|
l.totalNumRows += chk.NumRows()
|
|
return
|
|
}
|
|
|
|
// GetChunk gets a Chunk from the ListInDisk by chkIdx.
|
|
func (l *ListInDisk) GetChunk(chkIdx int) (*Chunk, error) {
|
|
chk := NewChunkWithCapacity(l.fieldTypes, l.NumRowsOfChunk(chkIdx))
|
|
chkSize := l.numRowsOfEachChunk[chkIdx]
|
|
for rowIdx := 0; rowIdx < chkSize; rowIdx++ {
|
|
_, _, err := l.GetRowAndAppendToChunk(RowPtr{ChkIdx: uint32(chkIdx), RowIdx: uint32(rowIdx)}, chk)
|
|
if err != nil {
|
|
return chk, err
|
|
}
|
|
}
|
|
return chk, nil
|
|
}
|
|
|
|
// GetRow gets a Row from the ListInDisk by RowPtr.
|
|
func (l *ListInDisk) GetRow(ptr RowPtr) (row Row, err error) {
|
|
row, _, err = l.GetRowAndAppendToChunk(ptr, nil)
|
|
return row, err
|
|
}
|
|
|
|
// GetRowAndAppendToChunk gets a Row from the ListInDisk by RowPtr. Return the Row and the Ref Chunk.
|
|
func (l *ListInDisk) GetRowAndAppendToChunk(ptr RowPtr, chk *Chunk) (row Row, _ *Chunk, err error) {
|
|
off, err := l.getOffset(ptr.ChkIdx, ptr.RowIdx)
|
|
if err != nil {
|
|
return
|
|
}
|
|
r := l.dataFile.getSectionReader(off)
|
|
format := rowInDisk{numCol: len(l.fieldTypes)}
|
|
_, err = format.ReadFrom(r)
|
|
if err != nil {
|
|
return row, nil, err
|
|
}
|
|
row, chk = format.toRow(l.fieldTypes, chk)
|
|
return row, chk, err
|
|
}
|
|
|
|
func (l *ListInDisk) getOffset(chkIdx uint32, rowIdx uint32) (int64, error) {
|
|
offsetInOffsetFile := l.rowNumOfEachChunkFirstRow[chkIdx] + int(rowIdx)
|
|
b := make([]byte, 8)
|
|
reader := l.offsetFile.getSectionReader(int64(offsetInOffsetFile) * 8)
|
|
n, err := io.ReadFull(reader, b)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
if n != 8 {
|
|
return 0, errors2.New("The file spilled is broken, can not get data offset from the disk")
|
|
}
|
|
return bytesToI64Slice(b)[0], nil
|
|
}
|
|
|
|
// NumRowsOfChunk returns the number of rows of a chunk in the ListInDisk.
|
|
func (l *ListInDisk) NumRowsOfChunk(chkID int) int {
|
|
return l.numRowsOfEachChunk[chkID]
|
|
}
|
|
|
|
// NumChunks returns the number of chunks in the ListInDisk.
|
|
func (l *ListInDisk) NumChunks() int {
|
|
return len(l.numRowsOfEachChunk)
|
|
}
|
|
|
|
// Close releases the disk resource.
|
|
func (l *ListInDisk) Close() error {
|
|
if l.dataFile.disk != nil {
|
|
l.diskTracker.Consume(-l.diskTracker.BytesConsumed())
|
|
terror.Call(l.dataFile.disk.Close)
|
|
terror.Log(os.Remove(l.dataFile.disk.Name()))
|
|
}
|
|
if l.offsetFile.disk != nil {
|
|
terror.Call(l.offsetFile.disk.Close)
|
|
terror.Log(os.Remove(l.offsetFile.disk.Name()))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// chunkInDisk represents a chunk in disk format. Each row of the chunk
|
|
// is serialized and in sequence ordered. The format of each row is like
|
|
// the struct diskFormatRow, put size of each column first, then the
|
|
// data of each column.
|
|
//
|
|
// For example, a chunk has 2 rows and 3 columns, the disk format of the
|
|
// chunk is as follow:
|
|
//
|
|
// [size of row0 column0], [size of row0 column1], [size of row0 column2]
|
|
// [data of row0 column0], [data of row0 column1], [data of row0 column2]
|
|
// [size of row1 column0], [size of row1 column1], [size of row1 column2]
|
|
// [data of row1 column0], [data of row1 column1], [data of row1 column2]
|
|
//
|
|
// If a column of a row is null, the size of it is -1 and the data is empty.
|
|
type chunkInDisk struct {
|
|
*Chunk
|
|
// offWrite is the current offset for writing.
|
|
offWrite int64
|
|
// offsetsOfRows stores the offset of each row.
|
|
offsetsOfRows offsetsOfRows
|
|
}
|
|
|
|
// offsetsOfRows holds, for each row of a chunk, the offset in the data file
// at which that row starts.
type offsetsOfRows []int64
|
|
|
|
// WriteTo serializes the offsetsOfRow, and writes to w.
|
|
func (off offsetsOfRows) WriteTo(w io.Writer) (written int64, err error) {
|
|
n, err := w.Write(i64SliceToBytes(off))
|
|
return int64(n), err
|
|
}
|
|
|
|
// WriteTo serializes the chunk into the format of chunkInDisk, and
|
|
// writes to w.
|
|
func (chk *chunkInDisk) WriteTo(w io.Writer) (written int64, err error) {
|
|
var n int64
|
|
numRows := chk.NumRows()
|
|
chk.offsetsOfRows = make([]int64, 0, numRows)
|
|
var format *diskFormatRow
|
|
for rowIdx := 0; rowIdx < numRows; rowIdx++ {
|
|
format = convertFromRow(chk.GetRow(rowIdx), format)
|
|
chk.offsetsOfRows = append(chk.offsetsOfRows, chk.offWrite+written)
|
|
|
|
n, err = rowInDisk{diskFormatRow: *format}.WriteTo(w)
|
|
written += n
|
|
if err != nil {
|
|
return
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// getOffsetsOfRows gets the offset of each row.
|
|
func (chk *chunkInDisk) getOffsetsOfRows() offsetsOfRows { return chk.offsetsOfRows }
|
|
|
|
// rowInDisk represents a Row in format of diskFormatRow.
|
|
type rowInDisk struct {
|
|
numCol int
|
|
diskFormatRow
|
|
}
|
|
|
|
// WriteTo serializes a row of the chunk into the format of
|
|
// diskFormatRow, and writes to w.
|
|
func (row rowInDisk) WriteTo(w io.Writer) (written int64, err error) {
|
|
n, err := w.Write(i64SliceToBytes(row.sizesOfColumns))
|
|
written += int64(n)
|
|
if err != nil {
|
|
return
|
|
}
|
|
for _, data := range row.cells {
|
|
n, err = w.Write(data)
|
|
written += int64(n)
|
|
if err != nil {
|
|
return
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// ReadFrom reads data of r, deserializes it from the format of diskFormatRow
|
|
// into Row.
|
|
func (row *rowInDisk) ReadFrom(r io.Reader) (n int64, err error) {
|
|
b := make([]byte, 8*row.numCol)
|
|
var n1 int
|
|
n1, err = io.ReadFull(r, b)
|
|
n += int64(n1)
|
|
if err != nil {
|
|
return
|
|
}
|
|
row.sizesOfColumns = bytesToI64Slice(b)
|
|
row.cells = make([][]byte, 0, row.numCol)
|
|
for _, size := range row.sizesOfColumns {
|
|
if size == -1 {
|
|
continue
|
|
}
|
|
cell := make([]byte, size)
|
|
row.cells = append(row.cells, cell)
|
|
n1, err = io.ReadFull(r, cell)
|
|
n += int64(n1)
|
|
if err != nil {
|
|
return
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// diskFormatRow is one row of a chunk in its on-disk representation. The
// overall layout is described in the documentation of chunkInDisk.
type diskFormatRow struct {
	// sizesOfColumns holds the byte size of every column of the row, in
	// column order. A size of -1 marks a null column.
	sizesOfColumns []int64
	// cells holds the raw data of the non-null columns only, in column
	// order. convertFromRow stores the slices obtained from the Row without
	// copying them; toRow appends the cell data to the target chunk.
	cells [][]byte
}
|
|
|
|
// convertFromRow serializes one row of chunk to diskFormatRow, then
|
|
// we can use diskFormatRow to write to disk.
|
|
func convertFromRow(row Row, reuse *diskFormatRow) (format *diskFormatRow) {
|
|
numCols := row.Chunk().NumCols()
|
|
if reuse != nil {
|
|
format = reuse
|
|
format.sizesOfColumns = format.sizesOfColumns[:0]
|
|
format.cells = format.cells[:0]
|
|
} else {
|
|
format = &diskFormatRow{
|
|
sizesOfColumns: make([]int64, 0, numCols),
|
|
cells: make([][]byte, 0, numCols),
|
|
}
|
|
}
|
|
for colIdx := 0; colIdx < numCols; colIdx++ {
|
|
if row.IsNull(colIdx) {
|
|
format.sizesOfColumns = append(format.sizesOfColumns, -1)
|
|
} else {
|
|
cell := row.GetRaw(colIdx)
|
|
format.sizesOfColumns = append(format.sizesOfColumns, int64(len(cell)))
|
|
format.cells = append(format.cells, cell)
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// toRow deserializes diskFormatRow to Row.
|
|
func (format *diskFormatRow) toRow(fields []*types.FieldType, chk *Chunk) (Row, *Chunk) {
|
|
if chk == nil || chk.IsFull() {
|
|
chk = NewChunkWithCapacity(fields, 1024)
|
|
}
|
|
var cellOff int
|
|
for colIdx, size := range format.sizesOfColumns {
|
|
col := chk.columns[colIdx]
|
|
if size == -1 { // isNull
|
|
col.AppendNull()
|
|
} else {
|
|
if col.isFixed() {
|
|
col.elemBuf = format.cells[cellOff]
|
|
col.finishAppendFixed()
|
|
} else {
|
|
col.AppendBytes(format.cells[cellOff])
|
|
}
|
|
cellOff++
|
|
}
|
|
}
|
|
|
|
return Row{c: chk, idx: chk.NumRows() - 1}, chk
|
|
}
|
|
|
|
// ReaderWithCache helps to read data that has not been flushed to the
// underlying layer yet. By using ReaderWithCache, a user can keep writing
// data into a ListInDisk even after reading from it.
type ReaderWithCache struct {
	// r is the underlying reader, consulted first.
	r io.ReaderAt
	// cacheOff is the offset, in the coordinates of r, at which cache starts.
	cacheOff int64
	// cache holds the bytes used to complete a read when r hits io.EOF.
	cache []byte
}
|
|
|
|
// NewReaderWithCache returns a ReaderWithCache.
|
|
func NewReaderWithCache(r io.ReaderAt, cache []byte, cacheOff int64) *ReaderWithCache {
|
|
return &ReaderWithCache{
|
|
r: r,
|
|
cacheOff: cacheOff,
|
|
cache: cache,
|
|
}
|
|
}
|
|
|
|
// ReadAt implements the ReadAt interface.
|
|
func (r *ReaderWithCache) ReadAt(p []byte, off int64) (readCnt int, err error) {
|
|
readCnt, err = r.r.ReadAt(p, off)
|
|
if err != io.EOF {
|
|
return readCnt, err
|
|
}
|
|
|
|
if len(p) == readCnt {
|
|
return readCnt, err
|
|
} else if len(p) < readCnt {
|
|
return readCnt, errors2.Trace(errors2.Errorf("cannot read more data than user requested"+
|
|
"(readCnt: %v, len(p): %v", readCnt, len(p)))
|
|
}
|
|
|
|
// When got here, user input is not filled fully, so we need read data from cache.
|
|
err = nil
|
|
p = p[readCnt:]
|
|
beg := off - r.cacheOff
|
|
if beg < 0 {
|
|
// This happens when only partial data of user requested resides in r.cache.
|
|
beg = 0
|
|
}
|
|
end := int(beg) + len(p)
|
|
if end > len(r.cache) {
|
|
err = io.EOF
|
|
end = len(r.cache)
|
|
}
|
|
readCnt += copy(p, r.cache[beg:end])
|
|
return readCnt, err
|
|
}
|