Files
tidb/br/pkg/lightning/backend/kv/sql2kv.go

545 lines
16 KiB
Go

// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// TODO combine with the pkg/kv package outside.
package kv
import (
"context"
"fmt"
"math"
"math/rand"
"sort"
"github.com/pingcap/errors"
"github.com/pingcap/tidb/br/pkg/lightning/common"
"github.com/pingcap/tidb/br/pkg/lightning/log"
"github.com/pingcap/tidb/br/pkg/lightning/metric"
"github.com/pingcap/tidb/br/pkg/lightning/verification"
"github.com/pingcap/tidb/br/pkg/logutil"
"github.com/pingcap/tidb/br/pkg/redact"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/meta/autoid"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/table"
"github.com/pingcap/tidb/table/tables"
"github.com/pingcap/tidb/tablecodec"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
// Import tidb/planner/core to initialize expression.RewriteAstExpr
_ "github.com/pingcap/tidb/planner/core"
)
// ExtraHandleColumnInfo is the column info produced by model.NewExtraHandleColInfo.
// Encode uses it to cast (and report errors for) the extra row-ID datum that is
// appended for tables with an auto row ID.
var ExtraHandleColumnInfo = model.NewExtraHandleColInfo()
// genCol pairs a generated column with the expression that computes its value.
type genCol struct {
// index is the position of the column in the table's column slice.
index int
// expr is the rewritten generated-column expression to evaluate.
expr expression.Expression
}
// autoIDConverter maps a row ID to the ID value that is actually written.
type autoIDConverter func(int64) int64
// tableKVEncoder encodes rows of a single table into KV pairs (see Encode).
type tableKVEncoder struct {
// tbl is the table whose rows are encoded.
tbl table.Table
// se is the session handed to the cast, allocator and AddRecord calls.
se *session
// recordCache keeps the record slice of the previous Encode call so its
// backing array can be reused.
recordCache []types.Datum
// genCols lists the generated columns to evaluate, in evaluation order.
genCols []genCol
// autoIDFn converts the row ID generated by lightning into the stored ID
// for tables using shard row ID bits or an auto-random primary key.
autoIDFn autoIDConverter
}
// NewTableKVEncoder creates an Encoder for tbl.
//
// It counts the new encoder in metric.KvEncoderCounter under the "open"
// label, creates a session from options, and picks the row-ID conversion
// function:
//   - for a table whose integer primary key is the handle and carries
//     auto-random bits, a random prefix (seeded by options.AutoRandomSeed) is
//     OR-ed into every ID;
//   - for a table with shard row ID bits, shard bits derived from a generator
//     re-seeded with the ID itself are OR-ed into the ID;
//   - otherwise the ID is returned unchanged.
//
// An error is returned when the generated column expressions cannot be
// collected.
func NewTableKVEncoder(tbl table.Table, options *SessionOptions) (Encoder, error) {
	metric.KvEncoderCounter.WithLabelValues("open").Inc()

	tblInfo := tbl.Meta()
	columns := tbl.Cols()
	sess := newSession(options)

	// Set CommonAddRecordCtx to session to reuse the slices and BufStore in AddRecord
	tables.SetAddRecordCtx(sess, tables.NewCommonAddRecordCtx(len(columns)))

	// Identity conversion unless the table needs random or shard bits.
	convertID := func(id int64) int64 { return id }
	switch {
	case tblInfo.PKIsHandle && tblInfo.ContainsAutoRandomBits():
		for _, column := range columns {
			if !mysql.HasPriKeyFlag(column.Flag) {
				continue
			}
			// Only the first primary-key column is considered.
			shift := autoRandomIncrementBits(column, int(tblInfo.AutoRandomBits))
			rng := rand.New(rand.NewSource(options.AutoRandomSeed)) // nolint:gosec
			randomPrefix := rng.Int63n(1<<tblInfo.AutoRandomBits) << shift
			convertID = func(id int64) int64 {
				return randomPrefix | id
			}
			break
		}
	case tblInfo.ShardRowIDBits > 0:
		rng := rand.New(rand.NewSource(options.AutoRandomSeed)) // nolint:gosec
		shardMask := int64(1)<<tblInfo.ShardRowIDBits - 1
		shardShift := autoid.RowIDBitLength - tblInfo.ShardRowIDBits - 1
		convertID = func(id int64) int64 {
			// Re-seeding with the ID makes the shard bits a function of the ID.
			rng.Seed(id)
			shard := (int64(rng.Uint32()) & shardMask) << shardShift
			return shard | id
		}
	}

	// collect expressions for evaluating stored generated columns
	generated, err := collectGeneratedColumns(sess, tblInfo, columns)
	if err != nil {
		return nil, errors.Annotate(err, "failed to parse generated column expressions")
	}

	encoder := &tableKVEncoder{
		tbl:      tbl,
		se:       sess,
		genCols:  generated,
		autoIDFn: convertID,
	}
	return encoder, nil
}
// autoRandomIncrementBits returns how many low bits of an auto-random column
// are left for the incremental part: the column type's width in bits (from
// mysql.DefaultLengthOfMysqlTypes) minus randomBits, minus one more bit when
// the column is not unsigned.
func autoRandomIncrementBits(col *table.Column, randomBits int) int {
	bits := mysql.DefaultLengthOfMysqlTypes[col.Tp]*8 - randomBits
	if mysql.HasUnsignedFlag(col.Flag) {
		return bits
	}
	// Signed columns lose one bit to the sign.
	return bits - 1
}
// collectGeneratedColumns rewrites the expression of every generated column
// in cols and returns them sorted by column offset, which is the order in
// which they must be evaluated. It returns (nil, nil) when the table has no
// generated column, and the rewrite error as soon as one expression fails.
func collectGeneratedColumns(se *session, meta *model.TableInfo, cols []*table.Column) ([]genCol, error) {
	// Nothing to do (and no session state to touch) without generated columns.
	anyGenerated := false
	for idx := range cols {
		if cols[idx].GeneratedExpr != nil {
			anyGenerated = true
			break
		}
	}
	if !anyGenerated {
		return nil, nil
	}

	// the expression rewriter requires a non-nil TxnCtx.
	se.vars.TxnCtx = new(variable.TransactionContext)
	defer func() {
		se.vars.TxnCtx = nil
	}()

	// not using TableInfo2SchemaAndNames to avoid parsing all virtual generated columns again.
	fieldNames := make(types.NameSlice, 0, len(cols))
	schemaCols := make([]*expression.Column, 0, len(cols))
	for pos, col := range cols {
		fieldName := &types.FieldName{
			OrigTblName: meta.Name,
			OrigColName: col.Name,
			TblName:     meta.Name,
			ColName:     col.Name,
		}
		fieldNames = append(fieldNames, fieldName)
		schemaCols = append(schemaCols, &expression.Column{
			RetType:  col.FieldType.Clone(),
			ID:       col.ID,
			UniqueID: int64(pos),
			Index:    col.Offset,
			OrigName: fieldName.String(),
			IsHidden: col.Hidden,
		})
	}
	schema := expression.NewSchema(schemaCols...)

	// as long as we have a stored generated column, all columns it referred to must be evaluated as well.
	// for simplicity every generated column (virtual or not) is collected.
	var result []genCol
	for pos, col := range cols {
		if col.GeneratedExpr == nil {
			continue
		}
		rewritten, err := expression.RewriteAstExpr(se, col.GeneratedExpr, schema, fieldNames)
		if err != nil {
			return nil, err
		}
		result = append(result, genCol{index: pos, expr: rewritten})
	}

	// order the result by column offset so they match the evaluation order.
	sort.Slice(result, func(a, b int) bool {
		return cols[result[a].index].Offset < cols[result[b].index].Offset
	})
	return result, nil
}
// Close closes the encoder's session and then increments the "closed" label
// of metric.KvEncoderCounter (the counterpart of the "open" increment done in
// NewTableKVEncoder).
func (kvcodec *tableKVEncoder) Close() {
kvcodec.se.Close()
metric.KvEncoderCounter.WithLabelValues("closed").Inc()
}
// RowArrayMarshaler wraps a slice of types.Datum so that its content can be
// logged through zap (see MarshalLogArray).
type RowArrayMarshaler []types.Datum
// kindStr maps each datum kind constant to the name logged in the "kind"
// field by RowArrayMarshaler.MarshalLogArray. The array is indexed directly
// by the kind value.
var kindStr = [...]string{
types.KindNull: "null",
types.KindInt64: "int64",
types.KindUint64: "uint64",
types.KindFloat32: "float32",
types.KindFloat64: "float64",
types.KindString: "string",
types.KindBytes: "bytes",
types.KindBinaryLiteral: "binary",
types.KindMysqlDecimal: "decimal",
types.KindMysqlDuration: "duration",
types.KindMysqlEnum: "enum",
types.KindMysqlBit: "bit",
types.KindMysqlSet: "set",
types.KindMysqlTime: "time",
types.KindInterface: "interface",
types.KindMinNotNull: "min",
types.KindMaxValue: "max",
types.KindRaw: "raw",
types.KindMysqlJSON: "json",
}
// MarshalLogArray implements the zapcore.ArrayMarshaler interface.
//
// Every datum of the row is appended as an object with two string fields:
// "kind" (the name of the datum kind taken from kindStr) and "val" (the
// redacted string form of the value). NULL, the minimum and the maximum
// sentinel are rendered as "NULL", "-inf" and "+inf"; every other kind goes
// through Datum.ToString.
//
// It returns the first error reported by Datum.ToString or by the encoder.
func (row RowArrayMarshaler) MarshalLogArray(encoder zapcore.ArrayEncoder) error {
	for _, datum := range row {
		kind := datum.Kind()

		var str string
		switch kind {
		case types.KindNull:
			str = "NULL"
		case types.KindMinNotNull:
			str = "-inf"
		case types.KindMaxValue:
			str = "+inf"
		default:
			var err error
			str, err = datum.ToString()
			if err != nil {
				return err
			}
		}

		// kindStr is indexed by the kind value; a kind beyond the table would
		// otherwise panic while we are merely trying to log a row.
		kindName := "unknown"
		if int(kind) < len(kindStr) {
			kindName = kindStr[kind]
		}

		err := encoder.AppendObject(zapcore.ObjectMarshalerFunc(func(enc zapcore.ObjectEncoder) error {
			enc.AddString("kind", kindName)
			enc.AddString("val", redact.String(str))
			return nil
		}))
		if err != nil {
			return err
		}
	}
	return nil
}
// logKVConvertFailed logs a failed value conversion and returns err annotated
// with the target type, column name and 1-based column number.
//
// j is the index of the offending datum in row. When j is within row, only
// that datum is logged; otherwise the whole row is logged and the "origVal"
// field is taken from a zero-value datum. The failure is written both to
// logger and to the global logger returned by log.L().
func logKVConvertFailed(logger log.Logger, row []types.Datum, j int, colInfo *model.ColumnInfo, err error) error {
	var origDatum types.Datum
	if j >= 0 && j < len(row) {
		origDatum = row[j]
		row = row[j : j+1]
	}

	colName := colInfo.Name.O
	colType := &colInfo.FieldType
	columnNo := j + 1

	logger.Error("kv convert failed",
		zap.Array("original", RowArrayMarshaler(row)),
		zap.Int("originalCol", j),
		zap.String("colName", colName),
		zap.Stringer("colType", colType),
		log.ShortError(err),
	)

	log.L().Error("failed to covert kv value",
		logutil.RedactAny("origVal", origDatum.GetValue()),
		zap.Stringer("fieldType", colType),
		zap.String("column", colName),
		zap.Int("columnID", columnNo),
	)

	return errors.Annotatef(
		err,
		"failed to cast value as %s for column `%s` (#%d)", colType, colName, columnNo,
	)
}
// logEvalGenExprFailed logs that the generated column described by colInfo
// could not be evaluated for row, and returns err annotated with the column
// name.
func logEvalGenExprFailed(logger log.Logger, row []types.Datum, colInfo *model.ColumnInfo, err error) error {
	colName := colInfo.Name.O

	logger.Error("kv convert failed: cannot evaluate generated column expression",
		zap.Array("original", RowArrayMarshaler(row)),
		zap.String("colName", colName),
		log.ShortError(err),
	)

	return errors.Annotatef(err, "failed to evaluate generated column expression for column `%s`", colName)
}
// KvPairs holds a batch of KV pairs together with the buffer that Clear
// recycles. This file returns *KvPairs both as a Row and as a Rows.
type KvPairs struct {
// pairs is the list of KV pairs in this batch.
pairs []common.KvPair
// bytesBuf is handed to memBuf.Recycle by Clear; nil when there is nothing
// to recycle.
bytesBuf *bytesBuf
// memBuf is the buffer owner that bytesBuf is recycled into.
memBuf *kvMemBuf
}
// MakeRowsFromKvPairs wraps a KvPair slice in a Rows instance without copying
// it. This is mainly used for testing only. The resulting Rows instance
// should only be used for the importer backend.
func MakeRowsFromKvPairs(pairs []common.KvPair) Rows {
	rows := new(KvPairs)
	rows.pairs = pairs
	return rows
}
// MakeRowFromKvPairs wraps a KvPair slice in a Row instance without copying
// it. This is mainly used for testing only. The resulting Row instance should
// only be used for the importer backend.
func MakeRowFromKvPairs(pairs []common.KvPair) Row {
	row := new(KvPairs)
	row.pairs = pairs
	return row
}
// KvPairsFromRows returns the KvPair slice held by a Rows instance that was
// constructed from MakeRowsFromKvPairs. It panics when rows is not a
// *KvPairs.
// nolint:golint // kv.KvPairsFromRows sounds good.
func KvPairsFromRows(rows Rows) []common.KvPair {
	kvs := rows.(*KvPairs)
	return kvs.pairs
}
// evaluateGeneratedColumns computes every generated column listed in genCols,
// in slice order, and stores the results in record.
//
// Each expression is evaluated against a mutable row built from record; the
// result is cast to the column's type and written back to both the mutable
// row and record, so later expressions see earlier results. On failure the
// error is returned together with the info of the column being processed;
// on success both results are nil.
func evaluateGeneratedColumns(se *session, record []types.Datum, cols []*table.Column, genCols []genCol) (err error, errCol *model.ColumnInfo) {
	row := chunk.MutRowFromDatums(record)
	for i := range genCols {
		pos := genCols[i].index
		info := cols[pos].ToInfo()

		rawValue, evalErr := genCols[i].expr.Eval(row.ToRow())
		if evalErr != nil {
			return evalErr, info
		}

		casted, castErr := table.CastValue(se, rawValue, info, false, false)
		if castErr != nil {
			return castErr, info
		}

		row.SetDatum(pos, casted)
		record[pos] = casted
	}
	return nil, nil
}
// Encode a row of data into KV pairs.
//
// Each table column i takes its value from row[columnPermutation[i]] when
// that index lies within row. Otherwise a value is synthesized: rowID for an
// auto-increment column, kvcodec.autoIDFn(rowID) for the primary key of an
// auto-random table, a placeholder for a generated column, or the column's
// default value. For tables with an auto row ID one extra datum is appended,
// selected by columnPermutation[len(cols)]. Generated columns are then
// evaluated, the record is passed to AddRecord on the table, and the KV pairs
// taken from the session are returned with their RowID set to rowID.
//
// The unnamed string parameter and `offset` are not used by this
// implementation.
//
// See comments in `(*TableRestore).initializeColumns` for the meaning of the
// `columnPermutation` parameter.
func (kvcodec *tableKVEncoder) Encode(
logger log.Logger,
row []types.Datum,
rowID int64,
columnPermutation []int,
_ string,
offset int64,
) (Row, error) {
cols := kvcodec.tbl.Cols()
var value types.Datum
var err error
// Reuse the record slice cached by the previous successful call, if any;
// otherwise allocate room for every column plus the optional row ID.
//nolint:prealloc // This is a placeholder.
var record []types.Datum
if kvcodec.recordCache != nil {
record = kvcodec.recordCache
} else {
record = make([]types.Datum, 0, len(cols)+1)
}
meta := kvcodec.tbl.Meta()
isAutoRandom := meta.PKIsHandle && meta.ContainsAutoRandomBits()
for i, col := range cols {
// j is the index in row that supplies this column; an index outside row
// means the input does not provide the column.
j := columnPermutation[i]
isAutoIncCol := mysql.HasAutoIncrementFlag(col.Flag)
isPk := mysql.HasPriKeyFlag(col.Flag)
switch {
case j >= 0 && j < len(row):
// The input supplies the value: cast it to the column type, then apply
// the column's NULL handling.
value, err = table.CastValue(kvcodec.se, row[j], col.ToInfo(), false, false)
if err == nil {
err = col.HandleBadNull(&value, kvcodec.se.vars.StmtCtx)
}
case isAutoIncCol:
// we still need a conversion, e.g. to catch overflow with a TINYINT column.
value, err = table.CastValue(kvcodec.se, types.NewIntDatum(rowID), col.ToInfo(), false, false)
case isAutoRandom && isPk:
// Derive the auto-random primary key from rowID, using an unsigned datum
// when the column is unsigned.
var val types.Datum
realRowID := kvcodec.autoIDFn(rowID)
if mysql.HasUnsignedFlag(col.Flag) {
val = types.NewUintDatum(uint64(realRowID))
} else {
val = types.NewIntDatum(realRowID)
}
value, err = table.CastValue(kvcodec.se, val, col.ToInfo(), false, false)
case col.IsGenerated():
// inject some dummy value for gen col so that MutRowFromDatums below sees a real value instead of nil.
// if MutRowFromDatums sees a nil it won't initialize the underlying storage and cause SetDatum to panic.
value = types.GetMinValue(&col.FieldType)
default:
value, err = table.GetColDefaultValue(kvcodec.se, col.ToInfo())
}
if err != nil {
return nil, logKVConvertFailed(logger, row, j, col.ToInfo(), err)
}
record = append(record, value)
if isAutoRandom && isPk {
// Rebase the auto-random allocator with only the incremental (low) bits
// of the value; the random prefix is masked away.
incrementalBits := autoRandomIncrementBits(col, int(meta.AutoRandomBits))
alloc := kvcodec.tbl.Allocators(kvcodec.se).Get(autoid.AutoRandomType)
if err := alloc.Rebase(context.Background(), value.GetInt64()&((1<<incrementalBits)-1), false); err != nil {
return nil, errors.Trace(err)
}
}
if isAutoIncCol {
// Rebase the auto-increment allocator with the value stored in this row.
alloc := kvcodec.tbl.Allocators(kvcodec.se).Get(autoid.AutoIncrementType)
if err := alloc.Rebase(context.Background(), getAutoRecordID(value, &col.FieldType), false); err != nil {
return nil, errors.Trace(err)
}
}
}
if common.TableHasAutoRowID(meta) {
// The extra row-ID datum comes from the input when the permutation maps
// it to a position in row, otherwise from autoIDFn(rowID).
// rowValue is what the row-ID allocator is rebased with: the cast input
// value in the first case, the unconverted rowID in the second.
rowValue := rowID
j := columnPermutation[len(cols)]
if j >= 0 && j < len(row) {
value, err = table.CastValue(kvcodec.se, row[j], ExtraHandleColumnInfo, false, false)
rowValue = value.GetInt64()
} else {
// This rowID shadows the parameter inside the else branch only.
rowID := kvcodec.autoIDFn(rowID)
value, err = types.NewIntDatum(rowID), nil
}
if err != nil {
return nil, logKVConvertFailed(logger, row, j, ExtraHandleColumnInfo, err)
}
record = append(record, value)
alloc := kvcodec.tbl.Allocators(kvcodec.se).Get(autoid.RowIDAllocType)
if err := alloc.Rebase(context.Background(), rowValue, false); err != nil {
return nil, errors.Trace(err)
}
}
if len(kvcodec.genCols) > 0 {
// Replace the placeholders of generated columns with their real values.
if err, errCol := evaluateGeneratedColumns(kvcodec.se, record, cols, kvcodec.genCols); err != nil {
return nil, logEvalGenExprFailed(logger, row, errCol, err)
}
}
_, err = kvcodec.tbl.AddRecord(kvcodec.se, record)
if err != nil {
logger.Error("kv encode failed",
zap.Array("originalRow", RowArrayMarshaler(row)),
zap.Array("convertedRow", RowArrayMarshaler(record)),
log.ShortError(err),
)
return nil, errors.Trace(err)
}
// Take the KV pairs accumulated in the session and tag each of them with
// the original rowID parameter.
kvPairs := kvcodec.se.takeKvPairs()
for i := 0; i < len(kvPairs.pairs); i++ {
kvPairs.pairs[i].RowID = rowID
}
// Keep the backing array for the next call (only reached on success).
kvcodec.recordCache = record[:0]
return kvPairs, nil
}
// getAutoRecordID extracts the integer value of an auto-increment field from
// d, given the field type of the column. Integer types return the datum's
// int64 value; FLOAT and DOUBLE return the float value rounded to the nearest
// integer. Any other field type panics.
//
// See: https://github.com/pingcap/tidb/blob/47f0f15b14ed54fc2222f3e304e29df7b05e6805/executor/insert_common.go#L781-L852
func getAutoRecordID(d types.Datum, target *types.FieldType) int64 {
	switch target.Tp {
	case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong:
		return d.GetInt64()
	case mysql.TypeFloat, mysql.TypeDouble:
		rounded := math.Round(d.GetFloat64())
		return int64(rounded)
	}
	panic(fmt.Sprintf("unsupported auto-increment field type '%d'", target.Tp))
}
// Size returns the total number of bytes of all keys and values held in kvs.
func (kvs *KvPairs) Size() uint64 {
	var total uint64
	for i := range kvs.pairs {
		pair := &kvs.pairs[i]
		total += uint64(len(pair.Key) + len(pair.Val))
	}
	return total
}
// ClassifyAndAppend distributes the pairs of kvs between *data and *indices
// and updates the matching checksum for every pair.
//
// A pair whose key has the byte 'r' at position tablecodec.TableSplitKeyLen+1
// is appended to *data (and counted in dataChecksum); every other pair goes
// to *indices (and indexChecksum). Both *data and *indices must hold a
// *KvPairs, otherwise the type assertion panics. If kvs carries a buffer, its
// ownership is moved to the data rows.
func (kvs *KvPairs) ClassifyAndAppend(
	data *Rows,
	dataChecksum *verification.KVChecksum,
	indices *Rows,
	indexChecksum *verification.KVChecksum,
) {
	recordRows := (*data).(*KvPairs)
	indexRows := (*indices).(*KvPairs)

	for _, pair := range kvs.pairs {
		switch pair.Key[tablecodec.TableSplitKeyLen+1] {
		case 'r':
			recordRows.pairs = append(recordRows.pairs, pair)
			dataChecksum.UpdateOne(pair)
		default:
			indexRows.pairs = append(indexRows.pairs, pair)
			indexChecksum.UpdateOne(pair)
		}
	}

	// the related buf is shared, so we only need to set it into one of the kvs so it can be released
	if sharedBuf := kvs.bytesBuf; sharedBuf != nil {
		recordRows.bytesBuf, recordRows.memBuf = sharedBuf, kvs.memBuf
		kvs.bytesBuf, kvs.memBuf = nil, nil
	}

	*data = recordRows
	*indices = indexRows
}
// SplitIntoChunks splits kvs into consecutive chunks whose total key+value
// size does not exceed splitSize, except that a chunk always holds at least
// one pair (so a single oversized pair forms its own chunk).
//
// It returns nil when kvs is empty. When everything fits into one chunk the
// receiver itself is returned as the only element. Otherwise only the last
// chunk carries the receiver's bytesBuf and memBuf. The chunks share the
// receiver's backing array; no pair is copied.
func (kvs *KvPairs) SplitIntoChunks(splitSize int) []Rows {
	if len(kvs.pairs) == 0 {
		return nil
	}

	res := make([]Rows, 0, 1)
	i := 0
	cumSize := 0
	for j, pair := range kvs.pairs {
		size := len(pair.Key) + len(pair.Val)
		if i < j && cumSize+size > splitSize {
			// Cap the chunk at its own length: without the third index an
			// append to this chunk would write into the pairs of the chunks
			// that follow it in the shared backing array.
			res = append(res, &KvPairs{pairs: kvs.pairs[i:j:j]})
			i = j
			cumSize = 0
		}
		cumSize += size
	}

	if i == 0 {
		// Nothing was split off; hand back the receiver unchanged.
		res = append(res, kvs)
	} else {
		res = append(res, &KvPairs{
			pairs:    kvs.pairs[i:],
			bytesBuf: kvs.bytesBuf,
			memBuf:   kvs.memBuf,
		})
	}
	return res
}
// Clear empties kvs and returns it for reuse. If kvs holds a buffer, the
// buffer is handed to memBuf.Recycle and both buffer fields are reset to nil.
// The pairs slice is truncated to length zero and keeps its capacity.
func (kvs *KvPairs) Clear() Rows {
	if buf := kvs.bytesBuf; buf != nil {
		kvs.memBuf.Recycle(buf)
		kvs.bytesBuf, kvs.memBuf = nil, nil
	}
	kvs.pairs = kvs.pairs[:0]
	return kvs
}