// tidb/util/rowcodec/encoder.go
// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package rowcodec
import (
"math"
"sort"
"time"
"github.com/pingcap/errors"
"github.com/pingcap/parser/mysql"
"github.com/pingcap/parser/terror"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/codec"
)
// Encoder is used to encode a row.
type Encoder struct {
	// row holds the reusable per-row encoding state (isLarge flag, column
	// counts, column IDs, offsets and the data buffer); it is embedded so
	// build can fill it in place.
	row
	// tempColIDs is the caller-supplied column ID for each datum passed to
	// Encode, in the same order as the values slice.
	tempColIDs []int64
	// values buffers the datums of the current row; build compacts the
	// not-null values to the front of this slice before encoding.
	values []types.Datum
	// tempData is scratch space — not used in this chunk; TODO confirm
	// against the rest of the file.
	tempData []byte
	// sc supplies the time zone and truncate/overflow handling used while
	// encoding individual datums.
	sc *stmtctx.StatementContext
}
// NewEncoder creates a new Encoder with column IDs.
func NewEncoder(colIDs []int64, sc *stmtctx.StatementContext) *Encoder {
	encoder := new(Encoder)
	encoder.tempColIDs = colIDs
	encoder.sc = sc
	return encoder
}
// reset clears the per-row state so the Encoder can be reused for the next
// row. Slices are truncated to length zero, keeping their capacity to avoid
// reallocation on subsequent rows.
func (encoder *Encoder) reset() {
	encoder.isLarge = false
	encoder.numNotNullCols = 0
	encoder.numNullCols = 0
	encoder.data = encoder.data[:0]
	encoder.values = encoder.values[:0]
}
// Encode encodes a row from a datums slice.
func (encoder *Encoder) Encode(values []types.Datum, buf []byte) ([]byte, error) {
	encoder.reset()
	encoder.values = append(encoder.values, values...)
	// Classify every column up front: a single ID above 255 forces the
	// large-row layout, and null/not-null counts size the header.
	for i := range encoder.tempColIDs {
		if encoder.tempColIDs[i] > 255 {
			encoder.isLarge = true
		}
		if values[i].IsNull() {
			encoder.numNullCols++
			continue
		}
		encoder.numNotNullCols++
	}
	return encoder.build(buf)
}
// build serializes the prepared row into buf and returns the result.
// Layout: [version][flag][numNotNull][numNull][column IDs][offsets][data],
// with 1-byte IDs / 2-byte offsets for small rows and 4-byte IDs/offsets for
// large rows.
func (encoder *Encoder) build(buf []byte) ([]byte, error) {
	r := &encoder.row
	// Separate null and not-null column IDs.
	numCols := len(encoder.tempColIDs)
	nullIdx := numCols - int(r.numNullCols)
	notNullIdx := 0
	if r.isLarge {
		encoder.initColIDs32()
		encoder.initOffsets32()
	} else {
		encoder.initColIDs()
		encoder.initOffsets()
	}
	// Partition IDs: not-null IDs fill the front of the ID slice, null IDs
	// the back. Not-null values are compacted to the front of encoder.values
	// so values[i] pairs with the i-th not-null column ID.
	for i, colID := range encoder.tempColIDs {
		if encoder.values[i].IsNull() {
			if r.isLarge {
				r.colIDs32[nullIdx] = uint32(colID)
			} else {
				r.colIDs[nullIdx] = byte(colID)
			}
			nullIdx++
		} else {
			if r.isLarge {
				r.colIDs32[notNullIdx] = uint32(colID)
			} else {
				r.colIDs[notNullIdx] = byte(colID)
			}
			encoder.values[notNullIdx] = encoder.values[i]
			notNullIdx++
		}
	}
	// Sort each ID group ascending; the not-null sorters swap the values
	// slice in lockstep so IDs and datums stay paired.
	if r.isLarge {
		largeNotNullSorter := (*largeNotNullSorter)(encoder)
		sort.Sort(largeNotNullSorter)
		if r.numNullCols > 0 {
			largeNullSorter := (*largeNullSorter)(encoder)
			sort.Sort(largeNullSorter)
		}
	} else {
		smallNotNullSorter := (*smallNotNullSorter)(encoder)
		sort.Sort(smallNotNullSorter)
		if r.numNullCols > 0 {
			smallNullSorter := (*smallNullSorter)(encoder)
			sort.Sort(smallNullSorter)
		}
	}
	// Encode each not-null value; offsets[i] records the end position of the
	// i-th value within r.data.
	for i := 0; i < notNullIdx; i++ {
		var err error
		r.data, err = encodeDatum(r.data, encoder.values[i], encoder.sc)
		if err != nil {
			return nil, errors.Trace(err)
		}
		if len(r.data) > math.MaxUint16 && !r.isLarge {
			// We need to convert the row to large row.
			// Widen the already-written IDs and offsets to 32 bits; offsets
			// written after this point go straight into offsets32.
			encoder.initColIDs32()
			for j := 0; j < numCols; j++ {
				r.colIDs32[j] = uint32(r.colIDs[j])
			}
			encoder.initOffsets32()
			for j := 0; j <= i; j++ {
				r.offsets32[j] = uint32(r.offsets[j])
			}
			r.isLarge = true
		}
		if r.isLarge {
			r.offsets32[i] = uint32(len(r.data))
		} else {
			r.offsets[i] = uint16(len(r.data))
		}
	}
	// Header: codec version, large-row flag, then the two column counts as
	// little-endian uint16s.
	buf = append(buf, CodecVer)
	flag := byte(0)
	if r.isLarge {
		flag = 1
	}
	buf = append(buf, flag)
	buf = append(buf, byte(r.numNotNullCols), byte(r.numNotNullCols>>8))
	buf = append(buf, byte(r.numNullCols), byte(r.numNullCols>>8))
	if r.isLarge {
		buf = append(buf, u32SliceToBytes(r.colIDs32)...)
		buf = append(buf, u32SliceToBytes(r.offsets32)...)
	} else {
		buf = append(buf, r.colIDs...)
		buf = append(buf, u16SliceToBytes(r.offsets)...)
	}
	buf = append(buf, r.data...)
	return buf, nil
}
// encodeDatum appends the value encoding of d to buf and returns the extended
// buffer. No per-value length is written; boundaries come from the offsets in
// the row layout. sc supplies the time zone and truncate/overflow policy.
func encodeDatum(buf []byte, d types.Datum, sc *stmtctx.StatementContext) ([]byte, error) {
	switch d.Kind() {
	case types.KindInt64:
		buf = encodeInt(buf, d.GetInt64())
	case types.KindUint64:
		buf = encodeUint(buf, d.GetUint64())
	case types.KindString, types.KindBytes:
		// Raw bytes, no length prefix.
		buf = append(buf, d.GetBytes()...)
	case types.KindFloat32, types.KindFloat64:
		// Both float kinds are encoded as the IEEE-754 bit pattern of the
		// datum's float64 representation.
		buf = encodeUint(buf, uint64(math.Float64bits(d.GetFloat64())))
	case types.KindMysqlDecimal:
		var err error
		buf, err = codec.EncodeDecimal(buf, d.GetMysqlDecimal(), d.Length(), d.Frac())
		// Truncation/overflow may be downgraded or ignored depending on the
		// statement context's handling policy.
		if terror.ErrorEqual(err, types.ErrTruncated) {
			err = sc.HandleTruncate(err)
		} else if terror.ErrorEqual(err, types.ErrOverflow) {
			err = sc.HandleOverflow(err, err)
		}
		if err != nil {
			return nil, errors.Trace(err)
		}
	case types.KindMysqlTime:
		t := d.GetMysqlTime()
		// Encoding timestamp need to consider timezone.
		// If it's not in UTC, transform to UTC first.
		if t.Type == mysql.TypeTimestamp && sc.TimeZone != time.UTC {
			err := t.ConvertTimeZone(sc.TimeZone, time.UTC)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
		v, err := t.ToPackedUint()
		if err != nil {
			return nil, errors.Trace(err)
		}
		buf = encodeUint(buf, v)
	case types.KindMysqlDuration:
		buf = encodeInt(buf, int64(d.GetMysqlDuration().Duration))
	case types.KindMysqlEnum:
		buf = encodeUint(buf, uint64(d.GetMysqlEnum().ToNumber()))
	case types.KindMysqlSet:
		buf = encodeUint(buf, uint64(d.GetMysqlSet().ToNumber()))
	case types.KindMysqlBit, types.KindBinaryLiteral:
		// Best-effort: a conversion error is logged, and whatever value
		// ToInt returned is still encoded.
		val, err := types.BinaryLiteral(d.GetBytes()).ToInt(sc)
		if err != nil {
			terror.Log(errors.Trace(err))
		}
		buf = encodeUint(buf, val)
	case types.KindMysqlJSON:
		// JSON is its one-byte type code followed by the raw value bytes.
		j := d.GetMysqlJSON()
		buf = append(buf, j.TypeCode)
		buf = append(buf, j.Value...)
	default:
		return nil, errors.Errorf("unsupport encode type %d", d.Kind())
	}
	return buf, nil
}
// initColIDs sizes the 1-byte column ID buffer to hold every column of the
// current row, reusing existing capacity when it suffices.
func (encoder *Encoder) initColIDs() {
	numCols := int(encoder.numNotNullCols + encoder.numNullCols)
	if cap(encoder.colIDs) < numCols {
		encoder.colIDs = make([]byte, numCols)
		return
	}
	encoder.colIDs = encoder.colIDs[:numCols]
}
// initColIDs32 sizes the 4-byte column ID buffer (large-row layout) to hold
// every column of the current row, reusing existing capacity when it suffices.
func (encoder *Encoder) initColIDs32() {
	numCols := int(encoder.numNotNullCols + encoder.numNullCols)
	if cap(encoder.colIDs32) < numCols {
		encoder.colIDs32 = make([]uint32, numCols)
		return
	}
	encoder.colIDs32 = encoder.colIDs32[:numCols]
}
// initOffsets sizes the 2-byte offset buffer for the not-null columns,
// reusing existing capacity when it suffices.
func (encoder *Encoder) initOffsets() {
	n := int(encoder.numNotNullCols)
	if cap(encoder.offsets) < n {
		encoder.offsets = make([]uint16, n)
		return
	}
	encoder.offsets = encoder.offsets[:n]
}
// initOffsets32 sizes the 4-byte offset buffer (large-row layout) for the
// not-null columns, reusing existing capacity when it suffices.
func (encoder *Encoder) initOffsets32() {
	n := int(encoder.numNotNullCols)
	if cap(encoder.offsets32) < n {
		encoder.offsets32 = make([]uint32, n)
		return
	}
	encoder.offsets32 = encoder.offsets32[:n]
}
/*
We define several sorters to avoid switch cost in sort functions.
*/
// largeNotNullSorter orders the not-null columns of a large row by their
// 32-bit column IDs, swapping the datum values in lockstep with the IDs.
type largeNotNullSorter Encoder

func (s *largeNotNullSorter) Len() int { return int(s.numNotNullCols) }

func (s *largeNotNullSorter) Less(i, j int) bool { return s.colIDs32[i] < s.colIDs32[j] }

func (s *largeNotNullSorter) Swap(i, j int) {
	s.values[i], s.values[j] = s.values[j], s.values[i]
	s.colIDs32[i], s.colIDs32[j] = s.colIDs32[j], s.colIDs32[i]
}
// smallNotNullSorter orders the not-null columns of a small row by their
// 1-byte column IDs, swapping the datum values in lockstep with the IDs.
type smallNotNullSorter Encoder

func (s *smallNotNullSorter) Len() int { return int(s.numNotNullCols) }

func (s *smallNotNullSorter) Less(i, j int) bool { return s.colIDs[i] < s.colIDs[j] }

func (s *smallNotNullSorter) Swap(i, j int) {
	s.values[i], s.values[j] = s.values[j], s.values[i]
	s.colIDs[i], s.colIDs[j] = s.colIDs[j], s.colIDs[i]
}
// smallNullSorter orders the null columns of a small row, which occupy the
// tail of colIDs starting at index numNotNullCols. Null columns have no
// values, so only the IDs are swapped.
type smallNullSorter Encoder

func (s *smallNullSorter) Len() int { return int(s.numNullCols) }

func (s *smallNullSorter) Less(i, j int) bool {
	base := int(s.numNotNullCols)
	return s.colIDs[base+i] < s.colIDs[base+j]
}

func (s *smallNullSorter) Swap(i, j int) {
	base := int(s.numNotNullCols)
	s.colIDs[base+i], s.colIDs[base+j] = s.colIDs[base+j], s.colIDs[base+i]
}
// largeNullSorter orders the null columns of a large row, which occupy the
// tail of colIDs32 starting at index numNotNullCols. Null columns have no
// values, so only the IDs are swapped.
type largeNullSorter Encoder

func (s *largeNullSorter) Len() int { return int(s.numNullCols) }

func (s *largeNullSorter) Less(i, j int) bool {
	base := int(s.numNotNullCols)
	return s.colIDs32[base+i] < s.colIDs32[base+j]
}

func (s *largeNullSorter) Swap(i, j int) {
	base := int(s.numNotNullCols)
	s.colIDs32[base+i], s.colIDs32[base+j] = s.colIDs32[base+j], s.colIDs32[base+i]
}