expression: add vectorized evaluation methods to Expression (#11530)

This commit is contained in:
Yuanjia Zhang
2019-08-01 13:28:23 +08:00
committed by GitHub
parent 7b63293180
commit d54f0a2bd4
11 changed files with 388 additions and 36 deletions

View File

@ -18,6 +18,7 @@
package expression
import (
"github.com/pingcap/errors"
"github.com/pingcap/parser/ast"
"github.com/pingcap/parser/charset"
"github.com/pingcap/parser/mysql"
@ -171,32 +172,36 @@ func (b *baseBuiltinFunc) getArgs() []Expression {
return b.args
}
// vecEval is the vectorized evaluator provided by baseBuiltinFunc itself.
// It does no work: it unconditionally returns an error stating that this
// method should never be called. Neither input nor result is touched.
func (b *baseBuiltinFunc) vecEval(input *chunk.Chunk, result *chunk.Column) error {
	err := errors.Errorf("baseBuiltinFunc.vecEval() should never be called, please contact the TiDB team for help")
	return err
}
// evalInt is the int evaluator provided by baseBuiltinFunc itself.
// It unconditionally returns (0, false, err) where err states that this method
// should never be called. The failure is reported as an error instead of a
// panic so that the caller can handle it like any other evaluation error.
func (b *baseBuiltinFunc) evalInt(row chunk.Row) (int64, bool, error) {
	return 0, false, errors.Errorf("baseBuiltinFunc.evalInt() should never be called, please contact the TiDB team for help")
}
// evalReal is the real evaluator provided by baseBuiltinFunc itself.
// It unconditionally returns (0, false, err) where err states that this method
// should never be called. The failure is reported as an error instead of a
// panic so that the caller can handle it like any other evaluation error.
func (b *baseBuiltinFunc) evalReal(row chunk.Row) (float64, bool, error) {
	return 0, false, errors.Errorf("baseBuiltinFunc.evalReal() should never be called, please contact the TiDB team for help")
}
// evalString is the string evaluator provided by baseBuiltinFunc itself.
// It unconditionally returns ("", false, err) where err states that this method
// should never be called. The failure is reported as an error instead of a
// panic so that the caller can handle it like any other evaluation error.
func (b *baseBuiltinFunc) evalString(row chunk.Row) (string, bool, error) {
	return "", false, errors.Errorf("baseBuiltinFunc.evalString() should never be called, please contact the TiDB team for help")
}
// evalDecimal is the decimal evaluator provided by baseBuiltinFunc itself.
// It unconditionally returns (nil, false, err) where err states that this
// method should never be called. The failure is reported as an error instead of
// a panic so that the caller can handle it like any other evaluation error.
func (b *baseBuiltinFunc) evalDecimal(row chunk.Row) (*types.MyDecimal, bool, error) {
	return nil, false, errors.Errorf("baseBuiltinFunc.evalDecimal() should never be called, please contact the TiDB team for help")
}
// evalTime is the time evaluator provided by baseBuiltinFunc itself.
// It unconditionally returns (types.Time{}, false, err) where err states that
// this method should never be called. The failure is reported as an error
// instead of a panic so that the caller can handle it like any other
// evaluation error.
func (b *baseBuiltinFunc) evalTime(row chunk.Row) (types.Time, bool, error) {
	return types.Time{}, false, errors.Errorf("baseBuiltinFunc.evalTime() should never be called, please contact the TiDB team for help")
}
// evalDuration is the duration evaluator provided by baseBuiltinFunc itself.
// It unconditionally returns (types.Duration{}, false, err) where err states
// that this method should never be called. The failure is reported as an error
// instead of a panic so that the caller can handle it like any other
// evaluation error.
func (b *baseBuiltinFunc) evalDuration(row chunk.Row) (types.Duration, bool, error) {
	return types.Duration{}, false, errors.Errorf("baseBuiltinFunc.evalDuration() should never be called, please contact the TiDB team for help")
}
// evalJSON is the JSON evaluator provided by baseBuiltinFunc itself.
// It unconditionally returns (json.BinaryJSON{}, false, err) where err states
// that this method should never be called. The failure is reported as an error
// instead of a panic so that the caller can handle it like any other
// evaluation error.
func (b *baseBuiltinFunc) evalJSON(row chunk.Row) (json.BinaryJSON, bool, error) {
	return json.BinaryJSON{}, false, errors.Errorf("baseBuiltinFunc.evalJSON() should never be called, please contact the TiDB team for help")
}
func (b *baseBuiltinFunc) getRetTp() *types.FieldType {
@ -276,8 +281,16 @@ func newBaseBuiltinCastFunc(builtinFunc baseBuiltinFunc, inUnion bool) baseBuilt
}
}
// vecBuiltinFunc contains all vectorized methods for a builtin function.
// It is embedded into builtinFunc, so every builtin function signature has to
// provide these methods.
type vecBuiltinFunc interface {
	// vecEval evaluates this builtin function in a vectorized manner.
	// It receives a whole chunk as input and a column to hold the result,
	// and reports failure through the returned error.
	vecEval(input *chunk.Chunk, result *chunk.Column) error
}
// builtinFunc stands for a particular function signature.
type builtinFunc interface {
vecBuiltinFunc
// evalInt evaluates int result of builtinFunc by given row.
evalInt(row chunk.Row) (val int64, isNull bool, err error)
// evalReal evaluates real representation of builtinFunc by given row.

View File

@ -40,6 +40,11 @@ func (col *CorrelatedColumn) Clone() Expression {
return col
}
// VecEval evaluates this expression in a vectorized manner.
// The correlated column is handled as a constant: the work is delegated to
// genVecFromConstExpr, whose error (if any) is returned unchanged.
func (col *CorrelatedColumn) VecEval(ctx sessionctx.Context, input *chunk.Chunk, result *chunk.Column) error {
	return genVecFromConstExpr(ctx, col, input, result)
}
// Eval implements Expression interface.
func (col *CorrelatedColumn) Eval(row chunk.Row) (types.Datum, error) {
return *col.Data, nil
@ -181,6 +186,12 @@ func (col *Column) Equal(_ sessionctx.Context, expr Expression) bool {
return false
}
// VecEval evaluates this expression in a vectorized manner.
// It picks the column at position col.Index of input and calls CopyConstruct
// on it with result as the destination argument. ctx is not used and the
// returned error is always nil.
func (col *Column) VecEval(ctx sessionctx.Context, input *chunk.Chunk, result *chunk.Column) error {
	src := input.Column(col.Index)
	src.CopyConstruct(result)
	return nil
}
// String implements Stringer interface.
func (col *Column) String() string {
result := col.ColName.L

View File

@ -88,6 +88,14 @@ func (c *Constant) GetType() *types.FieldType {
return c.RetType
}
// VecEval evaluates this expression in a vectorized manner.
// When DeferredExpr is set, evaluation is forwarded to it; otherwise the
// constant itself is expanded into result by genVecFromConstExpr.
func (c *Constant) VecEval(ctx sessionctx.Context, input *chunk.Chunk, result *chunk.Column) error {
	if deferred := c.DeferredExpr; deferred != nil {
		return deferred.VecEval(ctx, input, result)
	}
	return genVecFromConstExpr(ctx, c, input, result)
}
// Eval implements Expression interface.
func (c *Constant) Eval(_ chunk.Row) (types.Datum, error) {
if c.DeferredExpr != nil {

View File

@ -423,3 +423,58 @@ func (*testExpressionSuite) TestDeferredExprNotNull(c *C) {
cln := cst.Clone().(*Constant)
c.Assert(cln.DeferredExpr, Equals, cst.DeferredExpr)
}
// TestVectorizedConstant checks Constant.VecEval for a fixed-length type (int)
// and a variable-length type (string). Each type is tested with a plain
// Constant and with a Constant whose value comes from DeferredExpr, first
// without and then with a selection vector set on the input chunk.
func (*testExpressionSuite) TestVectorizedConstant(c *C) {
	// fixed-length type with/without Sel
	for _, cst := range []*Constant{
		{RetType: newIntFieldType(), Value: types.NewIntDatum(2333)},
		{RetType: newIntFieldType(), DeferredExpr: &Constant{RetType: newIntFieldType(), Value: types.NewIntDatum(2333)}}} {
		chk := chunk.New([]*types.FieldType{newIntFieldType()}, 1024, 1024)
		for i := 0; i < 1024; i++ {
			chk.AppendInt64(0, int64(i))
		}
		col := chunk.NewColumn(newIntFieldType(), 1024)
		ctx := mock.NewContext()
		c.Assert(cst.VecEval(ctx, chk, col), IsNil)
		i64s := col.Int64s()
		c.Assert(len(i64s), Equals, 1024)
		for _, v := range i64s {
			c.Assert(v, Equals, int64(2333))
		}

		// fixed-length type with Sel
		sel := []int{2, 3, 5, 7, 11, 13, 17, 19, 23, 29}
		chk.SetSel(sel)
		c.Assert(cst.VecEval(ctx, chk, col), IsNil)
		i64s = col.Int64s()
		for _, i := range sel {
			c.Assert(i64s[i], Equals, int64(2333))
		}
	}

	// var-length type with/without Sel
	for _, cst := range []*Constant{
		{RetType: newStringFieldType(), Value: types.NewStringDatum("hello")},
		{RetType: newStringFieldType(), DeferredExpr: &Constant{RetType: newStringFieldType(), Value: types.NewStringDatum("hello")}}} {
		chk := chunk.New([]*types.FieldType{newIntFieldType()}, 1024, 1024)
		for i := 0; i < 1024; i++ {
			chk.AppendInt64(0, int64(i))
		}
		// cst is deliberately NOT reassigned here, so that both table entries
		// (plain and DeferredExpr) are really evaluated.
		chk.SetSel(nil)
		col := chunk.NewColumn(newStringFieldType(), 1024)
		ctx := mock.NewContext()
		c.Assert(cst.VecEval(ctx, chk, col), IsNil)
		for i := 0; i < 1024; i++ {
			c.Assert(col.GetString(i), Equals, "hello")
		}

		// var-length type with Sel
		sel := []int{2, 3, 5, 7, 11, 13, 17, 19, 23, 29}
		chk.SetSel(sel)
		c.Assert(cst.VecEval(ctx, chk, col), IsNil)
		for _, i := range sel {
			c.Assert(col.GetString(i), Equals, "hello")
		}
	}
}

View File

@ -38,10 +38,17 @@ const (
// EvalAstExpr evaluates ast expression directly.
var EvalAstExpr func(sctx sessionctx.Context, expr ast.ExprNode) (types.Datum, error)
// VecExpr contains all vectorized evaluation methods.
// It is embedded into Expression, so every Expression implementation has to
// provide these methods.
type VecExpr interface {
	// VecEval evaluates this expression in a vectorized manner.
	// It receives a session context, a whole chunk as input and a column to
	// hold the result, and reports failure through the returned error.
	VecEval(ctx sessionctx.Context, input *chunk.Chunk, result *chunk.Column) error
}
// Expression represents all scalar expression in SQL.
type Expression interface {
fmt.Stringer
goJSON.Marshaler
VecExpr
// Eval evaluates an expression through a row.
Eval(row chunk.Row) (types.Datum, error)

View File

@ -41,6 +41,11 @@ type ScalarFunction struct {
hashcode []byte
}
// VecEval evaluates this expression in a vectorized manner.
// The call is forwarded to the vecEval method of the underlying builtin
// function; ctx is not passed on.
func (sf *ScalarFunction) VecEval(ctx sessionctx.Context, input *chunk.Chunk, result *chunk.Column) error {
	fn := sf.Function
	return fn.vecEval(input, result)
}
// GetArgs gets arguments of function.
func (sf *ScalarFunction) GetArgs() []Expression {
return sf.Function.getArgs()

View File

@ -46,37 +46,22 @@ func (s *testUtilSuite) checkPanic(f func()) (ret bool) {
}
// TestBaseBuiltin covers the base builtin function:
//   - newBaseBuiltinFuncWithTp is expected to panic when called with a nil
//     context, and when called with nil args but one argument type;
//   - every eval* method of a bare base builtin function is expected to return
//     a non-nil error.
func (s *testUtilSuite) TestBaseBuiltin(c *check.C) {
	c.Assert(s.checkPanic(func() {
		newBaseBuiltinFuncWithTp(nil, nil, types.ETTimestamp)
	}), check.IsTrue)

	ctx := mock.NewContext()
	c.Assert(s.checkPanic(func() {
		newBaseBuiltinFuncWithTp(ctx, nil, types.ETTimestamp, types.ETTimestamp)
	}), check.IsTrue)

	bf := newBaseBuiltinFuncWithTp(ctx, nil, types.ETTimestamp)
	// The eval* methods report misuse through their error result, so they are
	// called directly instead of being wrapped in checkPanic.
	_, _, err := bf.evalInt(chunk.Row{})
	c.Assert(err, check.NotNil)
	_, _, err = bf.evalReal(chunk.Row{})
	c.Assert(err, check.NotNil)
	_, _, err = bf.evalString(chunk.Row{})
	c.Assert(err, check.NotNil)
	_, _, err = bf.evalDecimal(chunk.Row{})
	c.Assert(err, check.NotNil)
	_, _, err = bf.evalTime(chunk.Row{})
	c.Assert(err, check.NotNil)
	_, _, err = bf.evalDuration(chunk.Row{})
	c.Assert(err, check.NotNil)
	_, _, err = bf.evalJSON(chunk.Row{})
	c.Assert(err, check.NotNil)
}
func (s *testUtilSuite) TestClone(c *check.C) {
@ -400,6 +385,10 @@ type MockExpr struct {
i interface{}
}
// VecEval is a no-op for the mock: it ignores all of its arguments, leaves
// result untouched and always returns nil.
func (m *MockExpr) VecEval(ctx sessionctx.Context, input *chunk.Chunk, result *chunk.Column) error {
	return nil
}
// String always returns the empty string.
func (m *MockExpr) String() string { return "" }
// MarshalJSON always returns a nil byte slice and a nil error.
func (m *MockExpr) MarshalJSON() ([]byte, error) { return nil, nil }
// Eval ignores row and returns the mock's stored value m.i wrapped by
// types.NewDatum, together with the mock's stored error m.err.
func (m *MockExpr) Eval(row chunk.Row) (types.Datum, error) { return types.NewDatum(m.i), m.err }

203
expression/vectorized.go Normal file
View File

@ -0,0 +1,203 @@
// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package expression
import (
"github.com/pingcap/errors"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
)
// genVecFromConstExpr expands a constant-like expression into a result column.
//
// expr is evaluated exactly once, against an empty chunk.Row, using the Eval*
// method matching the eval type of expr.GetType(). The obtained value is then
// replicated into result:
//   - the number of rows produced is input.NumEffectiveRows();
//   - if input has no selection vector (input.Sel() is nil), every row gets
//     the value;
//   - otherwise only the positions listed in the selection vector get the
//     value and the positions in between are NULL;
//   - if the evaluation yields NULL, every row is NULL.
//
// For int, real and decimal the column is pre-allocated and written in place;
// for time, duration, JSON and string the column is reset and rebuilt by
// appending row by row.
//
// An evaluation error is returned unchanged. An eval type not listed below
// results in an "unsupported Constant type" error.
func genVecFromConstExpr(ctx sessionctx.Context, expr Expression, input *chunk.Chunk, result *chunk.Column) error {
	rowCnt := input.NumEffectiveRows()
	selected := input.Sel()
	switch expr.GetType().EvalType() {
	case types.ETInt:
		result.PreAllocInt64(rowCnt)
		val, null, err := expr.EvalInt(ctx, chunk.Row{})
		if err != nil {
			return err
		}
		if null {
			// all slots are set to null by PreAlloc()
			return nil
		}
		slots := result.Int64s()
		if selected != nil {
			for _, idx := range selected {
				slots[idx] = val
				result.SetNull(idx, false)
			}
			return nil
		}
		for idx := range slots {
			slots[idx] = val
		}
		result.SetNulls(0, rowCnt, false)
		return nil

	case types.ETReal:
		result.PreAllocFloat64(rowCnt)
		val, null, err := expr.EvalReal(ctx, chunk.Row{})
		if err != nil {
			return err
		}
		if null {
			// all slots are set to null by PreAlloc()
			return nil
		}
		slots := result.Float64s()
		if selected != nil {
			for _, idx := range selected {
				slots[idx] = val
				result.SetNull(idx, false)
			}
			return nil
		}
		for idx := range slots {
			slots[idx] = val
		}
		result.SetNulls(0, rowCnt, false)
		return nil

	case types.ETDecimal:
		result.PreAllocDecimal(rowCnt)
		val, null, err := expr.EvalDecimal(ctx, chunk.Row{})
		if err != nil {
			return err
		}
		if null {
			// all slots are set to null by PreAlloc()
			return nil
		}
		slots := result.Decimals()
		if selected != nil {
			for _, idx := range selected {
				slots[idx] = *val
				result.SetNull(idx, false)
			}
			return nil
		}
		for idx := range slots {
			slots[idx] = *val
		}
		result.SetNulls(0, rowCnt, false)
		return nil

	case types.ETDatetime, types.ETTimestamp:
		result.Reset()
		val, null, err := expr.EvalTime(ctx, chunk.Row{})
		if err != nil {
			return err
		}
		switch {
		case null:
			for i := 0; i < rowCnt; i++ {
				result.AppendNull()
			}
		case selected == nil:
			for i := 0; i < rowCnt; i++ {
				result.AppendTime(val)
			}
		default:
			// Pad the gaps before each selected position with NULLs.
			next := 0
			for _, idx := range selected {
				for ; next < idx; next++ {
					result.AppendNull()
				}
				result.AppendTime(val)
				next++
			}
		}
		return nil

	case types.ETDuration:
		result.Reset()
		val, null, err := expr.EvalDuration(ctx, chunk.Row{})
		if err != nil {
			return err
		}
		switch {
		case null:
			for i := 0; i < rowCnt; i++ {
				result.AppendNull()
			}
		case selected == nil:
			for i := 0; i < rowCnt; i++ {
				result.AppendDuration(val)
			}
		default:
			// Pad the gaps before each selected position with NULLs.
			next := 0
			for _, idx := range selected {
				for ; next < idx; next++ {
					result.AppendNull()
				}
				result.AppendDuration(val)
				next++
			}
		}
		return nil

	case types.ETJson:
		result.Reset()
		val, null, err := expr.EvalJSON(ctx, chunk.Row{})
		if err != nil {
			return err
		}
		switch {
		case null:
			for i := 0; i < rowCnt; i++ {
				result.AppendNull()
			}
		case selected == nil:
			for i := 0; i < rowCnt; i++ {
				result.AppendJSON(val)
			}
		default:
			// Pad the gaps before each selected position with NULLs.
			next := 0
			for _, idx := range selected {
				for ; next < idx; next++ {
					result.AppendNull()
				}
				result.AppendJSON(val)
				next++
			}
		}
		return nil

	case types.ETString:
		result.Reset()
		val, null, err := expr.EvalString(ctx, chunk.Row{})
		if err != nil {
			return err
		}
		switch {
		case null:
			for i := 0; i < rowCnt; i++ {
				result.AppendNull()
			}
		case selected == nil:
			for i := 0; i < rowCnt; i++ {
				result.AppendString(val)
			}
		default:
			// Pad the gaps before each selected position with NULLs.
			next := 0
			for _, idx := range selected {
				for ; next < idx; next++ {
					result.AppendNull()
				}
				result.AppendString(val)
				next++
			}
		}
		return nil
	}
	return errors.Errorf("unsupported Constant type for vectorized evaluation")
}

View File

@ -311,6 +311,21 @@ func (c *Chunk) NumRows() int {
return c.columns[0].length
}
// NumEffectiveRows returns the effective number of rows physically stored in this Chunk.
// It is different from NumRows when sel is not nil.
// For example: if sel is [2, 3, 5, 7, 9], then
// NumRows() returns 5 to indicate that 5 rows are selected logically in this Chunk, while
// NumEffectiveRows() returns 10(9+1) to indicate that at least 10 rows are stored in this Chunk physically.
//
// The result is derived from the LAST element of sel only, so it presumably
// relies on sel being kept in ascending order. A non-nil but empty sel gives 0.
func (c *Chunk) NumEffectiveRows() int {
	switch cnt := len(c.sel); {
	case c.sel == nil:
		return c.NumRows()
	case cnt == 0:
		return 0
	default:
		return c.sel[cnt-1] + 1
	}
}
// GetRow gets the Row in the chunk with the row index.
func (c *Chunk) GetRow(idx int) Row {
if c.sel != nil {

View File

@ -279,6 +279,24 @@ func (c *Column) SetNull(rowIdx int, isNull bool) {
}
}
// SetNulls sets the null flag of every row in [begin, end) to isNull.
//
// The range is processed in three parts: leading rows are set one by one via
// SetNull until begin reaches a multiple of 8, then whole bytes of nullBitmap
// are overwritten at once (8 rows per byte; all bits set for "not null", all
// bits cleared for "null"), and finally the remaining rows are again set one
// by one. An empty range (begin >= end) changes nothing.
func (c *Column) SetNulls(begin, end int, isNull bool) {
	// Head: row by row up to the next byte boundary (or end, if that comes first).
	for boundary := (begin + 7) &^ 7; begin < boundary && begin < end; begin++ {
		c.SetNull(begin, isNull)
	}

	// Body: one bitmap byte covers 8 consecutive rows.
	fill := uint8(0xFF)
	if isNull {
		fill = 0
	}
	for ; begin+8 <= end; begin += 8 {
		c.nullBitmap[begin>>3] = fill
	}

	// Tail: fewer than 8 rows are left.
	for ; begin < end; begin++ {
		c.SetNull(begin, isNull)
	}
}
// nullCount returns the number of nulls in this Column.
func (c *Column) nullCount() int {
var cnt, i int

View File

@ -653,3 +653,31 @@ func (s *testChunkSuite) TestNull(c *check.C) {
col.SetNull(8, false)
c.Assert(col.nullCount(), check.Equals, 8)
}
// TestSetNulls exercises Column.SetNulls on a 1024-row float64 column.
// It first clears all null flags in one call, then marks 100 random ranges
// (each at most 36 rows long, clipped to the column size) as null and checks
// after every step that the null count and the per-row null flags agree with
// the set of rows marked so far.
func (s *testChunkSuite) TestSetNulls(c *check.C) {
	const numRows = 1024

	col := newFixedLenColumn(sizeFloat64, 32)
	col.PreAllocFloat64(numRows)
	c.Assert(col.nullCount(), check.Equals, numRows)

	col.SetNulls(0, numRows, false)
	c.Assert(col.nullCount(), check.Equals, 0)

	// expectNull remembers every row that has been set to null so far.
	expectNull := make(map[int]struct{})
	for round := 0; round < 100; round++ {
		begin := rand.Intn(numRows)
		end := begin + rand.Intn(37)
		if end > numRows {
			end = numRows
		}
		for row := begin; row < end; row++ {
			expectNull[row] = struct{}{}
		}

		col.SetNulls(begin, end, true)

		c.Assert(col.nullCount(), check.Equals, len(expectNull))
		for row := range expectNull {
			c.Assert(col.IsNull(row), check.Equals, true)
		}
	}
}