diff --git a/expression/builtin.go b/expression/builtin.go index 98049ba148..dc1c1d40bd 100644 --- a/expression/builtin.go +++ b/expression/builtin.go @@ -18,6 +18,7 @@ package expression import ( + "github.com/pingcap/errors" "github.com/pingcap/parser/ast" "github.com/pingcap/parser/charset" "github.com/pingcap/parser/mysql" @@ -171,32 +172,36 @@ func (b *baseBuiltinFunc) getArgs() []Expression { return b.args } +func (b *baseBuiltinFunc) vecEval(input *chunk.Chunk, result *chunk.Column) error { + return errors.Errorf("baseBuiltinFunc.vecEval() should never be called, please contact the TiDB team for help") +} + func (b *baseBuiltinFunc) evalInt(row chunk.Row) (int64, bool, error) { - panic("baseBuiltinFunc.evalInt() should never be called.") + return 0, false, errors.Errorf("baseBuiltinFunc.evalInt() should never be called, please contact the TiDB team for help") } func (b *baseBuiltinFunc) evalReal(row chunk.Row) (float64, bool, error) { - panic("baseBuiltinFunc.evalReal() should never be called.") + return 0, false, errors.Errorf("baseBuiltinFunc.evalReal() should never be called, please contact the TiDB team for help") } func (b *baseBuiltinFunc) evalString(row chunk.Row) (string, bool, error) { - panic("baseBuiltinFunc.evalString() should never be called.") + return "", false, errors.Errorf("baseBuiltinFunc.evalString() should never be called, please contact the TiDB team for help") } func (b *baseBuiltinFunc) evalDecimal(row chunk.Row) (*types.MyDecimal, bool, error) { - panic("baseBuiltinFunc.evalDecimal() should never be called.") + return nil, false, errors.Errorf("baseBuiltinFunc.evalDecimal() should never be called, please contact the TiDB team for help") } func (b *baseBuiltinFunc) evalTime(row chunk.Row) (types.Time, bool, error) { - panic("baseBuiltinFunc.evalTime() should never be called.") + return types.Time{}, false, errors.Errorf("baseBuiltinFunc.evalTime() should never be called, please contact the TiDB team for help") } func (b 
*baseBuiltinFunc) evalDuration(row chunk.Row) (types.Duration, bool, error) { - panic("baseBuiltinFunc.evalDuration() should never be called.") + return types.Duration{}, false, errors.Errorf("baseBuiltinFunc.evalDuration() should never be called, please contact the TiDB team for help") } func (b *baseBuiltinFunc) evalJSON(row chunk.Row) (json.BinaryJSON, bool, error) { - panic("baseBuiltinFunc.evalJSON() should never be called.") + return json.BinaryJSON{}, false, errors.Errorf("baseBuiltinFunc.evalJSON() should never be called, please contact the TiDB team for help") } func (b *baseBuiltinFunc) getRetTp() *types.FieldType { @@ -276,8 +281,16 @@ func newBaseBuiltinCastFunc(builtinFunc baseBuiltinFunc, inUnion bool) baseBuilt } } +// vecBuiltinFunc contains all vectorized methods for a builtin function. +type vecBuiltinFunc interface { + // vecEval evaluates this builtin function in a vectorized manner. + vecEval(input *chunk.Chunk, result *chunk.Column) error +} + // builtinFunc stands for a particular function signature. type builtinFunc interface { + vecBuiltinFunc + // evalInt evaluates int result of builtinFunc by given row. evalInt(row chunk.Row) (val int64, isNull bool, err error) // evalReal evaluates real representation of builtinFunc by given row. diff --git a/expression/column.go b/expression/column.go index 99b94e0ea4..e89eb93ebe 100644 --- a/expression/column.go +++ b/expression/column.go @@ -40,6 +40,11 @@ func (col *CorrelatedColumn) Clone() Expression { return col } +// VecEval evaluates this expression in a vectorized manner. +func (col *CorrelatedColumn) VecEval(ctx sessionctx.Context, input *chunk.Chunk, result *chunk.Column) (err error) { + return genVecFromConstExpr(ctx, col, input, result) +} + // Eval implements Expression interface. 
func (col *CorrelatedColumn) Eval(row chunk.Row) (types.Datum, error) { return *col.Data, nil @@ -181,6 +186,12 @@ func (col *Column) Equal(_ sessionctx.Context, expr Expression) bool { return false } +// VecEval evaluates this expression in a vectorized manner by calling CopyConstruct(result) on the input column at col.Index; ctx is unused and the returned error is always nil. +func (col *Column) VecEval(ctx sessionctx.Context, input *chunk.Chunk, result *chunk.Column) error { + input.Column(col.Index).CopyConstruct(result) + return nil +} + // String implements Stringer interface. func (col *Column) String() string { result := col.ColName.L diff --git a/expression/constant.go b/expression/constant.go index 7e7948cf38..360da45132 100644 --- a/expression/constant.go +++ b/expression/constant.go @@ -88,6 +88,14 @@ func (c *Constant) GetType() *types.FieldType { return c.RetType } +// VecEval evaluates this expression in a vectorized manner: without a DeferredExpr the constant itself is expanded by genVecFromConstExpr, otherwise evaluation is delegated to DeferredExpr.VecEval. +func (c *Constant) VecEval(ctx sessionctx.Context, input *chunk.Chunk, result *chunk.Column) error { + if c.DeferredExpr == nil { + return genVecFromConstExpr(ctx, c, input, result) + } + return c.DeferredExpr.VecEval(ctx, input, result) +} + // Eval implements Expression interface. 
func (c *Constant) Eval(_ chunk.Row) (types.Datum, error) { if c.DeferredExpr != nil { diff --git a/expression/constant_test.go b/expression/constant_test.go index e5fe7b5681..84e241b3f6 100644 --- a/expression/constant_test.go +++ b/expression/constant_test.go @@ -423,3 +423,58 @@ func (*testExpressionSuite) TestDeferredExprNotNull(c *C) { cln := cst.Clone().(*Constant) c.Assert(cln.DeferredExpr, Equals, cst.DeferredExpr) } + +// TestVectorizedConstant checks Constant.VecEval for plain and deferred constants of fixed-length and var-length types, with and without a selection vector. +func (*testExpressionSuite) TestVectorizedConstant(c *C) { + // fixed-length type with/without Sel + for _, cst := range []*Constant{ + {RetType: newIntFieldType(), Value: types.NewIntDatum(2333)}, + {RetType: newIntFieldType(), DeferredExpr: &Constant{RetType: newIntFieldType(), Value: types.NewIntDatum(2333)}}} { + chk := chunk.New([]*types.FieldType{newIntFieldType()}, 1024, 1024) + for i := 0; i < 1024; i++ { + chk.AppendInt64(0, int64(i)) + } + col := chunk.NewColumn(newIntFieldType(), 1024) + ctx := mock.NewContext() + c.Assert(cst.VecEval(ctx, chk, col), IsNil) + i64s := col.Int64s() + c.Assert(len(i64s), Equals, 1024) + for _, v := range i64s { + c.Assert(v, Equals, int64(2333)) + } + + // fixed-length type with Sel + sel := []int{2, 3, 5, 7, 11, 13, 17, 19, 23, 29} + chk.SetSel(sel) + c.Assert(cst.VecEval(ctx, chk, col), IsNil) + i64s = col.Int64s() + for _, i := range sel { + c.Assert(i64s[i], Equals, int64(2333)) + } + } + + // var-length type with/without Sel + for _, cst := range []*Constant{ + {RetType: newStringFieldType(), Value: types.NewStringDatum("hello")}, + {RetType: newStringFieldType(), DeferredExpr: &Constant{RetType: newStringFieldType(), Value: types.NewStringDatum("hello")}}} { + chk := chunk.New([]*types.FieldType{newIntFieldType()}, 1024, 1024) + for i := 0; i < 1024; i++ { + chk.AppendInt64(0, int64(i)) + } + chk.SetSel(nil) + col := chunk.NewColumn(newStringFieldType(), 1024) + ctx := mock.NewContext() + 
c.Assert(cst.VecEval(ctx, chk, col), IsNil) + for i := 0; i < 1024; i++ { + c.Assert(col.GetString(i), Equals, "hello") + } + + // var-length type with Sel + sel := []int{2, 3, 5, 7, 11, 13, 17, 19, 23, 29} + chk.SetSel(sel) + c.Assert(cst.VecEval(ctx, chk, col), IsNil) + for _, i := range sel { + c.Assert(col.GetString(i), Equals, "hello") + } + } +} diff --git a/expression/expression.go b/expression/expression.go index d1ea0e24d7..14c3edcb64 100644 --- a/expression/expression.go +++ b/expression/expression.go @@ -38,10 +38,17 @@ const ( // EvalAstExpr evaluates ast expression directly. var EvalAstExpr func(sctx sessionctx.Context, expr ast.ExprNode) (types.Datum, error) +// VecExpr contains all vectorized evaluation methods. +type VecExpr interface { + // VecEval evaluates this expression in a vectorized manner. + VecEval(ctx sessionctx.Context, input *chunk.Chunk, result *chunk.Column) error +} + // Expression represents all scalar expression in SQL. type Expression interface { fmt.Stringer goJSON.Marshaler + VecExpr // Eval evaluates an expression through a row. Eval(row chunk.Row) (types.Datum, error) diff --git a/expression/scalar_function.go b/expression/scalar_function.go index 200360323d..873ed8a3c3 100644 --- a/expression/scalar_function.go +++ b/expression/scalar_function.go @@ -41,6 +41,11 @@ type ScalarFunction struct { hashcode []byte } +// VecEval evaluates this expression in a vectorized manner. +func (sf *ScalarFunction) VecEval(ctx sessionctx.Context, input *chunk.Chunk, result *chunk.Column) error { + return sf.Function.vecEval(input, result) +} + // GetArgs gets arguments of function. 
func (sf *ScalarFunction) GetArgs() []Expression { return sf.Function.getArgs() diff --git a/expression/util_test.go b/expression/util_test.go index dcd5f62c11..1311ca13a0 100644 --- a/expression/util_test.go +++ b/expression/util_test.go @@ -46,37 +46,22 @@ func (s *testUtilSuite) checkPanic(f func()) (ret bool) { } func (s *testUtilSuite) TestBaseBuiltin(c *check.C) { - c.Assert(s.checkPanic(func() { - newBaseBuiltinFuncWithTp(nil, nil, types.ETTimestamp) - }), check.IsTrue) - ctx := mock.NewContext() - c.Assert(s.checkPanic(func() { - newBaseBuiltinFuncWithTp(ctx, nil, types.ETTimestamp, types.ETTimestamp) - }), check.IsTrue) - bf := newBaseBuiltinFuncWithTp(ctx, nil, types.ETTimestamp) - c.Assert(s.checkPanic(func() { - bf.evalInt(chunk.Row{}) - }), check.IsTrue) - c.Assert(s.checkPanic(func() { - bf.evalReal(chunk.Row{}) - }), check.IsTrue) - c.Assert(s.checkPanic(func() { - bf.evalString(chunk.Row{}) - }), check.IsTrue) - c.Assert(s.checkPanic(func() { - bf.evalDecimal(chunk.Row{}) - }), check.IsTrue) - c.Assert(s.checkPanic(func() { - bf.evalTime(chunk.Row{}) - }), check.IsTrue) - c.Assert(s.checkPanic(func() { - bf.evalDuration(chunk.Row{}) - }), check.IsTrue) - c.Assert(s.checkPanic(func() { - bf.evalJSON(chunk.Row{}) - }), check.IsTrue) + _, _, err := bf.evalInt(chunk.Row{}) + c.Assert(err, check.NotNil) + _, _, err = bf.evalReal(chunk.Row{}) + c.Assert(err, check.NotNil) + _, _, err = bf.evalString(chunk.Row{}) + c.Assert(err, check.NotNil) + _, _, err = bf.evalDecimal(chunk.Row{}) + c.Assert(err, check.NotNil) + _, _, err = bf.evalTime(chunk.Row{}) + c.Assert(err, check.NotNil) + _, _, err = bf.evalDuration(chunk.Row{}) + c.Assert(err, check.NotNil) + _, _, err = bf.evalJSON(chunk.Row{}) + c.Assert(err, check.NotNil) } func (s *testUtilSuite) TestClone(c *check.C) { @@ -400,6 +385,10 @@ type MockExpr struct { i interface{} } +func (m *MockExpr) VecEval(ctx sessionctx.Context, input *chunk.Chunk, result *chunk.Column) error { + return nil +} + func (m 
*MockExpr) String() string { return "" } func (m *MockExpr) MarshalJSON() ([]byte, error) { return nil, nil } func (m *MockExpr) Eval(row chunk.Row) (types.Datum, error) { return types.NewDatum(m.i), m.err } diff --git a/expression/vectorized.go b/expression/vectorized.go new file mode 100644 index 0000000000..366394ee79 --- /dev/null +++ b/expression/vectorized.go @@ -0,0 +1,203 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package expression + +import ( + "github.com/pingcap/errors" + "github.com/pingcap/tidb/sessionctx" + "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/chunk" +) + +func genVecFromConstExpr(ctx sessionctx.Context, expr Expression, input *chunk.Chunk, result *chunk.Column) error { + n := input.NumEffectiveRows() + sel := input.Sel() + tp := expr.GetType() + switch tp.EvalType() { + case types.ETInt: + result.PreAllocInt64(n) + v, isNull, err := expr.EvalInt(ctx, chunk.Row{}) + if err != nil { + return err + } + if isNull { // all slots are set to null by PreAlloc() + return nil + } + i64s := result.Int64s() + if sel == nil { + for i := range i64s { + i64s[i] = v + } + result.SetNulls(0, n, false) + } else { + for _, i := range sel { + i64s[i] = v + result.SetNull(i, false) + } + } + case types.ETReal: + result.PreAllocFloat64(n) + v, isNull, err := expr.EvalReal(ctx, chunk.Row{}) + if err != nil { + return err + } + if isNull { // all slots are set to null by PreAlloc() + return nil + } + f64s := result.Float64s() + if sel == nil { + for i := range f64s { + 
f64s[i] = v + } + result.SetNulls(0, n, false) + } else { + for _, i := range sel { + f64s[i] = v + result.SetNull(i, false) + } + } + case types.ETDecimal: + result.PreAllocDecimal(n) + v, isNull, err := expr.EvalDecimal(ctx, chunk.Row{}) + if err != nil { + return err + } + if isNull { // all slots are set to null by PreAlloc() + return nil + } + ds := result.Decimals() + if sel == nil { + for i := range ds { + ds[i] = *v + } + result.SetNulls(0, n, false) + } else { + for _, i := range sel { + ds[i] = *v + result.SetNull(i, false) + } + } + case types.ETDatetime, types.ETTimestamp: + result.Reset() + v, isNull, err := expr.EvalTime(ctx, chunk.Row{}) + if err != nil { + return err + } + if isNull { + for i := 0; i < n; i++ { + result.AppendNull() + } + } else { + if sel == nil { + for i := 0; i < n; i++ { + result.AppendTime(v) + } + } else { + pos := 0 + for _, i := range sel { + for pos < i { + result.AppendNull() + pos++ + } + result.AppendTime(v) + pos++ + } + } + } + case types.ETDuration: + result.Reset() + v, isNull, err := expr.EvalDuration(ctx, chunk.Row{}) + if err != nil { + return err + } + if isNull { + for i := 0; i < n; i++ { + result.AppendNull() + } + } else { + if sel == nil { + for i := 0; i < n; i++ { + result.AppendDuration(v) + } + } else { + pos := 0 + for _, i := range sel { + for pos < i { + result.AppendNull() + pos++ + } + result.AppendDuration(v) + pos++ + } + } + } + case types.ETJson: + result.Reset() + v, isNull, err := expr.EvalJSON(ctx, chunk.Row{}) + if err != nil { + return err + } + if isNull { + for i := 0; i < n; i++ { + result.AppendNull() + } + } else { + if sel == nil { + for i := 0; i < n; i++ { + result.AppendJSON(v) + } + } else { + pos := 0 + for _, i := range sel { + for pos < i { + result.AppendNull() + pos++ + } + result.AppendJSON(v) + pos++ + } + } + } + case types.ETString: + result.Reset() + v, isNull, err := expr.EvalString(ctx, chunk.Row{}) + if err != nil { + return err + } + if isNull { + for i := 0; i < n; 
i++ { + result.AppendNull() + } + } else { + if sel == nil { + for i := 0; i < n; i++ { + result.AppendString(v) + } + } else { + pos := 0 + for _, i := range sel { + for pos < i { + result.AppendNull() + pos++ + } + result.AppendString(v) + pos++ + } + } + } + default: + return errors.Errorf("unsupported Constant type for vectorized evaluation") + } + return nil +} diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 453b57c71b..914a88210d 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -311,6 +311,21 @@ func (c *Chunk) NumRows() int { return c.columns[0].length } +// NumEffectiveRows returns the effective number of rows physically stored in this Chunk. +// It differs from NumRows when sel is not nil; the last element of sel is taken as the largest selected index. +// For example: if sel is [2, 3, 5, 7, 9], then +// NumRows() returns 5 to indicate that 5 rows are selected logically in this Chunk, while +// NumEffectiveRows() returns 10(9+1) to indicate that at least 10 rows are stored in this Chunk physically. +func (c *Chunk) NumEffectiveRows() int { + if c.sel == nil { + return c.NumRows() + } + if len(c.sel) == 0 { + return 0 + } + return c.sel[len(c.sel)-1] + 1 +} + // GetRow gets the Row in the chunk with the row index. func (c *Chunk) GetRow(idx int) Row { if c.sel != nil { diff --git a/util/chunk/column.go b/util/chunk/column.go index 7a5379ec5e..66d1778ced 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -279,6 +279,24 @@ func (c *Column) SetNull(rowIdx int, isNull bool) { } } +// SetNulls marks every row in [begin, end) as null when isNull is true, or as not-null otherwise; whole bitmap bytes are written at once where the range covers them. +func (c *Column) SetNulls(begin, end int, isNull bool) { + i := ((begin + 7) >> 3) << 3 + for ; begin < i && begin < end; begin++ { + c.SetNull(begin, isNull) + } + var v uint8 + if !isNull { + v = (1 << 8) - 1 + } + for ; begin+8 <= end; begin += 8 { + c.nullBitmap[begin>>3] = v + } + for ; begin < end; begin++ { + c.SetNull(begin, isNull) + } +} + // nullCount returns the number of nulls in this Column. 
func (c *Column) nullCount() int { var cnt, i int diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 0b04a8fbd2..277cde9366 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -653,3 +653,31 @@ func (s *testChunkSuite) TestNull(c *check.C) { col.SetNull(8, false) c.Assert(col.nullCount(), check.Equals, 8) } + +func (s *testChunkSuite) TestSetNulls(c *check.C) { + col := newFixedLenColumn(sizeFloat64, 32) + col.PreAllocFloat64(1024) + c.Assert(col.nullCount(), check.Equals, 1024) + + col.SetNulls(0, 1024, false) + c.Assert(col.nullCount(), check.Equals, 0) + + nullMap := make(map[int]struct{}) + for i := 0; i < 100; i++ { + begin := rand.Intn(1024) + l := rand.Intn(37) + end := begin + l + if end > 1024 { + end = 1024 + } + for i := begin; i < end; i++ { + nullMap[i] = struct{}{} + } + col.SetNulls(begin, end, true) + + c.Assert(col.nullCount(), check.Equals, len(nullMap)) + for k := range nullMap { + c.Assert(col.IsNull(k), check.Equals, true) + } + } +}