// Copyright 2015 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package aggregate

import (
	"context"
	"fmt"
	"math"
	"math/rand"
	"sort"
	"strconv"
	"strings"
	"testing"
	"time"

	"github.com/pingcap/failpoint"
	"github.com/pingcap/tidb/pkg/executor/aggregate"
	"github.com/pingcap/tidb/pkg/session"
	"github.com/pingcap/tidb/pkg/testkit"
	"github.com/pingcap/tidb/pkg/util/sqlexec"
	"github.com/stretchr/testify/require"
)

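// TestHashAggRuntimeStat verifies the String/Clone/Merge behavior of
// aggregate.HashAggRuntimeStats: per-worker wait, exec, and worker times are
// accumulated into tot_wait/tot_exec/tot_time, while max and p95 are taken
// over the per-worker times. Merging a clone doubles every accumulated
// counter (including wall_time) but leaves concurrency, max, and p95 unchanged.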
func TestHashAggRuntimeStat(t *testing.T) {
	partialInfo := &aggregate.AggWorkerInfo{
		Concurrency: 5,
		WallTime:    int64(time.Second * 20),
	}
	finalInfo := &aggregate.AggWorkerInfo{
		Concurrency: 8,
		WallTime:    int64(time.Second * 10),
	}
	stats := &aggregate.HashAggRuntimeStats{
		PartialConcurrency: 5,
		PartialWallTime:    int64(time.Second * 20),
		FinalConcurrency:   8,
		FinalWallTime:      int64(time.Second * 10),
	}
	for i := 0; i < partialInfo.Concurrency; i++ {
		stats.PartialStats = append(stats.PartialStats, &aggregate.AggWorkerStat{
			TaskNum:    5,
			WaitTime:   int64(2 * time.Second),
			ExecTime:   int64(1 * time.Second),
			WorkerTime: int64(i) * int64(time.Second),
		})
	}
	for i := 0; i < finalInfo.Concurrency; i++ {
		stats.FinalStats = append(stats.FinalStats, &aggregate.AggWorkerStat{
			TaskNum:    5,
			WaitTime:   int64(2 * time.Millisecond),
			ExecTime:   int64(1 * time.Millisecond),
			WorkerTime: int64(i) * int64(time.Millisecond),
		})
	}
	expect := "partial_worker:{wall_time:20s, concurrency:5, task_num:25, tot_wait:10s, tot_exec:5s, tot_time:10s, max:4s, p95:4s}, final_worker:{wall_time:10s, concurrency:8, task_num:40, tot_wait:16ms, tot_exec:8ms, tot_time:28ms, max:7ms, p95:7ms}"
	require.Equal(t, expect, stats.String())
	require.Equal(t, expect, stats.Clone().String())
	stats.Merge(stats.Clone())
	expect = "partial_worker:{wall_time:40s, concurrency:5, task_num:50, tot_wait:20s, tot_exec:10s, tot_time:20s, max:4s, p95:4s}, final_worker:{wall_time:20s, concurrency:8, task_num:80, tot_wait:32ms, tot_exec:16ms, tot_time:56ms, max:7ms, p95:7ms}"
	require.Equal(t, expect, stats.String())
}

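// reconstructParallelGroupConcatResult normalizes GROUP_CONCAT output so that
// results produced under different concurrency settings can be compared:
// the comma-separated tokens inside each row are sorted, then the rows
// themselves are sorted. For example, rows [["3,1,2"], ["b,a"]] normalize
// to ["1,2,3", "a,b"].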
func reconstructParallelGroupConcatResult(rows [][]interface{}) []string {
	data := make([]string, 0, len(rows))
	for _, row := range rows {
		if str, ok := row[0].(string); ok {
			tokens := strings.Split(str, ",")
			sort.Slice(tokens, func(i, j int) bool {
				return tokens[i] < tokens[j]
			})
			data = append(data, strings.Join(tokens, ","))
		}
	}

	sort.Slice(data, func(i, j int) bool {
		return data[i] < data[j]
	})

	return data
}

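// TestParallelStreamAggGroupConcat checks that a parallel stream aggregation
// (tidb_streamagg_concurrency > 1, which introduces a Shuffle operator into
// the plan) produces the same GROUP_CONCAT results as the serial execution
// with concurrency 1, up to row and token ordering.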
func TestParallelStreamAggGroupConcat(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test;")
	tk.MustExec("drop table if exists t;")
	tk.MustExec("CREATE TABLE t(a bigint, b bigint);")
	tk.MustExec("set tidb_init_chunk_size=1;")
	tk.MustExec("set tidb_max_chunk_size=32;")

	var insertSQL string
	for i := 0; i < 1000; i++ {
		if i == 0 {
			insertSQL += fmt.Sprintf("(%d, %d)", rand.Intn(100), rand.Intn(100))
		} else {
			insertSQL += fmt.Sprintf(",(%d, %d)", rand.Intn(100), rand.Intn(100))
		}
	}
	tk.MustExec(fmt.Sprintf("insert into t values %s;", insertSQL))

	sql := "select /*+ stream_agg() */ group_concat(a, b) from t group by b;"
	concurrencies := []int{1, 2, 4, 8}
	var expected []string
	for _, con := range concurrencies {
		tk.MustExec(fmt.Sprintf("set @@tidb_streamagg_concurrency=%d", con))
		if con == 1 {
			expected = reconstructParallelGroupConcatResult(tk.MustQuery(sql).Rows())
		} else {
			er := tk.MustQuery("explain format = 'brief' " + sql).Rows()
			ok := false
			for _, l := range er {
				str := fmt.Sprintf("%v", l)
				if strings.Contains(str, "Shuffle") {
					ok = true
					break
				}
			}
			require.True(t, ok)
			obtained := reconstructParallelGroupConcatResult(tk.MustQuery(sql).Rows())
			require.Equal(t, len(expected), len(obtained))
			for i := 0; i < len(obtained); i++ {
				require.Equal(t, expected[i], obtained[i])
			}
		}
	}
}

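// TestIssue20658 is a regression test for issue #20658: for a range of
// aggregate functions, a parallel stream aggregation (concurrency 2, 4, or 8,
// with a Shuffle operator in the plan) must return the same values as the
// serial execution, within a 1e-3 tolerance for floating-point results.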
func TestIssue20658(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")

	aggFuncs := []string{"count(a)", "sum(a)", "avg(a)", "max(a)", "min(a)", "bit_or(a)", "bit_xor(a)", "bit_and(a)", "var_pop(a)", "var_samp(a)", "stddev_pop(a)", "stddev_samp(a)", "approx_count_distinct(a)", "approx_percentile(a, 7)"}
	sqlFormat := "select /*+ stream_agg() */ %s from t group by b;"

	sqls := make([]string, 0, len(aggFuncs))
	for _, af := range aggFuncs {
		sql := fmt.Sprintf(sqlFormat, af)
		sqls = append(sqls, sql)
	}

	tk.MustExec("drop table if exists t;")
	tk.MustExec("CREATE TABLE t(a bigint, b bigint);")
	tk.MustExec("set tidb_init_chunk_size=1;")
	tk.MustExec("set tidb_max_chunk_size=32;")
	randSeed := time.Now().UnixNano()
	r := rand.New(rand.NewSource(randSeed))
	var insertSQL strings.Builder
	for i := 0; i < 1000; i++ {
		insertSQL.WriteString("(")
		insertSQL.WriteString(strconv.Itoa(r.Intn(10)))
		insertSQL.WriteString(",")
		insertSQL.WriteString(strconv.Itoa(r.Intn(10)))
		insertSQL.WriteString(")")
		if i < 1000-1 {
			insertSQL.WriteString(",")
		}
	}
	tk.MustExec(fmt.Sprintf("insert into t values %s;", insertSQL.String()))

	mustParseAndSort := func(rows [][]interface{}, cmt string) []float64 {
		ret := make([]float64, len(rows))
		for i := 0; i < len(rows); i++ {
			rowStr := rows[i][0].(string)
			if rowStr == "<nil>" {
				ret[i] = 0
				continue
			}
			v, err := strconv.ParseFloat(rowStr, 64)
			require.NoError(t, err, cmt)
			ret[i] = v
		}
		sort.Float64s(ret)
		return ret
	}
	for _, sql := range sqls {
		tk.MustExec("set @@tidb_streamagg_concurrency = 1;")
		exp := tk.MustQuery(sql).Rows()
		expected := mustParseAndSort(exp, fmt.Sprintf("sql: %s; seed: %d", sql, randSeed))
		for _, con := range []int{2, 4, 8} {
			comment := fmt.Sprintf("sql: %s; concurrency: %d, seed: %d", sql, con, randSeed)
			tk.MustExec(fmt.Sprintf("set @@tidb_streamagg_concurrency=%d;", con))
			er := tk.MustQuery("explain format = 'brief' " + sql).Rows()
			ok := false
			for _, l := range er {
				str := fmt.Sprintf("%v", l)
				if strings.Contains(str, "Shuffle") {
					ok = true
					break
				}
			}
			require.True(t, ok, comment)
			rows := mustParseAndSort(tk.MustQuery(sql).Rows(), comment)

			require.Equal(t, len(expected), len(rows), comment)
			for i := range rows {
				require.Less(t, math.Abs(rows[i]-expected[i]), 1e-3, comment)
			}
		}
	}
}

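// TestAggInDisk caps the per-query memory quota at 4 MiB so that the hash
// aggregation spills to disk, then asserts via EXPLAIN ANALYZE that the
// HashAgg operator reports non-zero disk usage and still returns correct
// results. It also covers hash aggregation with no GROUP BY and no data.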
func TestAggInDisk(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	tk.MustExec("set tidb_hashagg_final_concurrency = 1;")
	tk.MustExec("set tidb_hashagg_partial_concurrency = 1;")
	tk.MustExec("set tidb_mem_quota_query = 4194304")
	tk.MustExec("drop table if exists t")
	tk.MustExec("create table t(a int)")
	sql := "insert into t values (0)"
	for i := 1; i <= 200; i++ {
		sql += fmt.Sprintf(",(%v)", i)
	}
	sql += ";"
	tk.MustExec(sql)
	rows := tk.MustQuery("desc analyze select /*+ HASH_AGG() */ avg(t1.a) from t t1 join t t2 group by t1.a, t2.a;").Rows()
	for _, row := range rows {
		length := len(row)
		line := fmt.Sprintf("%v", row)
		disk := fmt.Sprintf("%v", row[length-1])
		if strings.Contains(line, "HashAgg") {
			require.False(t, strings.Contains(disk, "0 Bytes"))
			require.True(t, strings.Contains(disk, "MB") ||
				strings.Contains(disk, "KB") ||
				strings.Contains(disk, "Bytes"))
		}
	}

	// Improve code coverage of the spill path: add one more row so that the
	// temporary spill chunk is not always full.
	tk.MustExec("insert into t values(0)")
	tk.MustQuery("select sum(tt.b) from ( select /*+ HASH_AGG() */ avg(t1.a) as b from t t1 join t t2 group by t1.a, t2.a) as tt").Check(
		testkit.Rows("4040100.0000"))
	// Test the cases with no GROUP BY and with no data.
	tk.MustExec("drop table t;")
	tk.MustExec("create table t(c int, c1 int);")
	tk.MustQuery("select /*+ HASH_AGG() */ count(c) from t;").Check(testkit.Rows("0"))
	tk.MustQuery("select /*+ HASH_AGG() */ count(c) from t group by c1;").Check(testkit.Rows())
}

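// TestRandomPanicConsume enables the ConsumeRandomPanic failpoints in the
// aggregate executor and the coprocessor client, which make memory Consume
// calls panic at random with a simulated out-of-memory error. Each statement
// is re-executed until the panic fires, ten times per executor, to check that
// every executor surfaces the injected panic as a proper error instead of
// hanging or crashing the process.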
func TestRandomPanicConsume(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	tk.MustExec("set @@tidb_max_chunk_size=32")
	tk.MustExec("set @@tidb_init_chunk_size=1")
	tk.MustExec("drop table if exists t;")
	tk.MustExec("create table t(pk bigint primary key auto_random,a int, index idx(a));")
	tk.MustExec("SPLIT TABLE t BETWEEN (-9223372036854775808) AND (9223372036854775807) REGIONS 50;") // Split into 50 regions to simulate many requests.
	for i := 0; i <= 1000; i++ {
		tk.MustExec(fmt.Sprintf("insert into t(a) values(%v),(%v),(%v)", i, i, i))
	}
	tk.MustExec("drop table if exists s;")
	tk.MustExec("create table s(pk bigint primary key auto_random,a int, b int, index idx(a));")
	tk.MustExec("SPLIT TABLE s BETWEEN (-9223372036854775808) AND (9223372036854775807) REGIONS 50;") // Split into 50 regions to simulate many requests.
	for i := 0; i <= 1000; i++ {
		tk.MustExec(fmt.Sprintf("insert into s(a,b) values(%v,%v),(%v,%v),(%v,%v)", i, i, i, i, i, i))
	}

	fpName := "github.com/pingcap/tidb/pkg/executor/aggregate/ConsumeRandomPanic"
	require.NoError(t, failpoint.Enable(fpName, "5%panic(\"ERROR 1105 (HY000): Out Of Memory Quota![conn=1]\")"))
	defer func() {
		require.NoError(t, failpoint.Disable(fpName))
	}()
	fpName2 := "github.com/pingcap/tidb/pkg/store/copr/ConsumeRandomPanic"
	require.NoError(t, failpoint.Enable(fpName2, "3%panic(\"ERROR 1105 (HY000): Out Of Memory Quota![conn=1]\")"))
	defer func() {
		require.NoError(t, failpoint.Disable(fpName2))
	}()

	sqls := []string{
		// Without index
		"select /*+ HASH_AGG() */ /*+ USE_INDEX(t) */ count(a) from t group by a",                                  // HashAgg Paralleled
		"select /*+ HASH_AGG() */ /*+ USE_INDEX(t) */ count(distinct a) from t",                                    // HashAgg Unparalleled
		"select /*+ STREAM_AGG() */ /*+ USE_INDEX(t) */ count(a) from t group by a",                                // Shuffle+StreamAgg
		"select /*+ USE_INDEX(t) */ a * a, a / a, a + a , a - a from t",                                            // Projection
		"select /*+ HASH_JOIN(t1) */ /*+ USE_INDEX(t1) */ /*+ USE_INDEX(t2) */* from t t1 join t t2 on t1.a=t2.a",  // HashJoin
		"select /*+ MERGE_JOIN(t1) */ /*+ USE_INDEX(t1) */ /*+ USE_INDEX(t2) */* from t t1 join t t2 on t1.a=t2.a", // Shuffle+MergeJoin
		"select /*+ USE_INDEX(t) */ * from t",                                                                      // TableScan

		// With index
		"select /*+ HASH_AGG() */ /*+ USE_INDEX(t,idx) */ count(a) from t group by a",                                       // HashAgg Paralleled
		"select /*+ HASH_AGG() */ /*+ USE_INDEX(t,idx) */ count(distinct a) from t",                                         // HashAgg Unparalleled
		"select /*+ STREAM_AGG() */ /*+ USE_INDEX(t,idx) */ count(a) from t group by a",                                     // Shuffle+StreamAgg
		"select /*+ USE_INDEX(t,idx) */ a * a, a / a, a + a , a - a from t",                                                 // Projection
		"select /*+ HASH_JOIN(t1) */ /*+ USE_INDEX(t1,idx) */ /*+ USE_INDEX(t2,idx) */ * from t t1 join t t2 on t1.a=t2.a",  // HashJoin
		"select /*+ MERGE_JOIN(t1) */ /*+ USE_INDEX(t1,idx) */ /*+ USE_INDEX(t2,idx) */ * from t t1 join t t2 on t1.a=t2.a", // Shuffle+MergeJoin
		"select /*+ INL_JOIN(t2) */ * from t t1 join t t2 on t1.a=t2.a;",                                                    // Index Join
		"select /*+ INL_HASH_JOIN(t2) */ * from t t1 join t t2 on t1.a=t2.a;",                                               // Index Hash Join
		"select /*+ USE_INDEX(t, idx) */ * from t",                                                                          // IndexScan

		// With IndexLookUp
		"select /*+ MERGE_JOIN(t1) */ /*+ USE_INDEX(t1,idx) */ /*+ USE_INDEX(t2,idx) */ * from s t1 join s t2 on t1.a=t2.a", // Shuffle+MergeJoin
		"select /*+ INL_JOIN(t2) */ * from s t1 join s t2 on t1.a=t2.a;",                                                    // Index Join
		"select /*+ INL_HASH_JOIN(t2) */ * from s t1 join s t2 on t1.a=t2.a;",                                               // Index Hash Join
		"select /*+ USE_INDEX(s, idx) */ * from s",                                                                          // IndexLookUp
	}

	// Trigger the failpoint panic 10 times for each executor.
	var res sqlexec.RecordSet
	for _, sql := range sqls {
		for i := 1; i <= 10; i++ {
			concurrency := rand.Int31n(4) + 1 // Test concurrency 1~4 randomly.
			tk.MustExec(fmt.Sprintf("set @@tidb_executor_concurrency=%v", concurrency))
			tk.MustExec(fmt.Sprintf("set @@tidb_merge_join_concurrency=%v", concurrency))
			tk.MustExec(fmt.Sprintf("set @@tidb_streamagg_concurrency=%v", concurrency))
			distConcurrency := rand.Int31n(15) + 1
			tk.MustExec(fmt.Sprintf("set @@tidb_distsql_scan_concurrency=%v", distConcurrency))
			var err error
			for err == nil {
				res, err = tk.Exec(sql)
				if err == nil {
					_, err = session.GetRows4Test(context.Background(), tk.Session(), res)
					require.NoError(t, res.Close())
				}
			}
			require.EqualError(t, err, "failpoint panic: ERROR 1105 (HY000): Out Of Memory Quota![conn=1]")
		}
	}
}