Files
tidb/pkg/disttask/framework/integrationtests/bench_test.go

213 lines
7.7 KiB
Go

// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package integrationtests
import (
"context"
"flag"
"fmt"
"net"
"net/http"
"net/http/pprof"
"testing"
"time"
"github.com/gorilla/mux"
"github.com/pingcap/tidb/pkg/disttask/framework/handle"
"github.com/pingcap/tidb/pkg/disttask/framework/proto"
"github.com/pingcap/tidb/pkg/disttask/framework/scheduler"
mockDispatch "github.com/pingcap/tidb/pkg/disttask/framework/scheduler/mock"
"github.com/pingcap/tidb/pkg/disttask/framework/storage"
"github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor"
"github.com/pingcap/tidb/pkg/disttask/framework/testutil"
"github.com/pingcap/tidb/pkg/domain"
"github.com/pingcap/tidb/pkg/parser/terror"
"github.com/pingcap/tidb/pkg/session"
"github.com/pingcap/tidb/pkg/store/driver"
"github.com/pingcap/tidb/pkg/testkit"
"github.com/pingcap/tidb/pkg/util"
"github.com/pingcap/tidb/pkg/util/metricsutil"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/stretchr/testify/require"
"go.opencensus.io/stats/view"
"go.uber.org/mock/gomock"
)
// Command-line knobs for the benchmark binary (see the usage comment on
// BenchmarkSchedulerOverhead). They override the framework's package-level
// defaults for the duration of the run.
var (
	// maxConcurrentTask overrides proto.MaxConcurrentTask (how many tasks run at once).
	maxConcurrentTask = flag.Int("max-concurrent-task", proto.MaxConcurrentTask, "max concurrent task")
	// waitDuration is how long each mock subtask "works" (sleeps) per step.
	waitDuration = flag.Duration("task-wait-duration", 2*time.Minute, "task wait duration")
	// schedulerInterval overrides scheduler.CheckTaskFinishedInterval.
	schedulerInterval = flag.Duration("scheduler-interval", scheduler.CheckTaskFinishedInterval, "scheduler interval")
	// taskExecutorMgrInterval overrides taskexecutor.TaskCheckInterval.
	taskExecutorMgrInterval = flag.Duration("task-executor-mgr-interval", taskexecutor.TaskCheckInterval, "task executor mgr interval")
	// taskMetaSize is the size in bytes of each submitted task's meta blob.
	taskMetaSize = flag.Int("task-meta-size", 1<<10, "task meta size")
	// noTask makes the benchmark only run the framework idle (no tasks submitted),
	// to measure the baseline scheduling overhead.
	noTask = flag.Bool("no-task", false, "no task")
)
// test overhead when starting multiple schedulers
//
// make failpoint-enable
// GOOS=linux GOARCH=amd64 go test -tags intest -c -o bench.test ./pkg/disttask/framework/integrationtests
// make failpoint-disable
//
// bench.test -test.v -run ^$ -test.bench=BenchmarkSchedulerOverhead --with-tikv "upstream-pd:2379?disableGC=true"
func BenchmarkSchedulerOverhead(b *testing.B) {
	ctx, cancel := context.WithCancel(context.Background())
	statusWG := mockTiDBStatusPort(ctx, b)
	defer func() {
		cancel()
		statusWG.Wait()
	}()

	// Save and restore the package-level knobs we override from flags, so
	// other tests compiled into the same binary still see the defaults.
	schIntervalBak := scheduler.CheckTaskFinishedInterval
	exeMgrIntervalBak := taskexecutor.TaskCheckInterval
	maxConcurrentTaskBak := proto.MaxConcurrentTask
	b.Cleanup(func() {
		proto.MaxConcurrentTask = maxConcurrentTaskBak
		scheduler.CheckTaskFinishedInterval = schIntervalBak
		taskexecutor.TaskCheckInterval = exeMgrIntervalBak
	})
	proto.MaxConcurrentTask = *maxConcurrentTask
	scheduler.CheckTaskFinishedInterval = *schedulerInterval
	taskexecutor.TaskCheckInterval = *taskExecutorMgrInterval
	b.Logf("max concurrent task: %d", proto.MaxConcurrentTask)
	b.Logf("task wait duration: %s", *waitDuration)
	b.Logf("task meta size: %d", *taskMetaSize)
	b.Logf("scheduler interval: %s", scheduler.CheckTaskFinishedInterval)
	b.Logf("task executor mgr interval: %s", taskexecutor.TaskCheckInterval)

	prepareForBenchTest(b)
	c := testutil.NewTestDXFContext(b, 1, 2*proto.MaxConcurrentTask, false)
	registerTaskTypeForBench(c)

	if *noTask {
		// Baseline: measure the framework's idle overhead only.
		time.Sleep(*waitDuration)
	} else {
		// in this test, we will start 4*proto.MaxConcurrentTask tasks, but only
		// proto.MaxConcurrentTask will be scheduled at the same time, for other
		// tasks will be in queue only to check the performance of querying them.
		for i := 0; i < 4*proto.MaxConcurrentTask; i++ {
			taskKey := fmt.Sprintf("task-%03d", i)
			taskMeta := make([]byte, *taskMetaSize)
			_, err := handle.SubmitTask(c.Ctx, taskKey, proto.TaskTypeExample, 1, taskMeta)
			require.NoError(c.T, err)
		}
		// task has 2 steps, each step has 1 subtask, wait in serial to reduce
		// WaitTask check overhead.
		// only wait first proto.MaxConcurrentTask and exit
		time.Sleep(2 * *waitDuration)
		for i := 0; i < proto.MaxConcurrentTask; i++ {
			taskKey := fmt.Sprintf("task-%03d", i)
			testutil.WaitTaskDoneOrPaused(c.Ctx, c.T, taskKey)
		}
	}
}
// prepareForBenchTest connects to the external TiKV cluster (given via the
// --with-tikv flag), bootstraps a session, and truncates the DXF task/subtask
// tables so the benchmark starts from a clean state. The store and domain
// opened here are used only for this cleanup and are closed before returning.
func prepareForBenchTest(b *testing.B) {
	testkit.EnableFailPoint(b, "github.com/pingcap/tidb/pkg/domain/MockDisableDistTask", "return(true)")

	var d driver.TiKVDriver
	store, err := d.Open("tikv://" + *testkit.WithTiKV)
	require.NoError(b, err)

	dom, err := session.BootstrapSession(store)
	// Check the bootstrap error BEFORE registering the cleanup: if
	// BootstrapSession failed, dom is nil, and a deferred dom.Close()
	// registered earlier would panic while require unwinds the test.
	require.NoError(b, err)
	defer func() {
		dom.Close()
		require.NoError(b, store.Close())
		view.Stop()
	}()

	tk := testkit.NewTestKit(b, store)
	tk.MustExec("delete from mysql.tidb_global_task")
	tk.MustExec("delete from mysql.tidb_global_task_history")
	tk.MustExec("delete from mysql.tidb_background_subtask")
	tk.MustExec("delete from mysql.tidb_background_subtask_history")
}
// mockTiDBStatusPort emulates the TiDB server status port (Prometheus metrics
// plus pprof endpoints) so that, when this benchmark runs in a k8s
// environment, it can be scraped like a real TiDB instance. Cancel ctx to
// shut the server down; the returned wait group is done once both serving
// goroutines have exited.
func mockTiDBStatusPort(ctx context.Context, b *testing.B) *util.WaitGroupWrapper {
	var group util.WaitGroupWrapper
	terror.MustNil(metricsutil.RegisterMetrics())

	metricsRouter := mux.NewRouter()
	metricsRouter.Handle("/metrics", promhttp.Handler())

	handlerMux := http.NewServeMux()
	handlerMux.Handle("/", metricsRouter)
	for path, handlerFn := range map[string]http.HandlerFunc{
		"/debug/pprof/":        pprof.Index,
		"/debug/pprof/cmdline": pprof.Cmdline,
		"/debug/pprof/profile": pprof.Profile,
		"/debug/pprof/symbol":  pprof.Symbol,
		"/debug/pprof/trace":   pprof.Trace,
	} {
		handlerMux.HandleFunc(path, handlerFn)
	}

	listener, err := net.Listen("tcp", "0.0.0.0:10080")
	require.NoError(b, err)
	srv := &http.Server{Handler: handlerMux}
	group.RunWithLog(func() {
		if serveErr := srv.Serve(listener); serveErr != nil {
			b.Logf("status server serve failed: %v", serveErr)
		}
	})
	group.RunWithLog(func() {
		// Block until the benchmark is done, then stop serving.
		<-ctx.Done()
		_ = srv.Close()
	})
	return &group
}
// registerTaskTypeForBench registers a mocked example task type with the DXF
// context: every task walks StepInit -> StepOne -> StepTwo -> StepDone with a
// single subtask per step, and each subtask's "work" is simply sleeping for
// *waitDuration (or cancelling the task if the context is cancelled first).
func registerTaskTypeForBench(c *testutil.TestDXFContext) {
	nextStepOf := map[proto.Step]proto.Step{
		proto.StepInit: proto.StepOne,
		proto.StepOne:  proto.StepTwo,
		proto.StepTwo:  proto.StepDone,
	}
	ext := mockDispatch.NewMockExtension(c.MockCtrl)
	ext.EXPECT().OnTick(gomock.Any(), gomock.Any()).Return().AnyTimes()
	ext.EXPECT().GetEligibleInstances(gomock.Any(), gomock.Any()).Return(nil, nil).AnyTimes()
	ext.EXPECT().IsRetryableErr(gomock.Any()).Return(false).AnyTimes()
	ext.EXPECT().GetNextStep(gomock.Any()).DoAndReturn(
		func(task *proto.TaskBase) proto.Step {
			return nextStepOf[task.Step]
		},
	).AnyTimes()
	ext.EXPECT().OnNextSubtasksBatch(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).DoAndReturn(
		func(_ context.Context, _ storage.TaskHandle, task *proto.Task, _ []string, _ proto.Step) ([][]byte, error) {
			// Exactly one subtask per step; its meta just carries the task key.
			return [][]byte{[]byte(task.Key)}, nil
		},
	).AnyTimes()
	ext.EXPECT().OnDone(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes()

	testutil.RegisterTaskMetaWithDXFCtx(c, ext, func(ctx context.Context, subtask *proto.Subtask) error {
		select {
		case <-time.After(*waitDuration):
			// Normal path: the subtask "worked" for the configured duration.
			return nil
		case <-ctx.Done():
			// Benchmark is shutting down: cancel the owning task instead.
			mgr, err := storage.GetTaskManager()
			if err != nil {
				return err
			}
			return mgr.CancelTask(ctx, subtask.TaskID)
		}
	})
}