// tidb/pkg/executor/importer/importer_testkit_test.go
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package importer_test

import (
	"context"
	"fmt"
	"os"
	"path"
	"testing"
	"time"

	"github.com/ngaut/pools"
	"github.com/pingcap/failpoint"
	"github.com/pingcap/tidb/br/pkg/mock"
	tidb "github.com/pingcap/tidb/pkg/config"
	"github.com/pingcap/tidb/pkg/disttask/framework/testutil"
	"github.com/pingcap/tidb/pkg/executor/importer"
	"github.com/pingcap/tidb/pkg/expression"
	"github.com/pingcap/tidb/pkg/kv"
	"github.com/pingcap/tidb/pkg/lightning/checkpoints"
	"github.com/pingcap/tidb/pkg/lightning/common"
	"github.com/pingcap/tidb/pkg/lightning/config"
	"github.com/pingcap/tidb/pkg/lightning/mydump"
	verify "github.com/pingcap/tidb/pkg/lightning/verification"
	"github.com/pingcap/tidb/pkg/meta/autoid"
	"github.com/pingcap/tidb/pkg/parser/ast"
	"github.com/pingcap/tidb/pkg/parser/model"
	plannercore "github.com/pingcap/tidb/pkg/planner/core"
	"github.com/pingcap/tidb/pkg/planner/core/base"
	"github.com/pingcap/tidb/pkg/session"
	"github.com/pingcap/tidb/pkg/sessionctx/variable"
	"github.com/pingcap/tidb/pkg/testkit"
	"github.com/pingcap/tidb/pkg/types"
	"github.com/pingcap/tidb/pkg/util/dbterror/exeerrors"
	"github.com/pingcap/tidb/pkg/util/logutil"
	"github.com/stretchr/testify/require"
	"go.etcd.io/etcd/tests/v3/integration"
	"go.uber.org/mock/gomock"
	"go.uber.org/zap"
)
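
// TestVerifyChecksum exercises importer.VerifyChecksum at every checksum
// level: with Checksum=required a mismatch fails with ErrChecksumMismatch and
// transient remote errors (injected via the errWhenChecksum failpoint) are
// retried; with Checksum=optional both mismatches and persistent errors are
// tolerated; with Checksum=off nothing is verified at all. It also checks
// that a slow ADMIN CHECKSUM request is interrupted when the context times out.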
func TestVerifyChecksum(t *testing.T) {
	ctx := context.Background()
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	pool := pools.NewResourcePool(func() (pools.Resource, error) {
		return tk.Session(), nil
	}, 1, 1, time.Second)
	defer pool.Close()

	plan := &importer.Plan{
		DBName: "db",
		TableInfo: &model.TableInfo{
			Name: model.NewCIStr("tb"),
		},
		Checksum:               config.OpLevelRequired,
		DistSQLScanConcurrency: 50,
	}
	tk.MustExec("create database db")
	tk.MustExec("create table db.tb(id int)")
	tk.MustExec("insert into db.tb values(1)")

	// ADMIN CHECKSUM TABLE always returns (1, 1, 1) for a memory store.
	// checksum = required
	backupDistScanCon := tk.Session().GetSessionVars().DistSQLScanConcurrency()
	require.Equal(t, variable.DefDistSQLScanConcurrency, backupDistScanCon)
	localChecksum := verify.MakeKVChecksum(1, 1, 1)
	err := importer.VerifyChecksum(ctx, plan, localChecksum, tk.Session(), logutil.BgLogger())
	require.NoError(t, err)
	require.Equal(t, backupDistScanCon, tk.Session().GetSessionVars().DistSQLScanConcurrency())
	localChecksum = verify.MakeKVChecksum(1, 2, 1)
	err = importer.VerifyChecksum(ctx, plan, localChecksum, tk.Session(), logutil.BgLogger())
	require.ErrorIs(t, err, common.ErrChecksumMismatch)

	// check that a slow checksum can be canceled through the context
	plan2 := &importer.Plan{
		DBName: "db",
		TableInfo: &model.TableInfo{
			Name: model.NewCIStr("tb2"),
		},
		Checksum: config.OpLevelRequired,
	}
	tk.MustExec(`
		create table db.tb2(
			id int,
			index idx1(id),
			index idx2(id),
			index idx3(id),
			index idx4(id),
			index idx5(id),
			index idx6(id),
			index idx7(id),
			index idx8(id),
			index idx9(id),
			index idx10(id)
		)`)
	tk.MustExec("insert into db.tb2 values(1)")
	backup, err := tk.Session().GetSessionVars().GetSessionOrGlobalSystemVar(ctx, variable.TiDBChecksumTableConcurrency)
	require.NoError(t, err)
	err = tk.Session().GetSessionVars().SetSystemVar(variable.TiDBChecksumTableConcurrency, "1")
	require.NoError(t, err)
	require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/executor/afterHandleChecksumRequest", `sleep(1000)`))
	ctx2, cancel := context.WithTimeout(ctx, time.Second)
	defer cancel()
	err = importer.VerifyChecksum(ctx2, plan2, localChecksum, tk.Session(), logutil.BgLogger())
	require.ErrorContains(t, err, "Query execution was interrupted")
	err = tk.Session().GetSessionVars().SetSystemVar(variable.TiDBChecksumTableConcurrency, backup)
	require.NoError(t, err)
	require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/executor/afterHandleChecksumRequest"))

	// remote checksum keeps erroring and eventually fails
	require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/executor/importer/errWhenChecksum", `3*return(true)`))
	defer func() {
		require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/executor/importer/errWhenChecksum"))
	}()
	err = importer.VerifyChecksum(ctx, plan, localChecksum, tk.Session(), logutil.BgLogger())
	require.ErrorContains(t, err, "occur an error when checksum")

	// remote checksum succeeds after retry
	require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/executor/importer/errWhenChecksum", `1*return(true)`))
	localChecksum = verify.MakeKVChecksum(1, 1, 1)
	err = importer.VerifyChecksum(ctx, plan, localChecksum, tk.Session(), logutil.BgLogger())
	require.NoError(t, err)

	// checksum = optional
	plan.Checksum = config.OpLevelOptional
	require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/executor/importer/errWhenChecksum"))
	localChecksum = verify.MakeKVChecksum(1, 1, 1)
	err = importer.VerifyChecksum(ctx, plan, localChecksum, tk.Session(), logutil.BgLogger())
	require.NoError(t, err)
	localChecksum = verify.MakeKVChecksum(1, 2, 1)
	err = importer.VerifyChecksum(ctx, plan, localChecksum, tk.Session(), logutil.BgLogger())
	require.NoError(t, err)
	require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/executor/importer/errWhenChecksum", `3*return(true)`))
	err = importer.VerifyChecksum(ctx, plan, localChecksum, tk.Session(), logutil.BgLogger())
	require.NoError(t, err)

	// checksum = off
	plan.Checksum = config.OpLevelOff
	require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/executor/importer/errWhenChecksum"))
	localChecksum = verify.MakeKVChecksum(1, 2, 1)
	err = importer.VerifyChecksum(ctx, plan, localChecksum, tk.Session(), logutil.BgLogger())
	require.NoError(t, err)
}
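
// TestGetTargetNodeCpuCnt checks the CPU count GetTargetNodeCPUCnt reports
// for each data source: the local core count (mocked to 8) for query sources,
// server-disk imports, and remote files while disttask is disabled, and the
// count recorded in the disttask node meta (mocked to 16 at InitMeta time)
// once disttask is enabled.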
func TestGetTargetNodeCpuCnt(t *testing.T) {
	_, tm, ctx := testutil.InitTableTest(t)

	old := variable.EnableDistTask.Load()
	variable.EnableDistTask.Store(false)
	require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/util/cpu/mockNumCpu", "return(16)"))
	t.Cleanup(func() {
		require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/util/cpu/mockNumCpu"))
		variable.EnableDistTask.Store(old)
	})
	require.NoError(t, tm.InitMeta(ctx, "tidb1", ""))

	require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/util/cpu/mockNumCpu", "return(8)"))
	targetNodeCPUCnt, err := importer.GetTargetNodeCPUCnt(ctx, importer.DataSourceTypeQuery, "")
	require.NoError(t, err)
	require.Equal(t, 8, targetNodeCPUCnt)
	// invalid path
	_, err = importer.GetTargetNodeCPUCnt(ctx, importer.DataSourceTypeFile, ":xx")
	require.ErrorIs(t, err, exeerrors.ErrLoadDataInvalidURI)
	// server disk import
	targetNodeCPUCnt, err = importer.GetTargetNodeCPUCnt(ctx, importer.DataSourceTypeFile, "/path/to/xxx.csv")
	require.NoError(t, err)
	require.Equal(t, 8, targetNodeCPUCnt)
	// disttask disabled
	targetNodeCPUCnt, err = importer.GetTargetNodeCPUCnt(ctx, importer.DataSourceTypeFile, "s3://path/to/xxx.csv")
	require.NoError(t, err)
	require.Equal(t, 8, targetNodeCPUCnt)
	// disttask enabled
	variable.EnableDistTask.Store(true)
	targetNodeCPUCnt, err = importer.GetTargetNodeCPUCnt(ctx, importer.DataSourceTypeFile, "s3://path/to/xxx.csv")
	require.NoError(t, err)
	require.Equal(t, 16, targetNodeCPUCnt)
}
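
// TestPostProcess verifies that importer.PostProcess surfaces checksum
// mismatches, passes on a matching checksum, and, when given a max allocator
// value, rebases the table's row-ID allocator so the next auto-increment
// insert starts above the imported IDs (124 after importing up to 123).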
func TestPostProcess(t *testing.T) {
	ctx := context.Background()
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	pool := pools.NewResourcePool(func() (pools.Resource, error) {
		return tk.Session(), nil
	}, 1, 1, time.Second)
	defer pool.Close()

	tk.MustExec("create database db")
	tk.MustExec("create table db.tb(id int primary key)")
	tk.MustExec("insert into db.tb values(1)")
	do, err := session.GetDomain(store)
	require.NoError(t, err)
	dbInfo, ok := do.InfoSchema().SchemaByName(model.NewCIStr("db"))
	require.True(t, ok)
	table, err := do.InfoSchema().TableByName(context.Background(), model.NewCIStr("db"), model.NewCIStr("tb"))
	require.NoError(t, err)
	plan := &importer.Plan{
		DBID:             dbInfo.ID,
		DBName:           "db",
		TableInfo:        table.Meta(),
		DesiredTableInfo: table.Meta(),
		Checksum:         config.OpLevelRequired,
	}
	logger := zap.NewExample()

	// verify checksum failed
	localChecksum := verify.NewKVGroupChecksumForAdd()
	localChecksum.AddRawGroup(verify.DataKVGroupID, 1, 2, 1)
	require.ErrorIs(t, importer.PostProcess(ctx, tk.Session(), nil, plan, localChecksum, logger), common.ErrChecksumMismatch)
	// success
	localChecksum = verify.NewKVGroupChecksumForAdd()
	localChecksum.AddRawGroup(verify.DataKVGroupID, 1, 1, 1)
	require.NoError(t, importer.PostProcess(ctx, tk.Session(), nil, plan, localChecksum, logger))

	// rebase success
	tk.MustExec("create table db.tb2(id int auto_increment primary key)")
	table, err = do.InfoSchema().TableByName(context.Background(), model.NewCIStr("db"), model.NewCIStr("tb2"))
	require.NoError(t, err)
	plan.TableInfo, plan.DesiredTableInfo = table.Meta(), table.Meta()
	integration.BeforeTestExternal(t)
	testEtcdCluster := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	t.Cleanup(func() {
		testEtcdCluster.Terminate(t)
	})
	tidbCfg := tidb.GetGlobalConfig()
	pathBak := tidbCfg.Path
	defer func() {
		tidbCfg.Path = pathBak
	}()
	tidbCfg.Path = testEtcdCluster.Members[0].ClientURLs[0].String()
	require.NoError(t, importer.PostProcess(ctx, tk.Session(), map[autoid.AllocatorType]int64{
		autoid.RowIDAllocType: 123,
	}, plan, localChecksum, logger))
	allocators := table.Allocators(tk.Session().GetTableCtx())
	nextGlobalAutoID, err := allocators.Get(autoid.RowIDAllocType).NextGlobalAutoID()
	require.NoError(t, err)
	require.Equal(t, int64(124), nextGlobalAutoID)
	tk.MustExec("insert into db.tb2 values(default)")
	tk.MustQuery("select * from db.tb2").Check(testkit.Rows("124"))
}
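
// getTableImporter builds a TableImporter for tableName in the test schema.
// A non-empty path yields a file-based import plan (and initializes its data
// store); an empty path attaches a stub select plan so the importer is driven
// by query rows instead.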
func getTableImporter(ctx context.Context, t *testing.T, store kv.Storage, tableName, path string, opts []*plannercore.LoadDataOpt) *importer.TableImporter {
	tk := testkit.NewTestKit(t, store)
	do, err := session.GetDomain(store)
	require.NoError(t, err)
	dbInfo, ok := do.InfoSchema().SchemaByName(model.NewCIStr("test"))
	require.True(t, ok)
	table, err := do.InfoSchema().TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr(tableName))
	require.NoError(t, err)
	var selectPlan base.PhysicalPlan
	if path == "" {
		selectPlan = &plannercore.PhysicalSelection{}
	}
	plan, err := importer.NewImportPlan(ctx, tk.Session(), &plannercore.ImportInto{
		Path:       path,
		Table:      &ast.TableName{Name: table.Meta().Name, DBInfo: dbInfo},
		Options:    opts,
		SelectPlan: selectPlan,
	}, table)
	require.NoError(t, err)
	controller, err := importer.NewLoadDataController(plan, table, &importer.ASTArgs{})
	require.NoError(t, err)
	if path != "" {
		require.NoError(t, controller.InitDataStore(ctx))
	}
	ti, err := importer.NewTableImporterForTest(ctx, controller, "11", &storeHelper{kvStore: store})
	require.NoError(t, err)
	return ti
}
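
// TestProcessChunkWith runs ProcessChunkWithWriter against a mocked engine
// writer for both source kinds: a CSV file chunk (skip_rows=1 drops the first
// of three rows, leaving 2 KV pairs) and a channel of three QueryRows. Each
// subtest asserts the per-column progress and the accumulated data-KV group
// checksum.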
func TestProcessChunkWith(t *testing.T) {
	ctx := context.Background()
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()
	tidbCfg := tidb.GetGlobalConfig()
	tidbCfg.TempDir = t.TempDir()
	tk.MustExec("use test")
	tk.MustExec("create table t(a int, b int, c int)")
	fileName := path.Join(tidbCfg.TempDir, "test.csv")
	sourceData := []byte("1,2,3\n4,5,6\n7,8,9\n")
	require.NoError(t, os.WriteFile(fileName, sourceData, 0o644))
	keyspace := store.GetCodec().GetKeyspace()

	t.Run("file chunk", func(t *testing.T) {
		chunkInfo := &checkpoints.ChunkCheckpoint{
			FileMeta: mydump.SourceFileMeta{Type: mydump.SourceTypeCSV, Path: "test.csv"},
			Chunk:    mydump.Chunk{EndOffset: int64(len(sourceData)), RowIDMax: 10000},
		}
		ti := getTableImporter(ctx, t, store, "t", fileName, []*plannercore.LoadDataOpt{
			{Name: "skip_rows", Value: expression.NewInt64Const(1)}})
		defer ti.Backend().CloseEngineMgr()
		kvWriter := mock.NewMockEngineWriter(ctrl)
		kvWriter.EXPECT().AppendRows(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
		progress := importer.NewProgress()
		checksum := verify.NewKVGroupChecksumWithKeyspace(keyspace)
		err := importer.ProcessChunkWithWriter(ctx, chunkInfo, ti, kvWriter, kvWriter, progress, zap.NewExample(), checksum)
		require.NoError(t, err)
		require.Len(t, progress.GetColSize(), 3)
		checksumMap := checksum.GetInnerChecksums()
		require.Len(t, checksumMap, 1)
		require.Equal(t, verify.MakeKVChecksum(74, 2, 15625182175392723123), *checksumMap[verify.DataKVGroupID])
	})

	t.Run("query chunk", func(t *testing.T) {
		chunkInfo := &checkpoints.ChunkCheckpoint{
			FileMeta: mydump.SourceFileMeta{Type: mydump.SourceTypeCSV, Path: "test.csv"},
			Chunk:    mydump.Chunk{EndOffset: int64(len(sourceData)), RowIDMax: 10000},
		}
		ti := getTableImporter(ctx, t, store, "t", "", nil)
		defer ti.Backend().CloseEngineMgr()
		rowsCh := make(chan importer.QueryRow, 3)
		for i := 1; i <= 3; i++ {
			rowsCh <- importer.QueryRow{
				ID: int64(i),
				Data: []types.Datum{
					types.NewIntDatum(int64((i-1)*3 + 1)),
					types.NewIntDatum(int64((i-1)*3 + 2)),
					types.NewIntDatum(int64((i-1)*3 + 3)),
				},
			}
		}
		close(rowsCh)
		ti.SetSelectedRowCh(rowsCh)
		kvWriter := mock.NewMockEngineWriter(ctrl)
		kvWriter.EXPECT().AppendRows(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
		progress := importer.NewProgress()
		checksum := verify.NewKVGroupChecksumWithKeyspace(keyspace)
		err := importer.ProcessChunkWithWriter(ctx, chunkInfo, ti, kvWriter, kvWriter, progress, zap.NewExample(), checksum)
		require.NoError(t, err)
		require.Len(t, progress.GetColSize(), 3)
		checksumMap := checksum.GetInnerChecksums()
		require.Len(t, checksumMap, 1)
		require.Equal(t, verify.MakeKVChecksum(111, 3, 14231358899564314836), *checksumMap[verify.DataKVGroupID])
	})
}
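
// TestPopulateChunks checks engine splitting: with __max_engine_size set to
// 20, three small CSV files are packed into two data engines (two chunks in
// the first, one in the second) plus a third engine, which lightning
// conventionally reserves for index KVs.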
func TestPopulateChunks(t *testing.T) {
	ctx := context.Background()
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tidbCfg := tidb.GetGlobalConfig()
	tidbCfg.TempDir = t.TempDir()
	tk.MustExec("use test")
	tk.MustExec("create table t(a int, b int, c int)")
	require.NoError(t, os.WriteFile(path.Join(tidbCfg.TempDir, "test-01.csv"),
		[]byte("1,2,3\n4,5,6\n7,8,9\n"), 0o644))
	require.NoError(t, os.WriteFile(path.Join(tidbCfg.TempDir, "test-02.csv"),
		[]byte("8,8,8\n"), 0o644))
	require.NoError(t, os.WriteFile(path.Join(tidbCfg.TempDir, "test-03.csv"),
		[]byte("9,9,9\n10,10,10\n"), 0o644))
	ti := getTableImporter(ctx, t, store, "t", fmt.Sprintf("%s/test-*.csv", tidbCfg.TempDir),
		[]*plannercore.LoadDataOpt{{Name: "__max_engine_size", Value: expression.NewStrConst("20")}})
	defer ti.Backend().CloseEngineMgr()
	require.NoError(t, ti.InitDataFiles(ctx))
	engines, err := ti.PopulateChunks(ctx)
	require.NoError(t, err)
	require.Len(t, engines, 3)
	require.Len(t, engines[0].Chunks, 2)
	require.Len(t, engines[1].Chunks, 1)
}