529 lines
19 KiB
Go
529 lines
19 KiB
Go
// Copyright 2023 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package importer
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/url"
|
|
"os"
|
|
"os/user"
|
|
"path"
|
|
"path/filepath"
|
|
"runtime"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/docker/go-units"
|
|
"github.com/pingcap/failpoint"
|
|
"github.com/pingcap/tidb/pkg/config/kerneltype"
|
|
"github.com/pingcap/tidb/pkg/expression"
|
|
tidbkv "github.com/pingcap/tidb/pkg/kv"
|
|
"github.com/pingcap/tidb/pkg/lightning/config"
|
|
"github.com/pingcap/tidb/pkg/parser"
|
|
"github.com/pingcap/tidb/pkg/parser/ast"
|
|
plannercore "github.com/pingcap/tidb/pkg/planner/core"
|
|
"github.com/pingcap/tidb/pkg/planner/core/operator/physicalop"
|
|
plannerutil "github.com/pingcap/tidb/pkg/planner/util"
|
|
"github.com/pingcap/tidb/pkg/sessionctx/vardef"
|
|
"github.com/pingcap/tidb/pkg/testkit/testfailpoint"
|
|
"github.com/pingcap/tidb/pkg/types"
|
|
"github.com/pingcap/tidb/pkg/util/dbterror/exeerrors"
|
|
"github.com/pingcap/tidb/pkg/util/logutil"
|
|
"github.com/pingcap/tidb/pkg/util/mock"
|
|
"github.com/stretchr/testify/require"
|
|
tikvutil "github.com/tikv/client-go/v2/util"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
func TestInitDefaultOptions(t *testing.T) {
|
|
plan := &Plan{
|
|
DataSourceType: DataSourceTypeQuery,
|
|
}
|
|
plan.initDefaultOptions(context.Background(), 10, nil)
|
|
require.Equal(t, 2, plan.ThreadCnt)
|
|
|
|
plan = &Plan{
|
|
DataSourceType: DataSourceTypeFile,
|
|
}
|
|
vardef.CloudStorageURI.Store("s3://bucket/path")
|
|
t.Cleanup(func() {
|
|
vardef.CloudStorageURI.Store("")
|
|
})
|
|
plan.initDefaultOptions(context.Background(), 1, nil)
|
|
require.Equal(t, config.ByteSize(0), plan.DiskQuota)
|
|
require.Equal(t, config.OpLevelRequired, plan.Checksum)
|
|
require.Equal(t, 1, plan.ThreadCnt)
|
|
require.Equal(t, unlimitedWriteSpeed, plan.MaxWriteSpeed)
|
|
require.Equal(t, false, plan.SplitFile)
|
|
require.Equal(t, int64(100), plan.MaxRecordedErrors)
|
|
require.Equal(t, false, plan.Detached)
|
|
require.Equal(t, "utf8mb4", *plan.Charset)
|
|
require.Equal(t, false, plan.DisableTiKVImportMode)
|
|
if kerneltype.IsNextGen() {
|
|
require.Equal(t, config.DefaultBatchSize, plan.MaxEngineSize)
|
|
} else {
|
|
require.Equal(t, config.ByteSize(defaultMaxEngineSize), plan.MaxEngineSize)
|
|
}
|
|
|
|
require.Equal(t, "s3://bucket/path", plan.CloudStorageURI)
|
|
|
|
plan.initDefaultOptions(context.Background(), 10, nil)
|
|
require.Equal(t, 5, plan.ThreadCnt)
|
|
}
|
|
|
|
// for negative case see TestImportIntoOptionsNegativeCase
|
|
func TestInitOptionsPositiveCase(t *testing.T) {
|
|
sctx := mock.NewContext()
|
|
defer sctx.Close()
|
|
ctx := tikvutil.WithInternalSourceType(context.Background(), tidbkv.InternalImportInto)
|
|
|
|
convertOptions := func(inOptions []*ast.LoadDataOpt) []*plannercore.LoadDataOpt {
|
|
options := []*plannercore.LoadDataOpt{}
|
|
var err error
|
|
for _, opt := range inOptions {
|
|
loadDataOpt := plannercore.LoadDataOpt{Name: opt.Name}
|
|
if opt.Value != nil {
|
|
loadDataOpt.Value, err = plannerutil.RewriteAstExprWithPlanCtx(sctx, opt.Value, nil, nil, false)
|
|
require.NoError(t, err)
|
|
}
|
|
options = append(options, &loadDataOpt)
|
|
}
|
|
return options
|
|
}
|
|
|
|
sqlTemplate := "import into t from '/file.csv' with %s"
|
|
p := parser.New()
|
|
sql := fmt.Sprintf(sqlTemplate, characterSetOption+"='utf8', "+
|
|
fieldsTerminatedByOption+"='aaa', "+
|
|
fieldsEnclosedByOption+"='|', "+
|
|
fieldsEscapedByOption+"='', "+
|
|
fieldsDefinedNullByOption+"='N', "+
|
|
linesTerminatedByOption+"='END', "+
|
|
skipRowsOption+"=1, "+
|
|
diskQuotaOption+"='100gib', "+
|
|
checksumTableOption+"='optional', "+
|
|
threadOption+"=100000, "+
|
|
maxWriteSpeedOption+"='200mib', "+
|
|
splitFileOption+", "+
|
|
recordErrorsOption+"=123, "+
|
|
detachedOption+", "+
|
|
disableTiKVImportModeOption+", "+
|
|
maxEngineSizeOption+"='100gib', "+
|
|
disablePrecheckOption,
|
|
)
|
|
stmt, err := p.ParseOneStmt(sql, "", "")
|
|
require.NoError(t, err, sql)
|
|
plan := &Plan{Format: DataFormatCSV}
|
|
err = plan.initOptions(ctx, sctx, convertOptions(stmt.(*ast.ImportIntoStmt).Options))
|
|
require.NoError(t, err, sql)
|
|
require.Equal(t, "utf8", *plan.Charset, sql)
|
|
require.Equal(t, "aaa", plan.FieldsTerminatedBy, sql)
|
|
require.Equal(t, "|", plan.FieldsEnclosedBy, sql)
|
|
require.Equal(t, "", plan.FieldsEscapedBy, sql)
|
|
require.Equal(t, []string{"N"}, plan.FieldNullDef, sql)
|
|
require.Equal(t, "END", plan.LinesTerminatedBy, sql)
|
|
require.Equal(t, uint64(1), plan.IgnoreLines, sql)
|
|
require.Equal(t, config.ByteSize(100<<30), plan.DiskQuota, sql)
|
|
require.Equal(t, config.OpLevelOptional, plan.Checksum, sql)
|
|
require.Equal(t, runtime.GOMAXPROCS(0), plan.ThreadCnt, sql) // it's adjusted to the number of CPUs
|
|
require.Equal(t, config.ByteSize(200<<20), plan.MaxWriteSpeed, sql)
|
|
require.True(t, plan.SplitFile, sql)
|
|
require.Equal(t, int64(123), plan.MaxRecordedErrors, sql)
|
|
require.True(t, plan.Detached, sql)
|
|
require.True(t, plan.DisableTiKVImportMode, sql)
|
|
require.Equal(t, config.ByteSize(100<<30), plan.MaxEngineSize, sql)
|
|
require.Empty(t, plan.CloudStorageURI, sql)
|
|
require.True(t, plan.DisablePrecheck, sql)
|
|
|
|
// set cloud storage uri
|
|
vardef.CloudStorageURI.Store("s3://bucket/path")
|
|
t.Cleanup(func() {
|
|
vardef.CloudStorageURI.Store("")
|
|
})
|
|
plan = &Plan{Format: DataFormatCSV}
|
|
err = plan.initOptions(ctx, sctx, convertOptions(stmt.(*ast.ImportIntoStmt).Options))
|
|
require.NoError(t, err, sql)
|
|
require.Equal(t, "s3://bucket/path", plan.CloudStorageURI, sql)
|
|
|
|
// override cloud storage uri using option
|
|
sql2 := sql + ", " + cloudStorageURIOption + "='s3://bucket/path2'"
|
|
stmt, err = p.ParseOneStmt(sql2, "", "")
|
|
require.NoError(t, err, sql2)
|
|
plan = &Plan{Format: DataFormatCSV}
|
|
err = plan.initOptions(ctx, sctx, convertOptions(stmt.(*ast.ImportIntoStmt).Options))
|
|
require.NoError(t, err, sql2)
|
|
require.Equal(t, "s3://bucket/path2", plan.CloudStorageURI, sql2)
|
|
// override with gs
|
|
sql3 := sql + ", " + cloudStorageURIOption + "='gs://bucket/path2'"
|
|
stmt, err = p.ParseOneStmt(sql3, "", "")
|
|
require.NoError(t, err, sql3)
|
|
plan = &Plan{Format: DataFormatCSV}
|
|
err = plan.initOptions(ctx, sctx, convertOptions(stmt.(*ast.ImportIntoStmt).Options))
|
|
require.NoError(t, err, sql3)
|
|
require.Equal(t, "gs://bucket/path2", plan.CloudStorageURI, sql3)
|
|
// override with empty string, force use local sort
|
|
sql4 := sql + ", " + cloudStorageURIOption + "=''"
|
|
stmt, err = p.ParseOneStmt(sql4, "", "")
|
|
require.NoError(t, err, sql4)
|
|
plan = &Plan{Format: DataFormatCSV}
|
|
err = plan.initOptions(ctx, sctx, convertOptions(stmt.(*ast.ImportIntoStmt).Options))
|
|
require.NoError(t, err, sql4)
|
|
require.Equal(t, "", plan.CloudStorageURI, sql4)
|
|
}
|
|
|
|
func TestAdjustOptions(t *testing.T) {
|
|
plan := &Plan{
|
|
DiskQuota: 1,
|
|
ThreadCnt: 100000000,
|
|
MaxWriteSpeed: 10,
|
|
DataSourceType: DataSourceTypeFile,
|
|
}
|
|
plan.adjustOptions(16)
|
|
require.Equal(t, 16, plan.ThreadCnt)
|
|
require.Equal(t, config.ByteSize(10), plan.MaxWriteSpeed) // not adjusted
|
|
require.False(t, plan.DisableTiKVImportMode)
|
|
|
|
plan.ThreadCnt = 100000000
|
|
plan.DataSourceType = DataSourceTypeQuery
|
|
plan.adjustOptions(16)
|
|
require.Equal(t, 32, plan.ThreadCnt)
|
|
require.False(t, plan.DisableTiKVImportMode)
|
|
|
|
plan.CloudStorageURI = "s3://bucket/path"
|
|
plan.adjustOptions(16)
|
|
require.True(t, plan.DisableTiKVImportMode)
|
|
}
|
|
|
|
func TestAdjustDiskQuota(t *testing.T) {
|
|
err := failpoint.Enable("github.com/pingcap/tidb/pkg/lightning/common/GetStorageSize", "return(2048)")
|
|
require.NoError(t, err)
|
|
defer func() {
|
|
_ = failpoint.Disable("github.com/pingcap/tidb/pkg/lightning/common/GetStorageSize")
|
|
}()
|
|
d := t.TempDir()
|
|
require.Equal(t, int64(1638), adjustDiskQuota(0, d, logutil.BgLogger()))
|
|
require.Equal(t, int64(1), adjustDiskQuota(1, d, logutil.BgLogger()))
|
|
require.Equal(t, int64(1638), adjustDiskQuota(2000, d, logutil.BgLogger()))
|
|
}
|
|
|
|
func TestASTArgsFromStmt(t *testing.T) {
|
|
stmt := "IMPORT INTO tb (a, é) FROM 'gs://test-load/test.tsv';"
|
|
stmtNode, err := parser.New().ParseOneStmt(stmt, "latin1", "latin1_bin")
|
|
require.NoError(t, err)
|
|
text := stmtNode.Text()
|
|
require.Equal(t, stmt, text)
|
|
astArgs, err := ASTArgsFromStmt(text)
|
|
require.NoError(t, err)
|
|
importIntoStmt := stmtNode.(*ast.ImportIntoStmt)
|
|
require.Equal(t, astArgs.ColumnAssignments, importIntoStmt.ColumnAssignments)
|
|
require.Equal(t, astArgs.ColumnsAndUserVars, importIntoStmt.ColumnsAndUserVars)
|
|
}
|
|
|
|
func urlEqual(t *testing.T, expected, actual string) {
|
|
urlExpected, err := url.Parse(expected)
|
|
require.NoError(t, err)
|
|
urlGot, err := url.Parse(actual)
|
|
require.NoError(t, err)
|
|
// order of query parameters might change
|
|
require.Equal(t, urlExpected.Query(), urlGot.Query())
|
|
urlExpected.RawQuery, urlGot.RawQuery = "", ""
|
|
require.Equal(t, urlExpected.String(), urlGot.String())
|
|
}
|
|
|
|
func TestInitParameters(t *testing.T) {
|
|
// test redacted
|
|
p := &Plan{
|
|
Format: DataFormatCSV,
|
|
Path: "s3://bucket/path?access-key=111111&secret-access-key=222222",
|
|
}
|
|
require.NoError(t, p.initParameters(&plannercore.ImportInto{
|
|
Options: []*plannercore.LoadDataOpt{
|
|
{
|
|
Name: cloudStorageURIOption,
|
|
Value: &expression.Constant{
|
|
Value: types.NewStringDatum("s3://this-is-for-storage/path?access-key=aaaaaa&secret-access-key=bbbbbb"),
|
|
},
|
|
},
|
|
},
|
|
}))
|
|
urlEqual(t, "s3://bucket/path?access-key=xxxxxx&secret-access-key=xxxxxx", p.Parameters.FileLocation)
|
|
require.Len(t, p.Parameters.Options, 1)
|
|
urlEqual(t, "s3://this-is-for-storage/path?access-key=xxxxxx&secret-access-key=xxxxxx",
|
|
p.Parameters.Options[cloudStorageURIOption].(string))
|
|
|
|
// test other options
|
|
require.NoError(t, p.initParameters(&plannercore.ImportInto{
|
|
Options: []*plannercore.LoadDataOpt{
|
|
{
|
|
Name: detachedOption,
|
|
},
|
|
{
|
|
Name: threadOption,
|
|
Value: &expression.Constant{
|
|
Value: types.NewIntDatum(3),
|
|
},
|
|
},
|
|
},
|
|
}))
|
|
require.Len(t, p.Parameters.Options, 2)
|
|
require.Contains(t, p.Parameters.Options, detachedOption)
|
|
require.Equal(t, "3", p.Parameters.Options[threadOption])
|
|
}
|
|
|
|
func TestGetLocalBackendCfg(t *testing.T) {
|
|
c := &LoadDataController{
|
|
Plan: &Plan{},
|
|
}
|
|
cfg := c.getLocalBackendCfg("", "http://1.1.1.1:1234", "/tmp")
|
|
require.Equal(t, "http://1.1.1.1:1234", cfg.PDAddr)
|
|
require.Equal(t, "/tmp", cfg.LocalStoreDir)
|
|
require.True(t, cfg.DisableAutomaticCompactions)
|
|
require.Zero(t, cfg.RaftKV2SwitchModeDuration)
|
|
|
|
c.Plan.IsRaftKV2 = true
|
|
cfg = c.getLocalBackendCfg("", "http://1.1.1.1:1234", "/tmp")
|
|
require.Greater(t, cfg.RaftKV2SwitchModeDuration, time.Duration(0))
|
|
require.Equal(t, config.DefaultSwitchTiKVModeInterval, cfg.RaftKV2SwitchModeDuration)
|
|
}
|
|
|
|
func TestInitCompressedFiles(t *testing.T) {
|
|
username, err := user.Current()
|
|
require.NoError(t, err)
|
|
if username.Name == "root" {
|
|
t.Skip("it cannot run as root")
|
|
}
|
|
tempDir := t.TempDir()
|
|
ctx := context.Background()
|
|
|
|
for i := range 2048 {
|
|
fileName := filepath.Join(tempDir, fmt.Sprintf("test_%d.csv.gz", i))
|
|
require.NoError(t, os.WriteFile(fileName, []byte{}, 0o644))
|
|
}
|
|
|
|
testfailpoint.Enable(t, "github.com/pingcap/tidb/pkg/lightning/mydump/SampleFileCompressPercentage", `return(250)`)
|
|
c := LoadDataController{
|
|
Plan: &Plan{
|
|
Format: DataFormatCSV,
|
|
InImportInto: true,
|
|
Charset: &defaultCharacterSet,
|
|
LineFieldsInfo: newDefaultLineFieldsInfo(),
|
|
FieldNullDef: defaultFieldNullDef,
|
|
Parameters: &ImportParameters{},
|
|
},
|
|
logger: zap.NewExample(),
|
|
}
|
|
|
|
c.Path = filepath.Join(tempDir, "*.gz")
|
|
require.NoError(t, c.InitDataFiles(ctx))
|
|
}
|
|
|
|
func TestSupportedSuffixForServerDisk(t *testing.T) {
|
|
if kerneltype.IsNextGen() {
|
|
t.Skip("nextgen doesn't support import from server disk")
|
|
}
|
|
username, err := user.Current()
|
|
require.NoError(t, err)
|
|
if username.Name == "root" {
|
|
t.Skip("it cannot run as root")
|
|
}
|
|
tempDir := t.TempDir()
|
|
ctx := context.Background()
|
|
|
|
fileName := filepath.Join(tempDir, "test.csv")
|
|
require.NoError(t, os.WriteFile(fileName, []byte{}, 0o644))
|
|
fileName2 := filepath.Join(tempDir, "test.csv.gz")
|
|
require.NoError(t, os.WriteFile(fileName2, []byte{}, 0o644))
|
|
c := LoadDataController{
|
|
Plan: &Plan{
|
|
Format: DataFormatCSV,
|
|
InImportInto: true,
|
|
Charset: &defaultCharacterSet,
|
|
LineFieldsInfo: newDefaultLineFieldsInfo(),
|
|
FieldNullDef: defaultFieldNullDef,
|
|
Parameters: &ImportParameters{},
|
|
},
|
|
logger: zap.NewExample(),
|
|
}
|
|
// no suffix
|
|
c.Path = filepath.Join(tempDir, "test")
|
|
require.ErrorIs(t, c.InitDataFiles(ctx), exeerrors.ErrLoadDataInvalidURI)
|
|
// unknown suffix
|
|
c.Path = filepath.Join(tempDir, "test.abc")
|
|
require.ErrorIs(t, c.InitDataFiles(ctx), exeerrors.ErrLoadDataInvalidURI)
|
|
c.Path = fileName
|
|
require.NoError(t, c.InitDataFiles(ctx))
|
|
c.Path = fileName2
|
|
require.NoError(t, c.InitDataFiles(ctx))
|
|
|
|
var allData []string
|
|
for i := range 3 {
|
|
fileName := fmt.Sprintf("server-%d.csv", i)
|
|
var content []byte
|
|
rowCnt := 2
|
|
for j := range rowCnt {
|
|
content = append(content, fmt.Appendf(nil, "%d,test-%d\n", i*rowCnt+j, i*rowCnt+j)...)
|
|
allData = append(allData, fmt.Sprintf("%d test-%d", i*rowCnt+j, i*rowCnt+j))
|
|
}
|
|
require.NoError(t, os.WriteFile(path.Join(tempDir, fileName), content, 0o644))
|
|
}
|
|
// directory without permission
|
|
require.NoError(t, os.MkdirAll(path.Join(tempDir, "no-perm"), 0o700))
|
|
require.NoError(t, os.WriteFile(path.Join(tempDir, "no-perm", "no-perm.csv"), []byte("1,1"), 0o644))
|
|
require.NoError(t, os.Chmod(path.Join(tempDir, "no-perm"), 0o000))
|
|
t.Cleanup(func() {
|
|
// make sure TempDir RemoveAll cleanup works
|
|
_ = os.Chmod(path.Join(tempDir, "no-perm"), 0o700)
|
|
})
|
|
// file without permission
|
|
require.NoError(t, os.WriteFile(path.Join(tempDir, "no-perm.csv"), []byte("1,1"), 0o644))
|
|
require.NoError(t, os.Chmod(path.Join(tempDir, "no-perm.csv"), 0o000))
|
|
|
|
// relative path
|
|
c.Path = "~/file.csv"
|
|
err2 := c.InitDataFiles(ctx)
|
|
require.ErrorIs(t, err2, exeerrors.ErrLoadDataInvalidURI)
|
|
require.ErrorContains(t, err2, "URI of data source is invalid")
|
|
// non-exist parent directory
|
|
c.Path = "/path/to/non/exists/file.csv"
|
|
err = c.InitDataFiles(ctx)
|
|
require.ErrorIs(t, err, exeerrors.ErrLoadDataInvalidURI)
|
|
require.ErrorContains(t, err, "no such file or directory")
|
|
// without permission to parent dir
|
|
c.Path = path.Join(tempDir, "no-perm", "no-perm.csv")
|
|
err = c.InitDataFiles(ctx)
|
|
require.ErrorIs(t, err, exeerrors.ErrLoadDataCantRead)
|
|
require.ErrorContains(t, err, "permission denied")
|
|
// file not exists
|
|
c.Path = path.Join(tempDir, "not-exists.csv")
|
|
err = c.InitDataFiles(ctx)
|
|
require.ErrorIs(t, err, exeerrors.ErrLoadDataCantRead)
|
|
require.ErrorContains(t, err, "no such file or directory")
|
|
// file without permission
|
|
c.Path = path.Join(tempDir, "no-perm.csv")
|
|
err = c.InitDataFiles(ctx)
|
|
require.ErrorIs(t, err, exeerrors.ErrLoadDataCantRead)
|
|
require.ErrorContains(t, err, "permission denied")
|
|
// we don't have read access to 'no-perm' directory, so walk-dir fails
|
|
c.Path = path.Join(tempDir, "server-*.csv")
|
|
err = c.InitDataFiles(ctx)
|
|
require.ErrorIs(t, err, exeerrors.ErrLoadDataCantRead)
|
|
require.ErrorContains(t, err, "permission denied")
|
|
// grant read access to 'no-perm' directory, should ok now.
|
|
require.NoError(t, os.Chmod(path.Join(tempDir, "no-perm"), 0o400))
|
|
c.Path = path.Join(tempDir, "server-*.csv")
|
|
require.NoError(t, c.InitDataFiles(ctx))
|
|
// test glob matching pattern [12]
|
|
err = os.WriteFile(path.Join(tempDir, "glob-1.csv"), []byte("1,1"), 0o644)
|
|
require.NoError(t, err)
|
|
err = os.WriteFile(path.Join(tempDir, "glob-2.csv"), []byte("2,2"), 0o644)
|
|
require.NoError(t, err)
|
|
err = os.WriteFile(path.Join(tempDir, "glob-3.csv"), []byte("3,3"), 0o644)
|
|
require.NoError(t, err)
|
|
c.Path = path.Join(tempDir, "glob-[12].csv")
|
|
require.NoError(t, c.InitDataFiles(ctx))
|
|
gotPath := make([]string, 0, len(c.dataFiles))
|
|
for _, f := range c.dataFiles {
|
|
gotPath = append(gotPath, f.Path)
|
|
}
|
|
require.ElementsMatch(t, []string{"glob-1.csv", "glob-2.csv"}, gotPath)
|
|
// test glob matching pattern [2-3]
|
|
c.Path = path.Join(tempDir, "glob-[2-3].csv")
|
|
require.NoError(t, c.InitDataFiles(ctx))
|
|
gotPath = make([]string, 0, len(c.dataFiles))
|
|
for _, f := range c.dataFiles {
|
|
gotPath = append(gotPath, f.Path)
|
|
}
|
|
require.ElementsMatch(t, []string{"glob-2.csv", "glob-3.csv"}, gotPath)
|
|
|
|
testcases := []struct {
|
|
fileNames []string
|
|
expectFormat string
|
|
}{
|
|
{
|
|
expectFormat: DataFormatCSV,
|
|
fileNames: []string{"file1.CSV", "file1.csv.gz", "file1.csv.gz", "file1.CSV.GZIP", "file1.CSV.gzip", "file1.csv.zstd", "file1.csv.zst", "file1.csv.snappy"},
|
|
},
|
|
{
|
|
expectFormat: DataFormatSQL,
|
|
fileNames: []string{"file2.SQL", "file2.sql.gz", "file2.SQL.GZIP", "file2.sql.zstd", "file2.sql.zstd", "file2.sql.zst", "file2.sql.zst", "file2.sql.snappy"},
|
|
},
|
|
{
|
|
expectFormat: DataFormatParquet,
|
|
fileNames: []string{"file3.PARQUET", "file3.parquet.gz", "file3.PARQUET.GZIP", "file3.parquet.zstd", "file3.parquet.zst", "file3.parquet.snappy", "file3.parquet.snappy"},
|
|
},
|
|
}
|
|
|
|
testfailpoint.Enable(t, "github.com/pingcap/tidb/pkg/executor/importer/skipEstimateCompressionForParquet", "return(true)")
|
|
for _, testcase := range testcases {
|
|
for _, fileName := range testcase.fileNames {
|
|
c.Format = DataFormatAuto
|
|
c.Path = path.Join(tempDir, fileName)
|
|
err = os.WriteFile(c.Path, []byte{}, 0o644)
|
|
require.NoError(t, err)
|
|
require.NoError(t, c.InitDataFiles(ctx))
|
|
require.Equal(t, testcase.expectFormat, c.Format)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestGetDataSourceType(t *testing.T) {
|
|
require.Equal(t, DataSourceTypeQuery, getDataSourceType(&plannercore.ImportInto{
|
|
SelectPlan: &physicalop.PhysicalSelection{},
|
|
}))
|
|
require.Equal(t, DataSourceTypeFile, getDataSourceType(&plannercore.ImportInto{}))
|
|
}
|
|
func TestParseFileType(t *testing.T) {
|
|
testCases := []struct {
|
|
name string
|
|
path string
|
|
expected string
|
|
}{
|
|
// Basic file extensions
|
|
{name: "sql extension", path: "test.sql", expected: DataFormatSQL},
|
|
{name: "parquet extension", path: "data.parquet", expected: DataFormatParquet},
|
|
{name: "csv extension", path: "file.csv", expected: DataFormatCSV},
|
|
{name: "no extension", path: "noext", expected: DataFormatCSV},
|
|
// Single compression extension
|
|
{name: "sql with gz", path: "test.sql.gz", expected: DataFormatSQL},
|
|
{name: "parquet with zstd", path: "data.parquet.zst", expected: DataFormatParquet},
|
|
{name: "csv with snappy", path: "file.csv.snappy", expected: DataFormatCSV},
|
|
// Edge cases after removing compression
|
|
{name: "only compression extension", path: "file.gz", expected: DataFormatCSV},
|
|
{name: "non-recognized extension after compression", path: "document.txt.gz", expected: DataFormatCSV},
|
|
// Case insensitivity
|
|
{name: "uppercase extension", path: "TEST.SQL.GZ", expected: DataFormatSQL},
|
|
{name: "mixed case extension", path: "file.PARQUET.zst", expected: DataFormatParquet},
|
|
// Multiple dots in filename
|
|
{name: "multiple dots in name", path: "backup.file.sql.gz", expected: DataFormatSQL},
|
|
{name: "hidden file with compression", path: ".hidden.sql.gz", expected: DataFormatSQL},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
actual := parseFileType(tc.path)
|
|
require.Equal(t, tc.expected, actual)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestGetDefMaxEngineSize(t *testing.T) {
|
|
if kerneltype.IsClassic() {
|
|
require.Equal(t, config.ByteSize(500*units.GiB), getDefMaxEngineSize())
|
|
} else {
|
|
require.Equal(t, config.ByteSize(100*units.GiB), getDefMaxEngineSize())
|
|
}
|
|
}
|