tidb/pkg/executor/importer/sampler_test.go

// Copyright 2025 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package importer

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/docker/go-units"
	"github.com/pingcap/tidb/pkg/ddl"
	"github.com/pingcap/tidb/pkg/meta/model"
	"github.com/pingcap/tidb/pkg/parser"
	"github.com/pingcap/tidb/pkg/parser/ast"
	"github.com/pingcap/tidb/pkg/table/tables"
	utilmock "github.com/pingcap/tidb/pkg/util/mock"
	"github.com/stretchr/testify/require"
	"go.uber.org/zap"
)

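// createDataFiles creates dir and fills it with fileCount CSV files, each
// containing rowsPerFile identical rows of roughly rowLen bytes (an int
// column followed by three padded text columns).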
func createDataFiles(t *testing.T, dir string, fileCount, rowsPerFile, rowLen int) {
	t.Helper()
	require.NoError(t, os.Mkdir(dir, 0o755))
	padLen := rowLen - 4
	require.GreaterOrEqual(t, padLen, 3)
	padding := strings.Repeat("a", padLen/3)
	var rowSB strings.Builder
	rowSB.WriteString("1111")
	for range 3 {
		rowSB.WriteString(",")
		rowSB.WriteString(padding)
	}
	rowSB.WriteString("\n")
	rowData := rowSB.String()
	var fileSB strings.Builder
	for j := 0; j < rowsPerFile; j++ {
		fileSB.WriteString(rowData)
	}
	for i := 0; i < fileCount; i++ {
		require.NoError(t, os.WriteFile(filepath.Join(dir, fmt.Sprintf("%03d.csv", i)), []byte(fileSB.String()), 0o644))
	}
}

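// caseTp describes a single sampling case: the shape of the generated CSV
// data, an optional ks codec, the target table schema, and the expected
// index size ratio.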
type caseTp struct {
	fileCount, rowsPerFile, rowLen int
	ksCodec                        []byte
	sql                            string
	ratio                          float64
}

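// runCaseFn generates the data files and a mock table for case c, builds a
// LoadDataController over the CSV files, and checks that the sampled index
// size ratio matches the expected value within a small delta.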
func runCaseFn(t *testing.T, i int, c caseTp) {
	dir := filepath.Join(t.TempDir(), fmt.Sprintf("case-%d", i))
	createDataFiles(t, dir, c.fileCount, c.rowsPerFile, c.rowLen)
	p := parser.New()
	node, err := p.ParseOneStmt(c.sql, "", "")
	require.NoError(t, err)
	sctx := utilmock.NewContext()
	tblInfo, err := ddl.MockTableInfo(sctx, node.(*ast.CreateTableStmt), 1)
	require.NoError(t, err)
	tblInfo.State = model.StatePublic
	table := tables.MockTableFromMeta(tblInfo)
	ctrl, err := NewLoadDataController(&Plan{
		Path:           filepath.Join(dir, "*.csv"),
		Format:         DataFormatCSV,
		LineFieldsInfo: newDefaultLineFieldsInfo(),
		InImportInto:   true,
	}, table, &ASTArgs{})
	require.NoError(t, err)
	ctrl.logger = zap.Must(zap.NewDevelopment())
	ctx := context.Background()
	require.NoError(t, ctrl.InitDataFiles(ctx))
	ratio, err := ctrl.sampleIndexSizeRatio(ctx, c.ksCodec)
	require.NoError(t, err)
	require.InDelta(t, c.ratio, ratio, 0.001)
}

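// TestSampleIndexSizeRatio exercises sampleIndexSizeRatio with varying file
// counts, row counts, row lengths, and index definitions, both with and
// without a ks codec.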
func TestSampleIndexSizeRatio(t *testing.T) {
	ksCodec := []byte{'x', 0x00, 0x00, 0x01}
	simpleTbl := `create table t (a int, b text, c text, d text, index idx(a));`
	cases := []caseTp{
		// without ks codec
		// no file
		{0, 20, 100, nil, simpleTbl, 0},
		// < 3 files
		{1, 20, 100, nil, simpleTbl, 0.287},
		{2, 20, 100, nil, simpleTbl, 0.287},
		// < 3 files, not enough rows
		{2, 8, 100, nil, simpleTbl, 0.287},
		// enough files
		{10, 20, 100, nil, simpleTbl, 0.287},
		{10, 20, 100, nil,
			`create table t (a int, b text, c text, d text, index idx(b(1024)));`, 0.568},
		{10, 20, 100, nil,
			`create table t (a int, b text, c text, d text, index idx1(a), index idx2(a), index idx3(a), index idx4(a));`, 1.151},
		// enough files, not enough rows
		{10, 5, 100, nil, simpleTbl, 0.287},
		// longer rows
		{10, 20, 400, nil, simpleTbl, 0.087},
		{10, 12, 2000, nil, simpleTbl, 0.018},
		{10, 12, 2000, nil,
			`create table t (a int, b text, c text, d text, index idx1(a), index idx2(a), index idx3(a), index idx4(a));`, 0.074},
		// with ks codec
		{10, 20, 100, ksCodec, simpleTbl, 0.308},
	}
	for i, c := range cases {
		t.Run(fmt.Sprintf("case-%d", i), func(t *testing.T) {
			runCaseFn(t, i, c)
		})
	}
}

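// TestSampleIndexSizeRatioVeryLongRows lowers maxSampleFileSize so that very
// long rows hit the limit quickly, and expects the sampler to return early
// with a zero ratio.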
func TestSampleIndexSizeRatioVeryLongRows(t *testing.T) {
	simpleTbl := `create table t (a int, b text, c text, d text, index idx(a));`
	bak := maxSampleFileSize
	maxSampleFileSize = 2 * units.MiB
	t.Cleanup(func() {
		maxSampleFileSize = bak
	})
	// the sampler returns early once it reaches maxSampleFileSize, so the
	// expected ratio is 0
	longRowCase := caseTp{10, 10, units.MiB + 100*units.KiB, nil, simpleTbl, 0}
	runCaseFn(t, -1, longRowCase)
}