1636 lines
58 KiB
Go
1636 lines
58 KiB
Go
// Copyright 2019 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package config
|
|
|
|
import (
|
|
"context"
|
|
"crypto/tls"
|
|
"encoding/json"
|
|
"fmt"
|
|
"math"
|
|
"net"
|
|
"net/url"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
"unicode/utf8"
|
|
|
|
"github.com/BurntSushi/toml"
|
|
"github.com/docker/go-units"
|
|
gomysql "github.com/go-sql-driver/mysql"
|
|
"github.com/pingcap/errors"
|
|
tidbcfg "github.com/pingcap/tidb/pkg/config"
|
|
"github.com/pingcap/tidb/pkg/lightning/common"
|
|
"github.com/pingcap/tidb/pkg/lightning/log"
|
|
"github.com/pingcap/tidb/pkg/parser/mysql"
|
|
"github.com/pingcap/tidb/pkg/util"
|
|
filter "github.com/pingcap/tidb/pkg/util/table-filter"
|
|
router "github.com/pingcap/tidb/pkg/util/table-router"
|
|
"go.uber.org/atomic"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// constants for config items
|
|
const (
|
|
// ImportMode defines mode of import for tikv.
|
|
ImportMode = "import"
|
|
// NormalMode defines mode of normal for tikv.
|
|
NormalMode = "normal"
|
|
|
|
// BackendTiDB is a constant for choosing the "TiDB" backend in the configuration.
|
|
BackendTiDB = "tidb"
|
|
// BackendLocal is a constant for choosing the "Local" backup in the configuration.
|
|
// In this mode, we write & sort kv pairs with local storage and directly write them to tikv.
|
|
BackendLocal = "local"
|
|
|
|
// CheckpointDriverMySQL is a constant for choosing the "MySQL" checkpoint driver in the configuration.
|
|
CheckpointDriverMySQL = "mysql"
|
|
// CheckpointDriverFile is a constant for choosing the "File" checkpoint driver in the configuration.
|
|
CheckpointDriverFile = "file"
|
|
|
|
// KVWriteBatchSize batch size when write to TiKV.
|
|
// this is the default value of linux send buffer size(net.ipv4.tcp_wmem) too.
|
|
KVWriteBatchSize = 16 * units.KiB
|
|
DefaultRangeConcurrency = 16
|
|
|
|
defaultDistSQLScanConcurrency = 15
|
|
defaultBuildStatsConcurrency = 20
|
|
defaultIndexSerialScanConcurrency = 20
|
|
defaultChecksumTableConcurrency = 2
|
|
DefaultTableConcurrency = 6
|
|
defaultIndexConcurrency = 2
|
|
DefaultRegionCheckBackoffLimit = 1800
|
|
DefaultRegionSplitBatchSize = 4096
|
|
defaultLogicalImportBatchSize = 96 * units.KiB
|
|
defaultLogicalImportBatchRows = 65536
|
|
|
|
// defaultMetaSchemaName is the default database name used to store lightning metadata
|
|
defaultMetaSchemaName = "lightning_metadata"
|
|
defaultTaskInfoSchemaName = "lightning_task_info"
|
|
DefaultRecordDuplicateThreshold = 10000
|
|
|
|
// autoDiskQuotaLocalReservedSpeed is the estimated size increase per
|
|
// millisecond per write thread the local backend may gain on all engines.
|
|
// This is used to compute the maximum size overshoot between two disk quota
|
|
// checks, if the first one has barely passed.
|
|
//
|
|
// With cron.check-disk-quota = 1m, region-concurrency = 40, this should
|
|
// contribute 2.3 GiB to the reserved size.
|
|
// autoDiskQuotaLocalReservedSpeed uint64 = 1 * units.KiB
|
|
|
|
DefaultEngineMemCacheSize = 512 * units.MiB
|
|
DefaultLocalWriterMemCacheSize = 128 * units.MiB
|
|
DefaultBlockSize = 16 * units.KiB
|
|
|
|
defaultCSVDataCharacterSet = "binary"
|
|
defaultCSVDataInvalidCharReplace = utf8.RuneError
|
|
|
|
DefaultSwitchTiKVModeInterval = 5 * time.Minute
|
|
)
|
|
|
|
var (
|
|
supportedStorageTypes = []string{"file", "local", "s3", "noop", "gcs", "gs"}
|
|
|
|
defaultFilter = []string{
|
|
"*.*",
|
|
"!mysql.*",
|
|
"!sys.*",
|
|
"!INFORMATION_SCHEMA.*",
|
|
"!PERFORMANCE_SCHEMA.*",
|
|
"!METRICS_SCHEMA.*",
|
|
"!INSPECTION_SCHEMA.*",
|
|
}
|
|
)
|
|
|
|
// GetDefaultFilter gets the default table filter used in Lightning.
|
|
// It clones the original default filter,
|
|
// so that the original value won't be changed when the returned slice's element is changed.
|
|
func GetDefaultFilter() []string {
|
|
return append([]string{}, defaultFilter...)
|
|
}
|
|
|
|
// DBStore is the database connection information.
|
|
type DBStore struct {
|
|
Host string `toml:"host" json:"host"`
|
|
Port int `toml:"port" json:"port"`
|
|
User string `toml:"user" json:"user"`
|
|
Psw string `toml:"password" json:"-"`
|
|
StatusPort int `toml:"status-port" json:"status-port"`
|
|
PdAddr string `toml:"pd-addr" json:"pd-addr"`
|
|
StrSQLMode string `toml:"sql-mode" json:"sql-mode"`
|
|
TLS string `toml:"tls" json:"tls"`
|
|
Security *Security `toml:"security" json:"security"`
|
|
|
|
SQLMode mysql.SQLMode `toml:"-" json:"-"`
|
|
MaxAllowedPacket uint64 `toml:"max-allowed-packet" json:"max-allowed-packet"`
|
|
|
|
DistSQLScanConcurrency int `toml:"distsql-scan-concurrency" json:"distsql-scan-concurrency"`
|
|
BuildStatsConcurrency int `toml:"build-stats-concurrency" json:"build-stats-concurrency"`
|
|
IndexSerialScanConcurrency int `toml:"index-serial-scan-concurrency" json:"index-serial-scan-concurrency"`
|
|
ChecksumTableConcurrency int `toml:"checksum-table-concurrency" json:"checksum-table-concurrency"`
|
|
Vars map[string]string `toml:"session-vars" json:"vars"`
|
|
|
|
IOTotalBytes *atomic.Uint64 `toml:"-" json:"-"`
|
|
UUID string `toml:"-" json:"-"`
|
|
}
|
|
|
|
// adjust assigns default values and check illegal values. The arguments must be
|
|
// adjusted before calling this function.
|
|
func (d *DBStore) adjust(
|
|
ctx context.Context,
|
|
i *TikvImporter,
|
|
s *Security,
|
|
tlsObj *common.TLS,
|
|
) error {
|
|
if i.Backend == BackendLocal {
|
|
if d.BuildStatsConcurrency == 0 {
|
|
d.BuildStatsConcurrency = defaultBuildStatsConcurrency
|
|
}
|
|
if d.IndexSerialScanConcurrency == 0 {
|
|
d.IndexSerialScanConcurrency = defaultIndexSerialScanConcurrency
|
|
}
|
|
if d.ChecksumTableConcurrency == 0 {
|
|
d.ChecksumTableConcurrency = defaultChecksumTableConcurrency
|
|
}
|
|
}
|
|
var err error
|
|
d.SQLMode, err = mysql.GetSQLMode(d.StrSQLMode)
|
|
if err != nil {
|
|
return common.ErrInvalidConfig.Wrap(err).GenWithStack("`mydumper.tidb.sql_mode` must be a valid SQL_MODE")
|
|
}
|
|
|
|
if d.Security == nil {
|
|
d.Security = &Security{
|
|
CAPath: s.CAPath,
|
|
CertPath: s.CertPath,
|
|
KeyPath: s.KeyPath,
|
|
CABytes: s.CABytes,
|
|
CertBytes: s.CertBytes,
|
|
KeyBytes: s.KeyBytes,
|
|
RedactInfoLog: s.RedactInfoLog,
|
|
TLSConfig: s.TLSConfig,
|
|
AllowFallbackToPlaintext: s.AllowFallbackToPlaintext,
|
|
}
|
|
}
|
|
|
|
switch d.TLS {
|
|
case "preferred":
|
|
d.Security.AllowFallbackToPlaintext = true
|
|
fallthrough
|
|
case "skip-verify":
|
|
if d.Security.TLSConfig == nil {
|
|
/* #nosec G402 */
|
|
d.Security.TLSConfig = &tls.Config{
|
|
MinVersion: tls.VersionTLS12,
|
|
InsecureSkipVerify: true,
|
|
NextProtos: []string{"h2", "http/1.1"}, // specify `h2` to let Go use HTTP/2.
|
|
}
|
|
} else {
|
|
d.Security.TLSConfig.InsecureSkipVerify = true
|
|
}
|
|
case "cluster":
|
|
if len(s.CAPath) == 0 {
|
|
return common.ErrInvalidConfig.GenWithStack("cannot set `tidb.tls` to 'cluster' without a [security] section")
|
|
}
|
|
case "":
|
|
case "false":
|
|
d.Security.TLSConfig = nil
|
|
d.Security.CAPath = ""
|
|
d.Security.CertPath = ""
|
|
d.Security.CABytes = nil
|
|
d.Security.CertBytes = nil
|
|
d.Security.KeyPath = ""
|
|
d.Security.KeyBytes = nil
|
|
default:
|
|
return common.ErrInvalidConfig.GenWithStack("unsupported `tidb.tls` config %s", d.TLS)
|
|
}
|
|
|
|
mustHaveInternalConnections := i.Backend == BackendLocal
|
|
// automatically determine the TiDB port & PD address from TiDB settings
|
|
if mustHaveInternalConnections && (d.Port <= 0 || len(d.PdAddr) == 0) {
|
|
var settings tidbcfg.Config
|
|
err = tlsObj.GetJSON(ctx, "/settings", &settings)
|
|
if err != nil {
|
|
return common.ErrInvalidConfig.Wrap(err).GenWithStack("cannot fetch settings from TiDB, please manually fill in `tidb.port` and `tidb.pd-addr`")
|
|
}
|
|
if d.Port <= 0 {
|
|
d.Port = int(settings.Port)
|
|
}
|
|
if len(d.PdAddr) == 0 {
|
|
// verify that it is not a empty string
|
|
pdAddrs := strings.Split(settings.Path, ",")
|
|
for _, ip := range pdAddrs {
|
|
ipPort := strings.Split(ip, ":")
|
|
if len(ipPort[0]) == 0 {
|
|
return common.ErrInvalidConfig.GenWithStack("invalid `tidb.pd-addr` setting")
|
|
}
|
|
if len(ipPort[1]) == 0 || ipPort[1] == "0" {
|
|
return common.ErrInvalidConfig.GenWithStack("invalid `tidb.port` setting")
|
|
}
|
|
}
|
|
d.PdAddr = settings.Path
|
|
}
|
|
}
|
|
|
|
if d.Port <= 0 {
|
|
return common.ErrInvalidConfig.GenWithStack("invalid `tidb.port` setting")
|
|
}
|
|
|
|
if mustHaveInternalConnections && len(d.PdAddr) == 0 {
|
|
return common.ErrInvalidConfig.GenWithStack("invalid `tidb.pd-addr` setting")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Routes is a alias of []*router.TableRule. It's used to attach method to []*router.TableRule.
|
|
type Routes []*router.TableRule
|
|
|
|
func (r *Routes) adjust(m *MydumperRuntime) error {
|
|
for _, rule := range *r {
|
|
if !m.CaseSensitive {
|
|
rule.ToLower()
|
|
}
|
|
if err := rule.Valid(); err != nil {
|
|
return common.ErrInvalidConfig.Wrap(err).GenWithStack("file route rule is invalid")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Config is the configuration.
|
|
type Config struct {
|
|
TaskID int64 `toml:"-" json:"id"`
|
|
|
|
App Lightning `toml:"lightning" json:"lightning"`
|
|
TiDB DBStore `toml:"tidb" json:"tidb"`
|
|
|
|
Checkpoint Checkpoint `toml:"checkpoint" json:"checkpoint"`
|
|
Mydumper MydumperRuntime `toml:"mydumper" json:"mydumper"`
|
|
TikvImporter TikvImporter `toml:"tikv-importer" json:"tikv-importer"`
|
|
PostRestore PostRestore `toml:"post-restore" json:"post-restore"`
|
|
Cron Cron `toml:"cron" json:"cron"`
|
|
Routes Routes `toml:"routes" json:"routes"`
|
|
Security Security `toml:"security" json:"security"`
|
|
Conflict Conflict `toml:"conflict" json:"conflict"`
|
|
}
|
|
|
|
// String implements fmt.Stringer interface.
|
|
func (cfg *Config) String() string {
|
|
bytes, err := json.Marshal(cfg)
|
|
if err != nil {
|
|
log.L().Error("marshal config to json error", log.ShortError(err))
|
|
}
|
|
return string(bytes)
|
|
}
|
|
|
|
// ToTLS creates a common.TLS from the config.
|
|
func (cfg *Config) ToTLS() (*common.TLS, error) {
|
|
hostPort := net.JoinHostPort(cfg.TiDB.Host, strconv.Itoa(cfg.TiDB.StatusPort))
|
|
return common.NewTLS(
|
|
cfg.Security.CAPath,
|
|
cfg.Security.CertPath,
|
|
cfg.Security.KeyPath,
|
|
hostPort,
|
|
cfg.Security.CABytes,
|
|
cfg.Security.CertBytes,
|
|
cfg.Security.KeyBytes,
|
|
)
|
|
}
|
|
|
|
// Lightning is the root configuration of lightning.
|
|
type Lightning struct {
|
|
TableConcurrency int `toml:"table-concurrency" json:"table-concurrency"`
|
|
IndexConcurrency int `toml:"index-concurrency" json:"index-concurrency"`
|
|
RegionConcurrency int `toml:"region-concurrency" json:"region-concurrency"`
|
|
IOConcurrency int `toml:"io-concurrency" json:"io-concurrency"`
|
|
CheckRequirements bool `toml:"check-requirements" json:"check-requirements"`
|
|
MetaSchemaName string `toml:"meta-schema-name" json:"meta-schema-name"`
|
|
|
|
MaxError MaxError `toml:"max-error" json:"max-error"`
|
|
// deprecated, use Conflict.MaxRecordRows instead
|
|
MaxErrorRecords int64 `toml:"max-error-records" json:"max-error-records"`
|
|
TaskInfoSchemaName string `toml:"task-info-schema-name" json:"task-info-schema-name"`
|
|
}
|
|
|
|
// adjust assigns default values and check illegal values. The input TikvImporter
|
|
// must be adjusted before calling this function.
|
|
func (l *Lightning) adjust(i *TikvImporter) {
|
|
switch i.Backend {
|
|
case BackendTiDB:
|
|
if l.TableConcurrency == 0 {
|
|
l.TableConcurrency = l.RegionConcurrency
|
|
}
|
|
if l.IndexConcurrency == 0 {
|
|
l.IndexConcurrency = l.RegionConcurrency
|
|
}
|
|
case BackendLocal:
|
|
if l.IndexConcurrency == 0 {
|
|
l.IndexConcurrency = defaultIndexConcurrency
|
|
}
|
|
if l.TableConcurrency == 0 {
|
|
l.TableConcurrency = DefaultTableConcurrency
|
|
}
|
|
|
|
if len(l.MetaSchemaName) == 0 {
|
|
l.MetaSchemaName = defaultMetaSchemaName
|
|
}
|
|
// RegionConcurrency > NumCPU is meaningless.
|
|
cpuCount := runtime.NumCPU()
|
|
if l.RegionConcurrency > cpuCount {
|
|
l.RegionConcurrency = cpuCount
|
|
}
|
|
}
|
|
}
|
|
|
|
// PostOpLevel represents the level of post-operation.
|
|
type PostOpLevel int
|
|
|
|
// PostOpLevel constants.
|
|
const (
|
|
OpLevelOff PostOpLevel = iota
|
|
OpLevelOptional
|
|
OpLevelRequired
|
|
)
|
|
|
|
// UnmarshalTOML implements toml.Unmarshaler interface.
|
|
func (t *PostOpLevel) UnmarshalTOML(v any) error {
|
|
switch val := v.(type) {
|
|
case bool:
|
|
if val {
|
|
*t = OpLevelRequired
|
|
} else {
|
|
*t = OpLevelOff
|
|
}
|
|
case string:
|
|
return t.FromStringValue(val)
|
|
default:
|
|
return errors.Errorf("invalid op level '%v', please choose valid option between ['off', 'optional', 'required']", v)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// MarshalText implements encoding.TextMarshaler interface.
|
|
func (t PostOpLevel) MarshalText() ([]byte, error) {
|
|
return []byte(t.String()), nil
|
|
}
|
|
|
|
// FromStringValue parse command line parameter.
|
|
func (t *PostOpLevel) FromStringValue(s string) error {
|
|
switch strings.ToLower(s) {
|
|
//nolint:goconst // This 'false' and other 'false's aren't the same.
|
|
case "off", "false":
|
|
*t = OpLevelOff
|
|
case "required", "true":
|
|
*t = OpLevelRequired
|
|
case "optional":
|
|
*t = OpLevelOptional
|
|
default:
|
|
return errors.Errorf("invalid op level '%s', please choose valid option between ['off', 'optional', 'required']", s)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// MarshalJSON implements json.Marshaler interface.
|
|
func (t *PostOpLevel) MarshalJSON() ([]byte, error) {
|
|
return []byte(`"` + t.String() + `"`), nil
|
|
}
|
|
|
|
// UnmarshalJSON implements json.Unmarshaler interface.
|
|
func (t *PostOpLevel) UnmarshalJSON(data []byte) error {
|
|
return t.FromStringValue(strings.Trim(string(data), `"`))
|
|
}
|
|
|
|
// String returns the string representation of the level.
|
|
func (t PostOpLevel) String() string {
|
|
switch t {
|
|
case OpLevelOff:
|
|
return "off"
|
|
case OpLevelOptional:
|
|
return "optional"
|
|
case OpLevelRequired:
|
|
return "required"
|
|
default:
|
|
panic(fmt.Sprintf("invalid post process type '%d'", t))
|
|
}
|
|
}
|
|
|
|
// CheckpointKeepStrategy represents the strategy to keep checkpoint data.
|
|
type CheckpointKeepStrategy int
|
|
|
|
const (
|
|
// CheckpointRemove remove checkpoint data
|
|
CheckpointRemove CheckpointKeepStrategy = iota
|
|
// CheckpointRename keep by rename checkpoint file/db according to task id
|
|
CheckpointRename
|
|
// CheckpointOrigin keep checkpoint data unchanged
|
|
CheckpointOrigin
|
|
)
|
|
|
|
// UnmarshalTOML implements toml.Unmarshaler interface.
|
|
func (t *CheckpointKeepStrategy) UnmarshalTOML(v any) error {
|
|
switch val := v.(type) {
|
|
case bool:
|
|
if val {
|
|
*t = CheckpointRename
|
|
} else {
|
|
*t = CheckpointRemove
|
|
}
|
|
case string:
|
|
return t.FromStringValue(val)
|
|
default:
|
|
return errors.Errorf("invalid checkpoint keep strategy '%v', please choose valid option between ['remove', 'rename', 'origin']", v)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// MarshalText implements encoding.TextMarshaler interface.
|
|
func (t CheckpointKeepStrategy) MarshalText() ([]byte, error) {
|
|
return []byte(t.String()), nil
|
|
}
|
|
|
|
// FromStringValue parser command line parameter.
|
|
func (t *CheckpointKeepStrategy) FromStringValue(s string) error {
|
|
switch strings.ToLower(s) {
|
|
//nolint:goconst // This 'false' and other 'false's aren't the same.
|
|
case "remove", "false":
|
|
*t = CheckpointRemove
|
|
case "rename", "true":
|
|
*t = CheckpointRename
|
|
case "origin":
|
|
*t = CheckpointOrigin
|
|
default:
|
|
return errors.Errorf("invalid checkpoint keep strategy '%s', please choose valid option between ['remove', 'rename', 'origin']", s)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// MarshalJSON implements json.Marshaler interface.
|
|
func (t *CheckpointKeepStrategy) MarshalJSON() ([]byte, error) {
|
|
return []byte(`"` + t.String() + `"`), nil
|
|
}
|
|
|
|
// UnmarshalJSON implements json.Unmarshaler interface.
|
|
func (t *CheckpointKeepStrategy) UnmarshalJSON(data []byte) error {
|
|
return t.FromStringValue(strings.Trim(string(data), `"`))
|
|
}
|
|
|
|
// String implements fmt.Stringer interface.
|
|
func (t CheckpointKeepStrategy) String() string {
|
|
switch t {
|
|
case CheckpointRemove:
|
|
return "remove"
|
|
case CheckpointRename:
|
|
return "rename"
|
|
case CheckpointOrigin:
|
|
return "origin"
|
|
default:
|
|
panic(fmt.Sprintf("invalid post process type '%d'", t))
|
|
}
|
|
}
|
|
|
|
// MaxError configures the maximum number of acceptable errors per kind.
|
|
type MaxError struct {
|
|
// Syntax is the maximum number of syntax errors accepted.
|
|
// When tolerated, the file chunk causing syntax error will be skipped, and adds 1 to the counter.
|
|
// TODO Currently this is hard-coded to zero.
|
|
Syntax atomic.Int64 `toml:"syntax" json:"-"`
|
|
|
|
// Charset is the maximum number of character-set conversion errors accepted.
|
|
// When tolerated, and `data-invalid-char-replace` is not changed from "\ufffd",
|
|
// every invalid byte in the source file will be converted to U+FFFD and adds 1 to the counter.
|
|
// Note that a failed conversion a column's character set (e.g. UTF8-to-GBK conversion)
|
|
// is counted as a type error, not a charset error.
|
|
// TODO character-set conversion is not yet implemented.
|
|
Charset atomic.Int64 `toml:"charset" json:"-"`
|
|
|
|
// Type is the maximum number of type errors accepted.
|
|
// This includes strict-mode errors such as zero in dates, integer overflow, character string too long, etc.
|
|
// In TiDB backend, this also includes all possible SQL errors raised from INSERT,
|
|
// such as unique key conflict when `on-duplicate` is set to `error`.
|
|
// When tolerated, the row causing the error will be skipped, and adds 1 to the counter.
|
|
// The default value is zero, which means that such errors are not tolerated.
|
|
Type atomic.Int64 `toml:"type" json:"type"`
|
|
|
|
// deprecated, use `conflict.threshold` instead.
|
|
// Conflict is the maximum number of unique key conflicts in local backend accepted.
|
|
// When tolerated, every pair of conflict adds 1 to the counter.
|
|
// Those pairs will NOT be deleted from the target. Conflict resolution is performed separately.
|
|
// The default value is max int64, which means conflict errors will be recorded as much as possible.
|
|
// Sometime the actual number of conflict record logged will be greater than the value configured here,
|
|
// because conflict error data are recorded batch by batch.
|
|
// If the limit is reached in a single batch, the entire batch of records will be persisted before an error is reported.
|
|
Conflict atomic.Int64 `toml:"conflict" json:"conflict"`
|
|
}
|
|
|
|
// UnmarshalTOML implements toml.Unmarshaler interface.
|
|
func (cfg *MaxError) UnmarshalTOML(v any) error {
|
|
defaultValMap := map[string]int64{
|
|
"syntax": 0,
|
|
"charset": math.MaxInt64,
|
|
"type": 0,
|
|
}
|
|
// set default value first
|
|
cfg.Syntax.Store(defaultValMap["syntax"])
|
|
cfg.Charset.Store(defaultValMap["charset"])
|
|
cfg.Type.Store(defaultValMap["type"])
|
|
switch val := v.(type) {
|
|
case int64:
|
|
// ignore val that is smaller than 0
|
|
if val >= 0 {
|
|
// only set type error
|
|
cfg.Type.Store(val)
|
|
}
|
|
return nil
|
|
case map[string]any:
|
|
// support stuff like `max-error = { charset = 1000, type = 1000 }`.
|
|
getVal := func(k string, v any) int64 {
|
|
defaultVal, ok := defaultValMap[k]
|
|
if !ok {
|
|
return 0
|
|
}
|
|
iVal, ok := v.(int64)
|
|
if !ok || iVal < 0 {
|
|
return defaultVal
|
|
}
|
|
return iVal
|
|
}
|
|
for k, v := range val {
|
|
if k == "type" {
|
|
cfg.Type.Store(getVal(k, v))
|
|
}
|
|
}
|
|
return nil
|
|
default:
|
|
return errors.Errorf("invalid max-error '%v', should be an integer or a map of string:int64", v)
|
|
}
|
|
}
|
|
|
|
// PausePDSchedulerScope the scope when pausing pd schedulers.
|
|
type PausePDSchedulerScope string
|
|
|
|
// constants for PausePDSchedulerScope.
|
|
const (
|
|
// PausePDSchedulerScopeTable pause scheduler by adding schedule=deny label to target key range of the table.
|
|
PausePDSchedulerScopeTable PausePDSchedulerScope = "table"
|
|
// PausePDSchedulerScopeGlobal pause scheduler by remove global schedulers.
|
|
// schedulers removed includes:
|
|
// - balance-leader-scheduler
|
|
// - balance-hot-region-scheduler
|
|
// - balance-region-scheduler
|
|
// - shuffle-leader-scheduler
|
|
// - shuffle-region-scheduler
|
|
// - shuffle-hot-region-scheduler
|
|
// and we also set configs below:
|
|
// - max-merge-region-keys = 0
|
|
// - max-merge-region-size = 0
|
|
// - leader-schedule-limit = min(40, <store-count> * <current value of leader-schedule-limit>)
|
|
// - region-schedule-limit = min(40, <store-count> * <current value of region-schedule-limit>)
|
|
// - max-snapshot-count = min(40, <store-count> * <current value of max-snapshot-count>)
|
|
// - enable-location-replacement = false
|
|
// - max-pending-peer-count = math.MaxInt32
|
|
// see br/pkg/pdutil/pd.go for more detail.
|
|
PausePDSchedulerScopeGlobal PausePDSchedulerScope = "global"
|
|
)
|
|
|
|
// DuplicateResolutionAlgorithm is the config type of how to resolve duplicates.
|
|
type DuplicateResolutionAlgorithm int
|
|
|
|
const (
|
|
// NoneOnDup does nothing when detecting duplicate.
|
|
NoneOnDup DuplicateResolutionAlgorithm = iota
|
|
// ReplaceOnDup indicates using REPLACE INTO to insert data for TiDB backend.
|
|
// ReplaceOnDup records all duplicate records, remove some rows with conflict
|
|
// and reserve other rows that can be kept and not cause conflict anymore for local backend.
|
|
// Users need to analyze the lightning_task_info.conflict_view table to check whether the reserved data
|
|
// cater to their need and check whether they need to add back the correct rows.
|
|
ReplaceOnDup
|
|
// IgnoreOnDup indicates using INSERT IGNORE INTO to insert data for TiDB backend.
|
|
// Local backend does not support IgnoreOnDup.
|
|
IgnoreOnDup
|
|
// ErrorOnDup indicates using INSERT INTO to insert data for TiDB backend, which would violate PK or UNIQUE constraint when detecting duplicate.
|
|
// ErrorOnDup reports an error after detecting the first conflict and stops the import process for local backend.
|
|
ErrorOnDup
|
|
)
|
|
|
|
// UnmarshalTOML implements the toml.Unmarshaler interface.
|
|
func (dra *DuplicateResolutionAlgorithm) UnmarshalTOML(v any) error {
|
|
if val, ok := v.(string); ok {
|
|
return dra.FromStringValue(val)
|
|
}
|
|
return errors.Errorf("invalid conflict.strategy '%v', please choose valid option between ['', 'replace', 'ignore', 'error']", v)
|
|
}
|
|
|
|
// MarshalText implements the encoding.TextMarshaler interface.
|
|
func (dra DuplicateResolutionAlgorithm) MarshalText() ([]byte, error) {
|
|
return []byte(dra.String()), nil
|
|
}
|
|
|
|
// FromStringValue parses the string value to the DuplicateResolutionAlgorithm.
|
|
func (dra *DuplicateResolutionAlgorithm) FromStringValue(s string) error {
|
|
switch strings.ToLower(s) {
|
|
case "", "none":
|
|
*dra = NoneOnDup
|
|
case "replace":
|
|
*dra = ReplaceOnDup
|
|
case "ignore":
|
|
*dra = IgnoreOnDup
|
|
case "error":
|
|
*dra = ErrorOnDup
|
|
case "remove", "record":
|
|
log.L().Warn("\"conflict.strategy '%s' is no longer supported, has been converted to 'replace'")
|
|
*dra = ReplaceOnDup
|
|
default:
|
|
return errors.Errorf("invalid conflict.strategy '%s', please choose valid option between ['', 'replace', 'ignore', 'error']", s)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// MarshalJSON implements the json.Marshaler interface.
|
|
func (dra *DuplicateResolutionAlgorithm) MarshalJSON() ([]byte, error) {
|
|
return []byte(`"` + dra.String() + `"`), nil
|
|
}
|
|
|
|
// UnmarshalJSON implements the json.Unmarshaler interface.
|
|
func (dra *DuplicateResolutionAlgorithm) UnmarshalJSON(data []byte) error {
|
|
return dra.FromStringValue(strings.Trim(string(data), `"`))
|
|
}
|
|
|
|
// String implements the fmt.Stringer interface.
|
|
func (dra DuplicateResolutionAlgorithm) String() string {
|
|
switch dra {
|
|
case NoneOnDup:
|
|
return ""
|
|
case ReplaceOnDup:
|
|
return "replace"
|
|
case IgnoreOnDup:
|
|
return "ignore"
|
|
case ErrorOnDup:
|
|
return "error"
|
|
default:
|
|
panic(fmt.Sprintf("invalid conflict.strategy type '%d'", dra))
|
|
}
|
|
}
|
|
|
|
// CompressionType is the config type of compression algorithm.
|
|
type CompressionType int
|
|
|
|
const (
|
|
// CompressionNone means no compression.
|
|
CompressionNone CompressionType = iota
|
|
// CompressionGzip means gzip compression.
|
|
CompressionGzip
|
|
)
|
|
|
|
// UnmarshalTOML implements toml.Unmarshaler.
|
|
func (t *CompressionType) UnmarshalTOML(v any) error {
|
|
if val, ok := v.(string); ok {
|
|
return t.FromStringValue(val)
|
|
}
|
|
return errors.Errorf("invalid compression-type '%v', please choose valid option between ['gzip']", v)
|
|
}
|
|
|
|
// MarshalText implements encoding.TextMarshaler.
|
|
func (t CompressionType) MarshalText() ([]byte, error) {
|
|
return []byte(t.String()), nil
|
|
}
|
|
|
|
// FromStringValue parses a string to CompressionType.
|
|
func (t *CompressionType) FromStringValue(s string) error {
|
|
switch strings.ToLower(s) {
|
|
case "":
|
|
*t = CompressionNone
|
|
case "gz", "gzip":
|
|
*t = CompressionGzip
|
|
default:
|
|
return errors.Errorf("invalid compression-type '%s', please choose valid option between ['gzip']", s)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// MarshalJSON implements json.Marshaler.
|
|
func (t *CompressionType) MarshalJSON() ([]byte, error) {
|
|
return []byte(`"` + t.String() + `"`), nil
|
|
}
|
|
|
|
// UnmarshalJSON implements json.Unmarshaler.
|
|
func (t *CompressionType) UnmarshalJSON(data []byte) error {
|
|
return t.FromStringValue(strings.Trim(string(data), `"`))
|
|
}
|
|
|
|
// String implements fmt.Stringer.
|
|
func (t CompressionType) String() string {
|
|
switch t {
|
|
case CompressionGzip:
|
|
return "gzip"
|
|
case CompressionNone:
|
|
return ""
|
|
default:
|
|
panic(fmt.Sprintf("invalid compression type '%d'", t))
|
|
}
|
|
}
|
|
|
|
// PostRestore has some options which will be executed after kv restored.
|
|
type PostRestore struct {
|
|
Checksum PostOpLevel `toml:"checksum" json:"checksum"`
|
|
Analyze PostOpLevel `toml:"analyze" json:"analyze"`
|
|
Level1Compact bool `toml:"level-1-compact" json:"level-1-compact"`
|
|
PostProcessAtLast bool `toml:"post-process-at-last" json:"post-process-at-last"`
|
|
Compact bool `toml:"compact" json:"compact"`
|
|
ChecksumViaSQL bool `toml:"checksum-via-sql" json:"checksum-via-sql"`
|
|
}
|
|
|
|
// adjust assigns default values and check illegal values. The input TikvImporter
|
|
// must be adjusted before calling this function.
|
|
func (p *PostRestore) adjust(i *TikvImporter) {
|
|
if i.Backend != BackendTiDB {
|
|
return
|
|
}
|
|
p.Checksum = OpLevelOff
|
|
p.Analyze = OpLevelOff
|
|
p.Compact = false
|
|
p.ChecksumViaSQL = false
|
|
}
|
|
|
|
// StringOrStringSlice can unmarshal a TOML string as string slice with one element.
|
|
type StringOrStringSlice []string
|
|
|
|
// UnmarshalTOML implements the toml.Unmarshaler interface.
|
|
func (s *StringOrStringSlice) UnmarshalTOML(in any) error {
|
|
switch v := in.(type) {
|
|
case string:
|
|
*s = []string{v}
|
|
case []any:
|
|
*s = make([]string, 0, len(v))
|
|
for _, vv := range v {
|
|
vs, ok := vv.(string)
|
|
if !ok {
|
|
return errors.Errorf("invalid string slice '%v'", in)
|
|
}
|
|
*s = append(*s, vs)
|
|
}
|
|
default:
|
|
return errors.Errorf("invalid string slice '%v'", in)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// CSVConfig is the config for CSV files.
|
|
type CSVConfig struct {
|
|
// Separator, Delimiter and Terminator should all be in utf8mb4 encoding.
|
|
Separator string `toml:"separator" json:"separator"`
|
|
Delimiter string `toml:"delimiter" json:"delimiter"`
|
|
Terminator string `toml:"terminator" json:"terminator"`
|
|
Null StringOrStringSlice `toml:"null" json:"null"`
|
|
Header bool `toml:"header" json:"header"`
|
|
HeaderSchemaMatch bool `toml:"header-schema-match" json:"header-schema-match"`
|
|
TrimLastSep bool `toml:"trim-last-separator" json:"trim-last-separator"`
|
|
NotNull bool `toml:"not-null" json:"not-null"`
|
|
// deprecated, use `escaped-by` instead.
|
|
BackslashEscape bool `toml:"backslash-escape" json:"backslash-escape"`
|
|
// EscapedBy has higher priority than BackslashEscape, currently it must be a single character if set.
|
|
EscapedBy string `toml:"escaped-by" json:"escaped-by"`
|
|
|
|
// hide these options for lightning configuration file, they can only be used by LOAD DATA
|
|
// https://dev.mysql.com/doc/refman/8.0/en/load-data.html#load-data-field-line-handling
|
|
StartingBy string `toml:"-" json:"-"`
|
|
AllowEmptyLine bool `toml:"-" json:"-"`
|
|
// For non-empty Delimiter (for example quotes), null elements inside quotes are not considered as null except for
|
|
// `\N` (when escape-by is `\`). That is to say, `\N` is special for null because it always means null.
|
|
QuotedNullIsText bool `toml:"-" json:"-"`
|
|
// ref https://dev.mysql.com/doc/refman/8.0/en/load-data.html
|
|
// > If the field begins with the ENCLOSED BY character, instances of that character are recognized as terminating a
|
|
// > field value only if followed by the field or line TERMINATED BY sequence.
|
|
// This means we will meet unescaped quote in a quoted field
|
|
// > The "BIG" boss -> The "BIG" boss
|
|
// This means we will meet unescaped quote in a unquoted field
|
|
UnescapedQuote bool `toml:"-" json:"-"`
|
|
}
|
|
|
|
func (csv *CSVConfig) adjust() error {
|
|
if len(csv.Separator) == 0 {
|
|
return common.ErrInvalidConfig.GenWithStack("`mydumper.csv.separator` must not be empty")
|
|
}
|
|
|
|
if len(csv.Delimiter) > 0 && (strings.HasPrefix(csv.Separator, csv.Delimiter) || strings.HasPrefix(csv.Delimiter, csv.Separator)) {
|
|
return common.ErrInvalidConfig.GenWithStack("`mydumper.csv.separator` and `mydumper.csv.delimiter` must not be prefix of each other")
|
|
}
|
|
|
|
if len(csv.EscapedBy) > 1 {
|
|
return common.ErrInvalidConfig.GenWithStack("`mydumper.csv.escaped-by` must be empty or a single character")
|
|
}
|
|
if csv.BackslashEscape && csv.EscapedBy == "" {
|
|
csv.EscapedBy = `\`
|
|
}
|
|
if !csv.BackslashEscape && csv.EscapedBy == `\` {
|
|
csv.EscapedBy = ""
|
|
}
|
|
|
|
// keep compatibility with old behaviour
|
|
if !csv.NotNull && len(csv.Null) == 0 {
|
|
csv.Null = []string{""}
|
|
}
|
|
|
|
if len(csv.EscapedBy) > 0 {
|
|
if csv.Separator == csv.EscapedBy {
|
|
return common.ErrInvalidConfig.GenWithStack("cannot use '%s' both as CSV separator and `mydumper.csv.escaped-by`", csv.EscapedBy)
|
|
}
|
|
if csv.Delimiter == csv.EscapedBy {
|
|
return common.ErrInvalidConfig.GenWithStack("cannot use '%s' both as CSV delimiter and `mydumper.csv.escaped-by`", csv.EscapedBy)
|
|
}
|
|
if csv.Terminator == csv.EscapedBy {
|
|
return common.ErrInvalidConfig.GenWithStack("cannot use '%s' both as CSV terminator and `mydumper.csv.escaped-by`", csv.EscapedBy)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// MydumperRuntime is the runtime config for mydumper.
|
|
type MydumperRuntime struct {
|
|
ReadBlockSize ByteSize `toml:"read-block-size" json:"read-block-size"`
|
|
BatchSize ByteSize `toml:"batch-size" json:"batch-size"`
|
|
BatchImportRatio float64 `toml:"batch-import-ratio" json:"batch-import-ratio"`
|
|
SourceID string `toml:"source-id" json:"source-id"`
|
|
SourceDir string `toml:"data-source-dir" json:"data-source-dir"`
|
|
CharacterSet string `toml:"character-set" json:"character-set"`
|
|
CSV CSVConfig `toml:"csv" json:"csv"`
|
|
MaxRegionSize ByteSize `toml:"max-region-size" json:"max-region-size"`
|
|
Filter []string `toml:"filter" json:"filter"`
|
|
FileRouters []*FileRouteRule `toml:"files" json:"files"`
|
|
// Deprecated: only used to keep the compatibility.
|
|
NoSchema bool `toml:"no-schema" json:"no-schema"`
|
|
CaseSensitive bool `toml:"case-sensitive" json:"case-sensitive"`
|
|
StrictFormat bool `toml:"strict-format" json:"strict-format"`
|
|
DefaultFileRules bool `toml:"default-file-rules" json:"default-file-rules"`
|
|
IgnoreColumns AllIgnoreColumns `toml:"ignore-data-columns" json:"ignore-data-columns"`
|
|
// DataCharacterSet is the character set of the source file. Only CSV files are supported now. The following options are supported.
|
|
// - utf8mb4
|
|
// - GB18030
|
|
// - GBK: an extension of the GB2312 character set and is also known as Code Page 936.
|
|
// - latin1: IANA Windows1252
|
|
// - binary: no attempt to convert the encoding.
|
|
// Leave DataCharacterSet empty will make it use `binary` by default.
|
|
DataCharacterSet string `toml:"data-character-set" json:"data-character-set"`
|
|
// DataInvalidCharReplace is the replacement characters for non-compatible characters, which shouldn't duplicate with the separators or line breaks.
|
|
// Changing the default value will result in increased parsing time. Non-compatible characters do not cause an increase in error.
|
|
DataInvalidCharReplace string `toml:"data-invalid-char-replace" json:"data-invalid-char-replace"`
|
|
}
|
|
|
|
func (m *MydumperRuntime) adjust() error {
|
|
if err := m.CSV.adjust(); err != nil {
|
|
return err
|
|
}
|
|
if m.StrictFormat && len(m.CSV.Terminator) == 0 {
|
|
return common.ErrInvalidConfig.GenWithStack(
|
|
`mydumper.strict-format can not be used with empty mydumper.csv.terminator. Please set mydumper.csv.terminator to a non-empty value like "\r\n"`)
|
|
}
|
|
|
|
for _, rule := range m.FileRouters {
|
|
if filepath.IsAbs(rule.Path) {
|
|
relPath, err := filepath.Rel(m.SourceDir, rule.Path)
|
|
if err != nil {
|
|
return common.ErrInvalidConfig.Wrap(err).
|
|
GenWithStack("cannot find relative path for file route path %s", rule.Path)
|
|
}
|
|
// ".." means that this path is not in source dir, so we should return an error
|
|
if strings.HasPrefix(relPath, "..") {
|
|
return common.ErrInvalidConfig.GenWithStack(
|
|
"file route path '%s' is not in source dir '%s'", rule.Path, m.SourceDir)
|
|
}
|
|
rule.Path = relPath
|
|
}
|
|
}
|
|
|
|
// enable default file route rule if no rules are set
|
|
if len(m.FileRouters) == 0 {
|
|
m.DefaultFileRules = true
|
|
}
|
|
|
|
if len(m.DataCharacterSet) == 0 {
|
|
m.DataCharacterSet = defaultCSVDataCharacterSet
|
|
}
|
|
charset, err1 := ParseCharset(m.DataCharacterSet)
|
|
if err1 != nil {
|
|
return common.ErrInvalidConfig.Wrap(err1).GenWithStack("invalid `mydumper.data-character-set`")
|
|
}
|
|
if charset == GBK || charset == GB18030 {
|
|
log.L().Warn(
|
|
"incompatible strings may be encountered during the transcoding process and will be replaced, please be aware of the risk of not being able to retain the original information",
|
|
zap.String("source-character-set", charset.String()),
|
|
zap.ByteString("invalid-char-replacement", []byte(m.DataInvalidCharReplace)))
|
|
}
|
|
if m.BatchImportRatio < 0.0 || m.BatchImportRatio >= 1.0 {
|
|
m.BatchImportRatio = DefaultBatchImportRatio
|
|
}
|
|
if m.ReadBlockSize <= 0 {
|
|
m.ReadBlockSize = ReadBlockSize
|
|
}
|
|
if len(m.CharacterSet) == 0 {
|
|
m.CharacterSet = "auto"
|
|
}
|
|
|
|
if len(m.IgnoreColumns) != 0 {
|
|
// Tolower columns cause we use Name.L to compare column in tidb.
|
|
for _, ig := range m.IgnoreColumns {
|
|
cols := make([]string, len(ig.Columns))
|
|
for i, col := range ig.Columns {
|
|
cols[i] = strings.ToLower(col)
|
|
}
|
|
ig.Columns = cols
|
|
}
|
|
}
|
|
return m.adjustFilePath()
|
|
}
|
|
|
|
// adjustFilePath checks and adjusts the file path.
|
|
func (m *MydumperRuntime) adjustFilePath() error {
|
|
var u *url.URL
|
|
|
|
// An absolute Windows path like "C:\Users\XYZ" would be interpreted as
|
|
// an URL with scheme "C" and opaque data "\Users\XYZ".
|
|
// Therefore, we only perform URL parsing if we are sure the path is not
|
|
// an absolute Windows path.
|
|
// Here we use the `filepath.VolumeName` which can identify the "C:" part
|
|
// out of the path. On Linux this method always return an empty string.
|
|
// On Windows, the drive letter can only be single letters from "A:" to "Z:",
|
|
// so this won't mistake "S3:" as a Windows path.
|
|
if len(filepath.VolumeName(m.SourceDir)) == 0 {
|
|
var err error
|
|
u, err = url.Parse(m.SourceDir)
|
|
if err != nil {
|
|
return common.ErrInvalidConfig.Wrap(err).GenWithStack("cannot parse `mydumper.data-source-dir` %s", m.SourceDir)
|
|
}
|
|
} else {
|
|
u = &url.URL{}
|
|
}
|
|
|
|
// convert path and relative path to a valid file url
|
|
if u.Scheme == "" {
|
|
if m.SourceDir == "" {
|
|
return common.ErrInvalidConfig.GenWithStack("`mydumper.data-source-dir` is not set")
|
|
}
|
|
if !common.IsDirExists(m.SourceDir) {
|
|
return common.ErrInvalidConfig.GenWithStack("'%s': `mydumper.data-source-dir` does not exist", m.SourceDir)
|
|
}
|
|
absPath, err := filepath.Abs(m.SourceDir)
|
|
if err != nil {
|
|
return common.ErrInvalidConfig.Wrap(err).GenWithStack("covert data-source-dir '%s' to absolute path failed", m.SourceDir)
|
|
}
|
|
u.Path = filepath.ToSlash(absPath)
|
|
u.Scheme = "file"
|
|
m.SourceDir = u.String()
|
|
}
|
|
|
|
found := false
|
|
for _, t := range supportedStorageTypes {
|
|
if u.Scheme == t {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
return common.ErrInvalidConfig.GenWithStack(
|
|
"unsupported data-source-dir url '%s', supported storage types are %s",
|
|
m.SourceDir, strings.Join(supportedStorageTypes, ","))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// AllIgnoreColumns is a slice of IgnoreColumns.
|
|
type AllIgnoreColumns []*IgnoreColumns
|
|
|
|
// IgnoreColumns is the config for ignoring columns.
|
|
type IgnoreColumns struct {
|
|
DB string `toml:"db" json:"db"`
|
|
Table string `toml:"table" json:"table"`
|
|
TableFilter []string `toml:"table-filter" json:"table-filter"`
|
|
Columns []string `toml:"columns" json:"columns"`
|
|
}
|
|
|
|
// ColumnsMap returns a map of columns.
|
|
func (ic *IgnoreColumns) ColumnsMap() map[string]struct{} {
|
|
columnMap := make(map[string]struct{}, len(ic.Columns))
|
|
for _, c := range ic.Columns {
|
|
columnMap[c] = struct{}{}
|
|
}
|
|
return columnMap
|
|
}
|
|
|
|
// GetIgnoreColumns gets Ignore config by schema name/regex and table name/regex.
|
|
func (igCols AllIgnoreColumns) GetIgnoreColumns(db string, table string, caseSensitive bool) (*IgnoreColumns, error) {
|
|
if !caseSensitive {
|
|
db = strings.ToLower(db)
|
|
table = strings.ToLower(table)
|
|
}
|
|
for i, ig := range igCols {
|
|
if ig.DB == db && ig.Table == table {
|
|
return igCols[i], nil
|
|
}
|
|
f, err := filter.Parse(ig.TableFilter)
|
|
if err != nil {
|
|
return nil, common.ErrInvalidConfig.GenWithStack("invalid table filter %s in ignore columns", strings.Join(ig.TableFilter, ","))
|
|
}
|
|
if f.MatchTable(db, table) {
|
|
return igCols[i], nil
|
|
}
|
|
}
|
|
return &IgnoreColumns{Columns: make([]string, 0)}, nil
|
|
}
|
|
|
|
// FileRouteRule is the rule for routing files.
|
|
type FileRouteRule struct {
|
|
Pattern string `json:"pattern" toml:"pattern" yaml:"pattern"`
|
|
Path string `json:"path" toml:"path" yaml:"path"`
|
|
Schema string `json:"schema" toml:"schema" yaml:"schema"`
|
|
Table string `json:"table" toml:"table" yaml:"table"`
|
|
Type string `json:"type" toml:"type" yaml:"type"`
|
|
Key string `json:"key" toml:"key" yaml:"key"`
|
|
Compression string `json:"compression" toml:"compression" yaml:"compression"`
|
|
// unescape the schema/table name only used in lightning's internal logic now.
|
|
Unescape bool `json:"-" toml:"-" yaml:"-"`
|
|
// TODO: DataCharacterSet here can override the same field in [mydumper.csv] with a higher level.
|
|
// This could provide users a more flexible usage to configure different files with
|
|
// different data charsets.
|
|
// DataCharacterSet string `toml:"data-character-set" json:"data-character-set"`
|
|
}
|
|
|
|
// TikvImporter is the config for tikv-importer.
|
|
type TikvImporter struct {
|
|
// Deprecated: only used to keep the compatibility.
|
|
Addr string `toml:"addr" json:"addr"`
|
|
Backend string `toml:"backend" json:"backend"`
|
|
// deprecated, use Conflict.Strategy instead.
|
|
OnDuplicate DuplicateResolutionAlgorithm `toml:"on-duplicate" json:"on-duplicate"`
|
|
MaxKVPairs int `toml:"max-kv-pairs" json:"max-kv-pairs"`
|
|
// deprecated
|
|
SendKVPairs int `toml:"send-kv-pairs" json:"send-kv-pairs"`
|
|
SendKVSize ByteSize `toml:"send-kv-size" json:"send-kv-size"`
|
|
CompressKVPairs CompressionType `toml:"compress-kv-pairs" json:"compress-kv-pairs"`
|
|
RegionSplitSize ByteSize `toml:"region-split-size" json:"region-split-size"`
|
|
RegionSplitKeys int `toml:"region-split-keys" json:"region-split-keys"`
|
|
RegionSplitBatchSize int `toml:"region-split-batch-size" json:"region-split-batch-size"`
|
|
RegionSplitConcurrency int `toml:"region-split-concurrency" json:"region-split-concurrency"`
|
|
RegionCheckBackoffLimit int `toml:"region-check-backoff-limit" json:"region-check-backoff-limit"`
|
|
SortedKVDir string `toml:"sorted-kv-dir" json:"sorted-kv-dir"`
|
|
DiskQuota ByteSize `toml:"disk-quota" json:"disk-quota"`
|
|
RangeConcurrency int `toml:"range-concurrency" json:"range-concurrency"`
|
|
// deprecated, use Conflict.Strategy instead.
|
|
DuplicateResolution DuplicateResolutionAlgorithm `toml:"duplicate-resolution" json:"duplicate-resolution"`
|
|
// deprecated, use ParallelImport instead.
|
|
IncrementalImport bool `toml:"incremental-import" json:"incremental-import"`
|
|
ParallelImport bool `toml:"parallel-import" json:"parallel-import"`
|
|
KeyspaceName string `toml:"keyspace-name" json:"keyspace-name"`
|
|
AddIndexBySQL bool `toml:"add-index-by-sql" json:"add-index-by-sql"`
|
|
|
|
EngineMemCacheSize ByteSize `toml:"engine-mem-cache-size" json:"engine-mem-cache-size"`
|
|
LocalWriterMemCacheSize ByteSize `toml:"local-writer-mem-cache-size" json:"local-writer-mem-cache-size"`
|
|
StoreWriteBWLimit ByteSize `toml:"store-write-bwlimit" json:"store-write-bwlimit"`
|
|
LogicalImportBatchSize ByteSize `toml:"logical-import-batch-size" json:"logical-import-batch-size"`
|
|
LogicalImportBatchRows int `toml:"logical-import-batch-rows" json:"logical-import-batch-rows"`
|
|
|
|
// default is PausePDSchedulerScopeTable to compatible with previous version(>= 6.1)
|
|
PausePDSchedulerScope PausePDSchedulerScope `toml:"pause-pd-scheduler-scope" json:"pause-pd-scheduler-scope"`
|
|
BlockSize ByteSize `toml:"block-size" json:"block-size"`
|
|
}
|
|
|
|
func (t *TikvImporter) adjust() error {
|
|
if t.Backend == "" {
|
|
return common.ErrInvalidConfig.GenWithStack("tikv-importer.backend must not be empty!")
|
|
}
|
|
t.Backend = strings.ToLower(t.Backend)
|
|
// only need to assign t.IncrementalImport to t.ParallelImport when t.ParallelImport is false and t.IncrementalImport is true
|
|
if !t.ParallelImport && t.IncrementalImport {
|
|
t.ParallelImport = t.IncrementalImport
|
|
}
|
|
switch t.Backend {
|
|
case BackendTiDB:
|
|
if t.LogicalImportBatchSize <= 0 {
|
|
return common.ErrInvalidConfig.GenWithStack(
|
|
"`tikv-importer.logical-import-batch-size` got %d, should be larger than 0",
|
|
t.LogicalImportBatchSize)
|
|
}
|
|
if t.LogicalImportBatchRows <= 0 {
|
|
return common.ErrInvalidConfig.GenWithStack(
|
|
"`tikv-importer.logical-import-batch-rows` got %d, should be larger than 0",
|
|
t.LogicalImportBatchRows)
|
|
}
|
|
case BackendLocal:
|
|
if t.RegionSplitBatchSize <= 0 {
|
|
return common.ErrInvalidConfig.GenWithStack(
|
|
"`tikv-importer.region-split-batch-size` got %d, should be larger than 0",
|
|
t.RegionSplitBatchSize)
|
|
}
|
|
if t.RegionSplitConcurrency <= 0 {
|
|
return common.ErrInvalidConfig.GenWithStack(
|
|
"`tikv-importer.region-split-concurrency` got %d, should be larger than 0",
|
|
t.RegionSplitConcurrency)
|
|
}
|
|
if t.RangeConcurrency == 0 {
|
|
t.RangeConcurrency = DefaultRangeConcurrency
|
|
}
|
|
if t.EngineMemCacheSize == 0 {
|
|
t.EngineMemCacheSize = DefaultEngineMemCacheSize
|
|
}
|
|
if t.LocalWriterMemCacheSize == 0 {
|
|
t.LocalWriterMemCacheSize = DefaultLocalWriterMemCacheSize
|
|
}
|
|
if t.BlockSize == 0 {
|
|
t.BlockSize = DefaultBlockSize
|
|
}
|
|
|
|
if t.ParallelImport && t.AddIndexBySQL {
|
|
return common.ErrInvalidConfig.
|
|
GenWithStack("tikv-importer.add-index-using-ddl cannot be used with tikv-importer.parallel-import")
|
|
}
|
|
|
|
if len(t.SortedKVDir) == 0 {
|
|
return common.ErrInvalidConfig.GenWithStack("tikv-importer.sorted-kv-dir must not be empty!")
|
|
}
|
|
|
|
storageSizeDir := filepath.Clean(t.SortedKVDir)
|
|
sortedKVDirInfo, err := os.Stat(storageSizeDir)
|
|
|
|
switch {
|
|
case os.IsNotExist(err):
|
|
case err == nil:
|
|
if !sortedKVDirInfo.IsDir() {
|
|
return common.ErrInvalidConfig.
|
|
GenWithStack("tikv-importer.sorted-kv-dir ('%s') is not a directory", storageSizeDir)
|
|
}
|
|
default:
|
|
return common.ErrInvalidConfig.Wrap(err).GenWithStack("invalid tikv-importer.sorted-kv-dir")
|
|
}
|
|
default:
|
|
return common.ErrInvalidConfig.GenWithStack(
|
|
"unsupported `tikv-importer.backend` (%s)",
|
|
t.Backend)
|
|
}
|
|
|
|
t.PausePDSchedulerScope = PausePDSchedulerScope(strings.ToLower(string(t.PausePDSchedulerScope)))
|
|
switch t.PausePDSchedulerScope {
|
|
case PausePDSchedulerScopeTable, PausePDSchedulerScopeGlobal:
|
|
default:
|
|
return common.ErrInvalidConfig.GenWithStack("pause-pd-scheduler-scope is invalid, allowed value include: table, global")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Checkpoint is the config for checkpoint.
|
|
type Checkpoint struct {
|
|
Schema string `toml:"schema" json:"schema"`
|
|
DSN string `toml:"dsn" json:"-"` // DSN may contain password, don't expose this to JSON.
|
|
MySQLParam *common.MySQLConnectParam `toml:"-" json:"-"` // For some security reason, we use MySQLParam instead of DSN.
|
|
Driver string `toml:"driver" json:"driver"`
|
|
Enable bool `toml:"enable" json:"enable"`
|
|
KeepAfterSuccess CheckpointKeepStrategy `toml:"keep-after-success" json:"keep-after-success"`
|
|
}
|
|
|
|
// adjust assigns default values and check illegal values. The input DBStore
|
|
// must be adjusted before calling this function.
|
|
func (c *Checkpoint) adjust(t *DBStore) {
|
|
if len(c.Schema) == 0 {
|
|
c.Schema = "tidb_lightning_checkpoint"
|
|
}
|
|
if len(c.Driver) == 0 {
|
|
c.Driver = CheckpointDriverFile
|
|
}
|
|
if len(c.DSN) == 0 {
|
|
switch c.Driver {
|
|
case CheckpointDriverMySQL:
|
|
param := common.MySQLConnectParam{
|
|
Host: t.Host,
|
|
Port: t.Port,
|
|
User: t.User,
|
|
Password: t.Psw,
|
|
SQLMode: mysql.DefaultSQLMode,
|
|
MaxAllowedPacket: defaultMaxAllowedPacket,
|
|
TLSConfig: t.Security.TLSConfig,
|
|
AllowFallbackToPlaintext: t.Security.AllowFallbackToPlaintext,
|
|
}
|
|
c.MySQLParam = ¶m
|
|
case CheckpointDriverFile:
|
|
c.DSN = "/tmp/" + c.Schema + ".pb"
|
|
}
|
|
} else {
|
|
// try to remove allowAllFiles
|
|
mysqlCfg, err := gomysql.ParseDSN(c.DSN)
|
|
if err != nil {
|
|
return
|
|
}
|
|
mysqlCfg.AllowAllFiles = false
|
|
c.DSN = mysqlCfg.FormatDSN()
|
|
}
|
|
}
|
|
|
|
// Cron is the config for cron.
|
|
type Cron struct {
|
|
SwitchMode Duration `toml:"switch-mode" json:"switch-mode"`
|
|
LogProgress Duration `toml:"log-progress" json:"log-progress"`
|
|
CheckDiskQuota Duration `toml:"check-disk-quota" json:"check-disk-quota"`
|
|
}
|
|
|
|
// Security is the config for security.
|
|
type Security struct {
|
|
CAPath string `toml:"ca-path" json:"ca-path"`
|
|
CertPath string `toml:"cert-path" json:"cert-path"`
|
|
KeyPath string `toml:"key-path" json:"key-path"`
|
|
// RedactInfoLog indicates that whether enabling redact log
|
|
RedactInfoLog bool `toml:"redact-info-log" json:"redact-info-log"`
|
|
|
|
TLSConfig *tls.Config `toml:"-" json:"-"`
|
|
AllowFallbackToPlaintext bool `toml:"-" json:"-"`
|
|
|
|
// When DM/engine uses lightning as a library, it can directly pass in the content
|
|
CABytes []byte `toml:"-" json:"-"`
|
|
CertBytes []byte `toml:"-" json:"-"`
|
|
KeyBytes []byte `toml:"-" json:"-"`
|
|
}
|
|
|
|
// BuildTLSConfig builds the tls config which is used by SQL drier later.
|
|
func (sec *Security) BuildTLSConfig() error {
|
|
if sec == nil || sec.TLSConfig != nil {
|
|
return nil
|
|
}
|
|
|
|
tlsConfig, err := util.NewTLSConfig(
|
|
util.WithCAPath(sec.CAPath),
|
|
util.WithCertAndKeyPath(sec.CertPath, sec.KeyPath),
|
|
util.WithCAContent(sec.CABytes),
|
|
util.WithCertAndKeyContent(sec.CertBytes, sec.KeyBytes),
|
|
)
|
|
if err != nil {
|
|
return errors.Trace(err)
|
|
}
|
|
sec.TLSConfig = tlsConfig
|
|
return nil
|
|
}
|
|
|
|
// Duration which can be deserialized from a TOML string.
|
|
// Implemented as https://github.com/BurntSushi/toml#using-the-encodingtextunmarshaler-interface
|
|
type Duration struct {
|
|
time.Duration
|
|
}
|
|
|
|
// UnmarshalText implements encoding.TextUnmarshaler.
|
|
func (d *Duration) UnmarshalText(text []byte) error {
|
|
var err error
|
|
d.Duration, err = time.ParseDuration(string(text))
|
|
return errors.Trace(err)
|
|
}
|
|
|
|
// MarshalText implements encoding.TextMarshaler.
|
|
func (d Duration) MarshalText() ([]byte, error) {
|
|
return []byte(d.String()), nil
|
|
}
|
|
|
|
// MarshalJSON implements json.Marshaler.
|
|
func (d *Duration) MarshalJSON() ([]byte, error) {
|
|
return []byte(fmt.Sprintf(`"%s"`, d.Duration)), nil
|
|
}
|
|
|
|
// Charset defines character set
|
|
type Charset int
|
|
|
|
// Charset constants
|
|
const (
|
|
Binary Charset = iota
|
|
UTF8MB4
|
|
GB18030
|
|
GBK
|
|
Latin1
|
|
ASCII
|
|
)
|
|
|
|
// String return the string value of charset
|
|
func (c Charset) String() string {
|
|
switch c {
|
|
case Binary:
|
|
return "binary"
|
|
case UTF8MB4:
|
|
return "utf8mb4"
|
|
case GB18030:
|
|
return "gb18030"
|
|
case GBK:
|
|
return "gbk"
|
|
case Latin1:
|
|
return "latin1"
|
|
case ASCII:
|
|
return "ascii"
|
|
default:
|
|
return "unknown_charset"
|
|
}
|
|
}
|
|
|
|
// ParseCharset parser character set for string
|
|
func ParseCharset(dataCharacterSet string) (Charset, error) {
|
|
switch strings.ToLower(dataCharacterSet) {
|
|
case "", "binary":
|
|
return Binary, nil
|
|
case "utf8", "utf8mb4":
|
|
return UTF8MB4, nil
|
|
case "gb18030":
|
|
return GB18030, nil
|
|
case "gbk":
|
|
return GBK, nil
|
|
case "latin1":
|
|
return Latin1, nil
|
|
case "ascii":
|
|
return ASCII, nil
|
|
default:
|
|
return Binary, errors.Errorf("found unsupported data-character-set: %s", dataCharacterSet)
|
|
}
|
|
}
|
|
|
|
// Conflict is the config section for PK/UK conflict related configurations.
|
|
type Conflict struct {
|
|
Strategy DuplicateResolutionAlgorithm `toml:"strategy" json:"strategy"`
|
|
PrecheckConflictBeforeImport bool `toml:"precheck-conflict-before-import" json:"precheck-conflict-before-import"`
|
|
Threshold int64 `toml:"threshold" json:"threshold"`
|
|
MaxRecordRows int64 `toml:"max-record-rows" json:"max-record-rows"`
|
|
}
|
|
|
|
// adjust assigns default values and check illegal values. The arguments must be
|
|
// adjusted before calling this function.
|
|
func (c *Conflict) adjust(i *TikvImporter) error {
|
|
strategyConfigFrom := "conflict.strategy"
|
|
if c.Strategy == NoneOnDup {
|
|
if i.OnDuplicate == NoneOnDup && i.Backend == BackendTiDB {
|
|
c.Strategy = ErrorOnDup
|
|
}
|
|
if i.OnDuplicate != NoneOnDup {
|
|
strategyConfigFrom = "tikv-importer.on-duplicate"
|
|
c.Strategy = i.OnDuplicate
|
|
}
|
|
}
|
|
strategyFromDuplicateResolution := false
|
|
if c.Strategy == NoneOnDup && i.DuplicateResolution != NoneOnDup {
|
|
c.Strategy = i.DuplicateResolution
|
|
strategyFromDuplicateResolution = true
|
|
}
|
|
switch c.Strategy {
|
|
case ReplaceOnDup, IgnoreOnDup, ErrorOnDup, NoneOnDup:
|
|
default:
|
|
return common.ErrInvalidConfig.GenWithStack(
|
|
"unsupported `%s` (%s)", strategyConfigFrom, c.Strategy)
|
|
}
|
|
if !strategyFromDuplicateResolution && c.Strategy != NoneOnDup && i.DuplicateResolution != NoneOnDup {
|
|
return common.ErrInvalidConfig.GenWithStack(
|
|
"%s cannot be used with tikv-importer.duplicate-resolution",
|
|
strategyConfigFrom)
|
|
}
|
|
if c.Strategy == IgnoreOnDup && i.Backend == BackendLocal {
|
|
return common.ErrInvalidConfig.GenWithStack(
|
|
`%s cannot be set to "ignore" when use tikv-importer.backend = "local"`,
|
|
strategyConfigFrom)
|
|
}
|
|
if c.PrecheckConflictBeforeImport && i.Backend == BackendTiDB {
|
|
return common.ErrInvalidConfig.GenWithStack(
|
|
`conflict.precheck-conflict-before-import cannot be set to true when use tikv-importer.backend = "tidb"`)
|
|
}
|
|
|
|
if c.Threshold < 0 {
|
|
switch c.Strategy {
|
|
case ErrorOnDup, NoneOnDup:
|
|
c.Threshold = 0
|
|
case IgnoreOnDup, ReplaceOnDup:
|
|
c.Threshold = DefaultRecordDuplicateThreshold
|
|
}
|
|
}
|
|
if c.Threshold > 0 && c.Strategy == ErrorOnDup {
|
|
return common.ErrInvalidConfig.GenWithStack(
|
|
`conflict.threshold cannot be set when use conflict.strategy = "error"`)
|
|
}
|
|
|
|
if c.Strategy == ReplaceOnDup && i.Backend == BackendTiDB {
|
|
// due to we use batch insert, we can't know which row is duplicated.
|
|
if c.MaxRecordRows >= 0 {
|
|
// only warn when it is set by user.
|
|
log.L().Warn(`Cannot record duplication (conflict.max-record-rows > 0) when use tikv-importer.backend = \"tidb\" and conflict.strategy = \"replace\".
|
|
The value of conflict.max-record-rows has been converted to 0.`)
|
|
}
|
|
c.MaxRecordRows = 0
|
|
} else {
|
|
if c.MaxRecordRows >= 0 {
|
|
// only warn when it is set by user.
|
|
log.L().Warn("Setting conflict.max-record-rows does not take affect. The value of conflict.max-record-rows has been converted to conflict.threshold.")
|
|
}
|
|
c.MaxRecordRows = c.Threshold
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// NewConfig creates a new Config.
|
|
func NewConfig() *Config {
|
|
return &Config{
|
|
App: Lightning{
|
|
RegionConcurrency: runtime.NumCPU(),
|
|
TableConcurrency: 0,
|
|
IndexConcurrency: 0,
|
|
IOConcurrency: 5,
|
|
CheckRequirements: true,
|
|
TaskInfoSchemaName: defaultTaskInfoSchemaName,
|
|
},
|
|
Checkpoint: Checkpoint{
|
|
Enable: true,
|
|
},
|
|
TiDB: DBStore{
|
|
Host: "127.0.0.1",
|
|
User: "root",
|
|
StatusPort: 10080,
|
|
StrSQLMode: "ONLY_FULL_GROUP_BY,NO_AUTO_CREATE_USER",
|
|
MaxAllowedPacket: defaultMaxAllowedPacket,
|
|
BuildStatsConcurrency: defaultBuildStatsConcurrency,
|
|
DistSQLScanConcurrency: defaultDistSQLScanConcurrency,
|
|
IndexSerialScanConcurrency: defaultIndexSerialScanConcurrency,
|
|
ChecksumTableConcurrency: defaultChecksumTableConcurrency,
|
|
},
|
|
Cron: Cron{
|
|
SwitchMode: Duration{Duration: DefaultSwitchTiKVModeInterval},
|
|
LogProgress: Duration{Duration: 5 * time.Minute},
|
|
CheckDiskQuota: Duration{Duration: 1 * time.Minute},
|
|
},
|
|
Mydumper: MydumperRuntime{
|
|
ReadBlockSize: ReadBlockSize,
|
|
CSV: CSVConfig{
|
|
Separator: ",",
|
|
Delimiter: `"`,
|
|
Header: true,
|
|
HeaderSchemaMatch: true,
|
|
NotNull: false,
|
|
Null: []string{`\N`},
|
|
BackslashEscape: true,
|
|
EscapedBy: `\`,
|
|
TrimLastSep: false,
|
|
},
|
|
StrictFormat: false,
|
|
MaxRegionSize: MaxRegionSize,
|
|
Filter: GetDefaultFilter(),
|
|
DataCharacterSet: defaultCSVDataCharacterSet,
|
|
DataInvalidCharReplace: string(defaultCSVDataInvalidCharReplace),
|
|
},
|
|
TikvImporter: TikvImporter{
|
|
Backend: "",
|
|
MaxKVPairs: 4096,
|
|
SendKVPairs: 32768,
|
|
SendKVSize: KVWriteBatchSize,
|
|
RegionSplitSize: 0,
|
|
RegionSplitBatchSize: DefaultRegionSplitBatchSize,
|
|
RegionSplitConcurrency: runtime.GOMAXPROCS(0),
|
|
RegionCheckBackoffLimit: DefaultRegionCheckBackoffLimit,
|
|
DiskQuota: ByteSize(math.MaxInt64),
|
|
DuplicateResolution: NoneOnDup,
|
|
PausePDSchedulerScope: PausePDSchedulerScopeTable,
|
|
BlockSize: 16 * 1024,
|
|
LogicalImportBatchSize: ByteSize(defaultLogicalImportBatchSize),
|
|
LogicalImportBatchRows: defaultLogicalImportBatchRows,
|
|
},
|
|
PostRestore: PostRestore{
|
|
Checksum: OpLevelRequired,
|
|
Analyze: OpLevelOptional,
|
|
PostProcessAtLast: true,
|
|
ChecksumViaSQL: false,
|
|
},
|
|
Conflict: Conflict{
|
|
Strategy: NoneOnDup,
|
|
PrecheckConflictBeforeImport: false,
|
|
Threshold: -1,
|
|
MaxRecordRows: -1,
|
|
},
|
|
}
|
|
}
|
|
|
|
// LoadFromGlobal resets the current configuration to the global settings.
|
|
func (cfg *Config) LoadFromGlobal(global *GlobalConfig) error {
|
|
if err := cfg.LoadFromTOML(global.ConfigFileContent); err != nil {
|
|
return err
|
|
}
|
|
|
|
cfg.TiDB.Host = global.TiDB.Host
|
|
cfg.TiDB.Port = global.TiDB.Port
|
|
cfg.TiDB.User = global.TiDB.User
|
|
cfg.TiDB.Psw = global.TiDB.Psw
|
|
cfg.TiDB.StatusPort = global.TiDB.StatusPort
|
|
cfg.TiDB.PdAddr = global.TiDB.PdAddr
|
|
cfg.Mydumper.NoSchema = global.Mydumper.NoSchema
|
|
cfg.Mydumper.SourceDir = global.Mydumper.SourceDir
|
|
cfg.Mydumper.Filter = global.Mydumper.Filter
|
|
cfg.TikvImporter.Backend = global.TikvImporter.Backend
|
|
cfg.TikvImporter.SortedKVDir = global.TikvImporter.SortedKVDir
|
|
cfg.Checkpoint.Enable = global.Checkpoint.Enable
|
|
cfg.PostRestore.Checksum = global.PostRestore.Checksum
|
|
cfg.PostRestore.Analyze = global.PostRestore.Analyze
|
|
cfg.App.CheckRequirements = global.App.CheckRequirements
|
|
cfg.Security = global.Security
|
|
cfg.Mydumper.IgnoreColumns = global.Mydumper.IgnoreColumns
|
|
return nil
|
|
}
|
|
|
|
// LoadFromTOML overwrites the current configuration by the TOML data
|
|
// If data contains toml items not in Config and GlobalConfig, return an error
|
|
// If data contains toml items not in Config, thus won't take effect, warn user
|
|
func (cfg *Config) LoadFromTOML(data []byte) error {
|
|
// bothUnused saves toml items not belong to Config nor GlobalConfig
|
|
var bothUnused []string
|
|
// warnItems saves legal toml items but won't effect
|
|
var warnItems []string
|
|
|
|
dataStr := string(data)
|
|
|
|
// Here we load toml into cfg, and rest logic is check unused keys
|
|
metaData, err := toml.Decode(dataStr, cfg)
|
|
if err != nil {
|
|
return errors.Trace(err)
|
|
}
|
|
|
|
unusedConfigKeys := metaData.Undecoded()
|
|
if len(unusedConfigKeys) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Now we deal with potential both-unused keys of Config and GlobalConfig struct
|
|
|
|
metaDataGlobal, err := toml.Decode(dataStr, &GlobalConfig{})
|
|
if err != nil {
|
|
return errors.Trace(err)
|
|
}
|
|
|
|
// Key type returned by metadata.Undecoded doesn't have a equality comparison,
|
|
// we convert them to string type instead, and this conversion is identical
|
|
unusedGlobalKeys := metaDataGlobal.Undecoded()
|
|
unusedGlobalKeyStrs := make(map[string]struct{})
|
|
for _, key := range unusedGlobalKeys {
|
|
unusedGlobalKeyStrs[key.String()] = struct{}{}
|
|
}
|
|
|
|
iterateUnusedKeys:
|
|
for _, key := range unusedConfigKeys {
|
|
keyStr := key.String()
|
|
switch keyStr {
|
|
// these keys are not counted as decoded by toml decoder, but actually they are decoded,
|
|
// because the corresponding unmarshal logic handles these key's decoding in a custom way
|
|
case "lightning.max-error.type",
|
|
"lightning.max-error.conflict":
|
|
continue iterateUnusedKeys
|
|
}
|
|
if _, found := unusedGlobalKeyStrs[keyStr]; found {
|
|
bothUnused = append(bothUnused, keyStr)
|
|
} else {
|
|
warnItems = append(warnItems, keyStr)
|
|
}
|
|
}
|
|
|
|
if len(bothUnused) > 0 {
|
|
return errors.Errorf("config file contained unknown configuration options: %s",
|
|
strings.Join(bothUnused, ", "))
|
|
}
|
|
|
|
// Warn that some legal field of config file won't be overwritten, such as lightning.file
|
|
if len(warnItems) > 0 {
|
|
log.L().Warn("currently only per-task configuration can be applied, global configuration changes can only be made on startup",
|
|
zap.Strings("global config changes", warnItems))
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Adjust fixes the invalid or unspecified settings to reasonable valid values,
|
|
// and checks for illegal configuration.
|
|
func (cfg *Config) Adjust(ctx context.Context) error {
|
|
// note that the argument of `adjust` should be `adjust`ed before using it.
|
|
|
|
if err := cfg.TikvImporter.adjust(); err != nil {
|
|
return err
|
|
}
|
|
cfg.App.adjust(&cfg.TikvImporter)
|
|
if err := cfg.Mydumper.adjust(); err != nil {
|
|
return err
|
|
}
|
|
cfg.PostRestore.adjust(&cfg.TikvImporter)
|
|
tlsObj, err := cfg.ToTLS()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err = cfg.TiDB.adjust(ctx, &cfg.TikvImporter, &cfg.Security, tlsObj); err != nil {
|
|
return err
|
|
}
|
|
cfg.Checkpoint.adjust(&cfg.TiDB)
|
|
if err = cfg.Routes.adjust(&cfg.Mydumper); err != nil {
|
|
return err
|
|
}
|
|
return cfg.Conflict.adjust(&cfg.TikvImporter)
|
|
}
|