// tidb/pkg/lightning/config/config.go
// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package config
import (
"context"
"crypto/tls"
"encoding/json"
"fmt"
"math"
"net"
"net/url"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
"unicode/utf8"
"github.com/BurntSushi/toml"
"github.com/docker/go-units"
gomysql "github.com/go-sql-driver/mysql"
"github.com/pingcap/errors"
tidbcfg "github.com/pingcap/tidb/pkg/config"
"github.com/pingcap/tidb/pkg/lightning/common"
"github.com/pingcap/tidb/pkg/lightning/log"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/util"
filter "github.com/pingcap/tidb/pkg/util/table-filter"
router "github.com/pingcap/tidb/pkg/util/table-router"
"go.uber.org/atomic"
"go.uber.org/zap"
)
// constants for config items
const (
// ImportMode defines mode of import for tikv.
ImportMode = "import"
// NormalMode defines mode of normal for tikv.
NormalMode = "normal"
// BackendTiDB is a constant for choosing the "TiDB" backend in the configuration.
BackendTiDB = "tidb"
// BackendLocal is a constant for choosing the "Local" backend in the configuration.
// In this mode, we write & sort kv pairs with local storage and directly write them to tikv.
BackendLocal = "local"
// CheckpointDriverMySQL is a constant for choosing the "MySQL" checkpoint driver in the configuration.
CheckpointDriverMySQL = "mysql"
// CheckpointDriverFile is a constant for choosing the "File" checkpoint driver in the configuration.
CheckpointDriverFile = "file"
// KVWriteBatchSize is the batch size when writing to TiKV.
// It is also the default value of the Linux send buffer size (net.ipv4.tcp_wmem).
KVWriteBatchSize = 16 * units.KiB
DefaultRangeConcurrency = 16
defaultDistSQLScanConcurrency = 15
defaultBuildStatsConcurrency = 20
defaultIndexSerialScanConcurrency = 20
defaultChecksumTableConcurrency = 2
DefaultTableConcurrency = 6
defaultIndexConcurrency = 2
DefaultRegionCheckBackoffLimit = 1800
DefaultRegionSplitBatchSize = 4096
defaultLogicalImportBatchSize = 96 * units.KiB
defaultLogicalImportBatchRows = 65536
// defaultMetaSchemaName is the default database name used to store lightning metadata
defaultMetaSchemaName = "lightning_metadata"
defaultTaskInfoSchemaName = "lightning_task_info"
DefaultRecordDuplicateThreshold = 10000
// autoDiskQuotaLocalReservedSpeed is the estimated size increase per
// millisecond per write thread the local backend may gain on all engines.
// This is used to compute the maximum size overshoot between two disk quota
// checks, if the first one has barely passed.
//
// With cron.check-disk-quota = 1m, region-concurrency = 40, this should
// contribute 2.3 GiB to the reserved size.
// autoDiskQuotaLocalReservedSpeed uint64 = 1 * units.KiB
DefaultEngineMemCacheSize = 512 * units.MiB
DefaultLocalWriterMemCacheSize = 128 * units.MiB
DefaultBlockSize = 16 * units.KiB
defaultCSVDataCharacterSet = "binary"
defaultCSVDataInvalidCharReplace = utf8.RuneError
DefaultSwitchTiKVModeInterval = 5 * time.Minute
)
var (
supportedStorageTypes = []string{"file", "local", "s3", "noop", "gcs", "gs"}
defaultFilter = []string{
"*.*",
"!mysql.*",
"!sys.*",
"!INFORMATION_SCHEMA.*",
"!PERFORMANCE_SCHEMA.*",
"!METRICS_SCHEMA.*",
"!INSPECTION_SCHEMA.*",
}
)
// GetDefaultFilter gets the default table filter used in Lightning.
// It returns a copy of the default filter so that changes to the returned
// slice's elements do not affect the original value.
func GetDefaultFilter() []string {
return append([]string{}, defaultFilter...)
}
// DBStore is the database connection information.
type DBStore struct {
Host string `toml:"host" json:"host"`
Port int `toml:"port" json:"port"`
User string `toml:"user" json:"user"`
Psw string `toml:"password" json:"-"`
StatusPort int `toml:"status-port" json:"status-port"`
PdAddr string `toml:"pd-addr" json:"pd-addr"`
StrSQLMode string `toml:"sql-mode" json:"sql-mode"`
TLS string `toml:"tls" json:"tls"`
Security *Security `toml:"security" json:"security"`
SQLMode mysql.SQLMode `toml:"-" json:"-"`
MaxAllowedPacket uint64 `toml:"max-allowed-packet" json:"max-allowed-packet"`
DistSQLScanConcurrency int `toml:"distsql-scan-concurrency" json:"distsql-scan-concurrency"`
BuildStatsConcurrency int `toml:"build-stats-concurrency" json:"build-stats-concurrency"`
IndexSerialScanConcurrency int `toml:"index-serial-scan-concurrency" json:"index-serial-scan-concurrency"`
ChecksumTableConcurrency int `toml:"checksum-table-concurrency" json:"checksum-table-concurrency"`
Vars map[string]string `toml:"session-vars" json:"vars"`
IOTotalBytes *atomic.Uint64 `toml:"-" json:"-"`
UUID string `toml:"-" json:"-"`
}
// adjust assigns default values and checks illegal values. The arguments must be
// adjusted before calling this function.
func (d *DBStore) adjust(
ctx context.Context,
i *TikvImporter,
s *Security,
tlsObj *common.TLS,
) error {
if i.Backend == BackendLocal {
if d.BuildStatsConcurrency == 0 {
d.BuildStatsConcurrency = defaultBuildStatsConcurrency
}
if d.IndexSerialScanConcurrency == 0 {
d.IndexSerialScanConcurrency = defaultIndexSerialScanConcurrency
}
if d.ChecksumTableConcurrency == 0 {
d.ChecksumTableConcurrency = defaultChecksumTableConcurrency
}
}
var err error
d.SQLMode, err = mysql.GetSQLMode(d.StrSQLMode)
if err != nil {
return common.ErrInvalidConfig.Wrap(err).GenWithStack("`mydumper.tidb.sql_mode` must be a valid SQL_MODE")
}
if d.Security == nil {
d.Security = &Security{
CAPath: s.CAPath,
CertPath: s.CertPath,
KeyPath: s.KeyPath,
CABytes: s.CABytes,
CertBytes: s.CertBytes,
KeyBytes: s.KeyBytes,
RedactInfoLog: s.RedactInfoLog,
TLSConfig: s.TLSConfig,
AllowFallbackToPlaintext: s.AllowFallbackToPlaintext,
}
}
switch d.TLS {
case "preferred":
d.Security.AllowFallbackToPlaintext = true
fallthrough
case "skip-verify":
if d.Security.TLSConfig == nil {
/* #nosec G402 */
d.Security.TLSConfig = &tls.Config{
MinVersion: tls.VersionTLS12,
InsecureSkipVerify: true,
NextProtos: []string{"h2", "http/1.1"}, // specify `h2` to let Go use HTTP/2.
}
} else {
d.Security.TLSConfig.InsecureSkipVerify = true
}
case "cluster":
if len(s.CAPath) == 0 {
return common.ErrInvalidConfig.GenWithStack("cannot set `tidb.tls` to 'cluster' without a [security] section")
}
case "":
case "false":
d.Security.TLSConfig = nil
d.Security.CAPath = ""
d.Security.CertPath = ""
d.Security.CABytes = nil
d.Security.CertBytes = nil
d.Security.KeyPath = ""
d.Security.KeyBytes = nil
default:
return common.ErrInvalidConfig.GenWithStack("unsupported `tidb.tls` config %s", d.TLS)
}
mustHaveInternalConnections := i.Backend == BackendLocal
// automatically determine the TiDB port & PD address from TiDB settings
if mustHaveInternalConnections && (d.Port <= 0 || len(d.PdAddr) == 0) {
var settings tidbcfg.Config
err = tlsObj.GetJSON(ctx, "/settings", &settings)
if err != nil {
return common.ErrInvalidConfig.Wrap(err).GenWithStack("cannot fetch settings from TiDB, please manually fill in `tidb.port` and `tidb.pd-addr`")
}
if d.Port <= 0 {
d.Port = int(settings.Port)
}
if len(d.PdAddr) == 0 {
// verify that it is not an empty string
pdAddrs := strings.Split(settings.Path, ",")
for _, ip := range pdAddrs {
ipPort := strings.Split(ip, ":")
if len(ipPort[0]) == 0 {
return common.ErrInvalidConfig.GenWithStack("invalid `tidb.pd-addr` setting")
}
if len(ipPort) < 2 || len(ipPort[1]) == 0 || ipPort[1] == "0" {
return common.ErrInvalidConfig.GenWithStack("invalid `tidb.port` setting")
}
}
d.PdAddr = settings.Path
}
}
if d.Port <= 0 {
return common.ErrInvalidConfig.GenWithStack("invalid `tidb.port` setting")
}
if mustHaveInternalConnections && len(d.PdAddr) == 0 {
return common.ErrInvalidConfig.GenWithStack("invalid `tidb.pd-addr` setting")
}
return nil
}
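// For illustration only, a minimal [tidb] section that decodes into DBStore might
// look like the following; the host/port values are placeholders, not recommendations:
//
//    [tidb]
//    host = "127.0.0.1"
//    port = 4000
//    status-port = 10080
//    user = "root"
//    # one of "", "false", "preferred", "skip-verify", "cluster"
//    tls = "preferred"
//
// With the local backend, `port` and `pd-addr` may be left unset; they are then
// discovered from the TiDB status endpoint as shown in adjust above.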
// Routes is an alias of []*router.TableRule. It's used to attach methods to []*router.TableRule.
type Routes []*router.TableRule
func (r *Routes) adjust(m *MydumperRuntime) error {
for _, rule := range *r {
if !m.CaseSensitive {
rule.ToLower()
}
if err := rule.Valid(); err != nil {
return common.ErrInvalidConfig.Wrap(err).GenWithStack("file route rule is invalid")
}
}
return nil
}
// Config is the configuration.
type Config struct {
TaskID int64 `toml:"-" json:"id"`
App Lightning `toml:"lightning" json:"lightning"`
TiDB DBStore `toml:"tidb" json:"tidb"`
Checkpoint Checkpoint `toml:"checkpoint" json:"checkpoint"`
Mydumper MydumperRuntime `toml:"mydumper" json:"mydumper"`
TikvImporter TikvImporter `toml:"tikv-importer" json:"tikv-importer"`
PostRestore PostRestore `toml:"post-restore" json:"post-restore"`
Cron Cron `toml:"cron" json:"cron"`
Routes Routes `toml:"routes" json:"routes"`
Security Security `toml:"security" json:"security"`
Conflict Conflict `toml:"conflict" json:"conflict"`
}
// String implements fmt.Stringer interface.
func (cfg *Config) String() string {
bytes, err := json.Marshal(cfg)
if err != nil {
log.L().Error("marshal config to json error", log.ShortError(err))
}
return string(bytes)
}
// ToTLS creates a common.TLS from the config.
func (cfg *Config) ToTLS() (*common.TLS, error) {
hostPort := net.JoinHostPort(cfg.TiDB.Host, strconv.Itoa(cfg.TiDB.StatusPort))
return common.NewTLS(
cfg.Security.CAPath,
cfg.Security.CertPath,
cfg.Security.KeyPath,
hostPort,
cfg.Security.CABytes,
cfg.Security.CertBytes,
cfg.Security.KeyBytes,
)
}
// Lightning is the root configuration of lightning.
type Lightning struct {
TableConcurrency int `toml:"table-concurrency" json:"table-concurrency"`
IndexConcurrency int `toml:"index-concurrency" json:"index-concurrency"`
RegionConcurrency int `toml:"region-concurrency" json:"region-concurrency"`
IOConcurrency int `toml:"io-concurrency" json:"io-concurrency"`
CheckRequirements bool `toml:"check-requirements" json:"check-requirements"`
MetaSchemaName string `toml:"meta-schema-name" json:"meta-schema-name"`
MaxError MaxError `toml:"max-error" json:"max-error"`
// deprecated, use Conflict.MaxRecordRows instead
MaxErrorRecords int64 `toml:"max-error-records" json:"max-error-records"`
TaskInfoSchemaName string `toml:"task-info-schema-name" json:"task-info-schema-name"`
}
// adjust assigns default values and checks illegal values. The input TikvImporter
// must be adjusted before calling this function.
func (l *Lightning) adjust(i *TikvImporter) {
switch i.Backend {
case BackendTiDB:
if l.TableConcurrency == 0 {
l.TableConcurrency = l.RegionConcurrency
}
if l.IndexConcurrency == 0 {
l.IndexConcurrency = l.RegionConcurrency
}
case BackendLocal:
if l.IndexConcurrency == 0 {
l.IndexConcurrency = defaultIndexConcurrency
}
if l.TableConcurrency == 0 {
l.TableConcurrency = DefaultTableConcurrency
}
if len(l.MetaSchemaName) == 0 {
l.MetaSchemaName = defaultMetaSchemaName
}
// RegionConcurrency > NumCPU is meaningless.
cpuCount := runtime.NumCPU()
if l.RegionConcurrency > cpuCount {
l.RegionConcurrency = cpuCount
}
}
}
// PostOpLevel represents the level of post-operation.
type PostOpLevel int
// PostOpLevel constants.
const (
OpLevelOff PostOpLevel = iota
OpLevelOptional
OpLevelRequired
)
// UnmarshalTOML implements toml.Unmarshaler interface.
func (t *PostOpLevel) UnmarshalTOML(v any) error {
switch val := v.(type) {
case bool:
if val {
*t = OpLevelRequired
} else {
*t = OpLevelOff
}
case string:
return t.FromStringValue(val)
default:
return errors.Errorf("invalid op level '%v', please choose valid option between ['off', 'optional', 'required']", v)
}
return nil
}
// MarshalText implements encoding.TextMarshaler interface.
func (t PostOpLevel) MarshalText() ([]byte, error) {
return []byte(t.String()), nil
}
// FromStringValue parses a command line parameter.
func (t *PostOpLevel) FromStringValue(s string) error {
switch strings.ToLower(s) {
//nolint:goconst // This 'false' and other 'false's aren't the same.
case "off", "false":
*t = OpLevelOff
case "required", "true":
*t = OpLevelRequired
case "optional":
*t = OpLevelOptional
default:
return errors.Errorf("invalid op level '%s', please choose valid option between ['off', 'optional', 'required']", s)
}
return nil
}
// MarshalJSON implements json.Marshaler interface.
func (t *PostOpLevel) MarshalJSON() ([]byte, error) {
return []byte(`"` + t.String() + `"`), nil
}
// UnmarshalJSON implements json.Unmarshaler interface.
func (t *PostOpLevel) UnmarshalJSON(data []byte) error {
return t.FromStringValue(strings.Trim(string(data), `"`))
}
// String returns the string representation of the level.
func (t PostOpLevel) String() string {
switch t {
case OpLevelOff:
return "off"
case OpLevelOptional:
return "optional"
case OpLevelRequired:
return "required"
default:
panic(fmt.Sprintf("invalid post process type '%d'", t))
}
}
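// For illustration only: PostOpLevel accepts either a boolean or a string in TOML,
// so the following forms are equivalent ways to configure the post-restore levels:
//
//    [post-restore]
//    checksum = "required"   # same as `checksum = true`
//    analyze = "optional"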
// CheckpointKeepStrategy represents the strategy to keep checkpoint data.
type CheckpointKeepStrategy int
const (
// CheckpointRemove remove checkpoint data
CheckpointRemove CheckpointKeepStrategy = iota
// CheckpointRename keep by rename checkpoint file/db according to task id
CheckpointRename
// CheckpointOrigin keep checkpoint data unchanged
CheckpointOrigin
)
// UnmarshalTOML implements toml.Unmarshaler interface.
func (t *CheckpointKeepStrategy) UnmarshalTOML(v any) error {
switch val := v.(type) {
case bool:
if val {
*t = CheckpointRename
} else {
*t = CheckpointRemove
}
case string:
return t.FromStringValue(val)
default:
return errors.Errorf("invalid checkpoint keep strategy '%v', please choose valid option between ['remove', 'rename', 'origin']", v)
}
return nil
}
// MarshalText implements encoding.TextMarshaler interface.
func (t CheckpointKeepStrategy) MarshalText() ([]byte, error) {
return []byte(t.String()), nil
}
// FromStringValue parses a command line parameter.
func (t *CheckpointKeepStrategy) FromStringValue(s string) error {
switch strings.ToLower(s) {
//nolint:goconst // This 'false' and other 'false's aren't the same.
case "remove", "false":
*t = CheckpointRemove
case "rename", "true":
*t = CheckpointRename
case "origin":
*t = CheckpointOrigin
default:
return errors.Errorf("invalid checkpoint keep strategy '%s', please choose valid option between ['remove', 'rename', 'origin']", s)
}
return nil
}
// MarshalJSON implements json.Marshaler interface.
func (t *CheckpointKeepStrategy) MarshalJSON() ([]byte, error) {
return []byte(`"` + t.String() + `"`), nil
}
// UnmarshalJSON implements json.Unmarshaler interface.
func (t *CheckpointKeepStrategy) UnmarshalJSON(data []byte) error {
return t.FromStringValue(strings.Trim(string(data), `"`))
}
// String implements fmt.Stringer interface.
func (t CheckpointKeepStrategy) String() string {
switch t {
case CheckpointRemove:
return "remove"
case CheckpointRename:
return "rename"
case CheckpointOrigin:
return "origin"
default:
panic(fmt.Sprintf("invalid checkpoint keep strategy '%d'", t))
}
}
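// For illustration only, a [checkpoint] section using the string form of the keep
// strategy; `keep-after-success = true` would be decoded as "rename":
//
//    [checkpoint]
//    enable = true
//    driver = "file"
//    keep-after-success = "rename"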
// MaxError configures the maximum number of acceptable errors per kind.
type MaxError struct {
// Syntax is the maximum number of syntax errors accepted.
// When tolerated, the file chunk causing the syntax error will be skipped, and the counter increases by 1.
// TODO Currently this is hard-coded to zero.
Syntax atomic.Int64 `toml:"syntax" json:"-"`
// Charset is the maximum number of character-set conversion errors accepted.
// When tolerated, and `data-invalid-char-replace` is not changed from "\ufffd",
// every invalid byte in the source file will be converted to U+FFFD and the counter increases by 1.
// Note that a failed conversion of a column's character set (e.g. UTF8-to-GBK conversion)
// is counted as a type error, not a charset error.
// TODO character-set conversion is not yet implemented.
Charset atomic.Int64 `toml:"charset" json:"-"`
// Type is the maximum number of type errors accepted.
// This includes strict-mode errors such as zero in dates, integer overflow, character string too long, etc.
// In TiDB backend, this also includes all possible SQL errors raised from INSERT,
// such as unique key conflict when `on-duplicate` is set to `error`.
// When tolerated, the row causing the error will be skipped, and the counter increases by 1.
// The default value is zero, which means that such errors are not tolerated.
Type atomic.Int64 `toml:"type" json:"type"`
// deprecated, use `conflict.threshold` instead.
// Conflict is the maximum number of unique key conflicts in local backend accepted.
// When tolerated, every pair of conflict adds 1 to the counter.
// Those pairs will NOT be deleted from the target. Conflict resolution is performed separately.
// The default value is max int64, which means conflict errors will be recorded as much as possible.
// Sometimes the actual number of conflict records logged may be greater than the value configured here,
// because conflict error data is recorded batch by batch.
// If the limit is reached in a single batch, the entire batch of records will be persisted before an error is reported.
Conflict atomic.Int64 `toml:"conflict" json:"conflict"`
}
// UnmarshalTOML implements toml.Unmarshaler interface.
func (cfg *MaxError) UnmarshalTOML(v any) error {
defaultValMap := map[string]int64{
"syntax": 0,
"charset": math.MaxInt64,
"type": 0,
}
// set default value first
cfg.Syntax.Store(defaultValMap["syntax"])
cfg.Charset.Store(defaultValMap["charset"])
cfg.Type.Store(defaultValMap["type"])
switch val := v.(type) {
case int64:
// ignore val that is smaller than 0
if val >= 0 {
// only set type error
cfg.Type.Store(val)
}
return nil
case map[string]any:
// support stuff like `max-error = { charset = 1000, type = 1000 }`.
getVal := func(k string, v any) int64 {
defaultVal, ok := defaultValMap[k]
if !ok {
return 0
}
iVal, ok := v.(int64)
if !ok || iVal < 0 {
return defaultVal
}
return iVal
}
for k, v := range val {
if k == "type" {
cfg.Type.Store(getVal(k, v))
}
}
return nil
default:
return errors.Errorf("invalid max-error '%v', should be an integer or a map of string:int64", v)
}
}
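// For illustration only, both TOML forms below are accepted; note that the table
// form currently only reads the `type` limit, as implemented above:
//
//    [lightning]
//    max-error = 1000                # shorthand: sets the type-error limit only
//    # max-error = { type = 1000 }   # equivalent table form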
// PausePDSchedulerScope is the scope of pausing PD schedulers.
type PausePDSchedulerScope string
// constants for PausePDSchedulerScope.
const (
// PausePDSchedulerScopeTable pauses schedulers by adding a schedule=deny label to the target key range of the table.
PausePDSchedulerScopeTable PausePDSchedulerScope = "table"
// PausePDSchedulerScopeGlobal pauses schedulers by removing global schedulers.
// schedulers removed includes:
// - balance-leader-scheduler
// - balance-hot-region-scheduler
// - balance-region-scheduler
// - shuffle-leader-scheduler
// - shuffle-region-scheduler
// - shuffle-hot-region-scheduler
// and we also set configs below:
// - max-merge-region-keys = 0
// - max-merge-region-size = 0
// - leader-schedule-limit = min(40, <store-count> * <current value of leader-schedule-limit>)
// - region-schedule-limit = min(40, <store-count> * <current value of region-schedule-limit>)
// - max-snapshot-count = min(40, <store-count> * <current value of max-snapshot-count>)
// - enable-location-replacement = false
// - max-pending-peer-count = math.MaxInt32
// see br/pkg/pdutil/pd.go for more detail.
PausePDSchedulerScopeGlobal PausePDSchedulerScope = "global"
)
// DuplicateResolutionAlgorithm is the config type of how to resolve duplicates.
type DuplicateResolutionAlgorithm int
const (
// NoneOnDup does nothing when a duplicate is detected.
NoneOnDup DuplicateResolutionAlgorithm = iota
// ReplaceOnDup indicates using REPLACE INTO to insert data for the TiDB backend.
// For the local backend, ReplaceOnDup records all duplicate rows, removes some conflicting rows,
// and keeps the remaining rows that can stay without causing further conflicts.
// Users need to analyze the lightning_task_info.conflict_view table to check whether the kept data
// meets their needs and whether they need to add back the correct rows.
ReplaceOnDup
// IgnoreOnDup indicates using INSERT IGNORE INTO to insert data for the TiDB backend.
// The local backend does not support IgnoreOnDup.
IgnoreOnDup
// ErrorOnDup indicates using plain INSERT INTO for the TiDB backend, which raises an error
// when a duplicate violates a PK or UNIQUE constraint.
// For the local backend, ErrorOnDup reports an error after detecting the first conflict and stops the import process.
ErrorOnDup
)
// UnmarshalTOML implements the toml.Unmarshaler interface.
func (dra *DuplicateResolutionAlgorithm) UnmarshalTOML(v any) error {
if val, ok := v.(string); ok {
return dra.FromStringValue(val)
}
return errors.Errorf("invalid conflict.strategy '%v', please choose valid option between ['', 'replace', 'ignore', 'error']", v)
}
// MarshalText implements the encoding.TextMarshaler interface.
func (dra DuplicateResolutionAlgorithm) MarshalText() ([]byte, error) {
return []byte(dra.String()), nil
}
// FromStringValue parses the string value to the DuplicateResolutionAlgorithm.
func (dra *DuplicateResolutionAlgorithm) FromStringValue(s string) error {
switch strings.ToLower(s) {
case "", "none":
*dra = NoneOnDup
case "replace":
*dra = ReplaceOnDup
case "ignore":
*dra = IgnoreOnDup
case "error":
*dra = ErrorOnDup
case "remove", "record":
log.L().Warn("conflict.strategy 'remove'/'record' is no longer supported and has been converted to 'replace'",
zap.String("strategy", s))
*dra = ReplaceOnDup
default:
return errors.Errorf("invalid conflict.strategy '%s', please choose valid option between ['', 'replace', 'ignore', 'error']", s)
}
return nil
}
// MarshalJSON implements the json.Marshaler interface.
func (dra *DuplicateResolutionAlgorithm) MarshalJSON() ([]byte, error) {
return []byte(`"` + dra.String() + `"`), nil
}
// UnmarshalJSON implements the json.Unmarshaler interface.
func (dra *DuplicateResolutionAlgorithm) UnmarshalJSON(data []byte) error {
return dra.FromStringValue(strings.Trim(string(data), `"`))
}
// String implements the fmt.Stringer interface.
func (dra DuplicateResolutionAlgorithm) String() string {
switch dra {
case NoneOnDup:
return ""
case ReplaceOnDup:
return "replace"
case IgnoreOnDup:
return "ignore"
case ErrorOnDup:
return "error"
default:
panic(fmt.Sprintf("invalid conflict.strategy type '%d'", dra))
}
}
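// For illustration only, the strategy is configured in the [conflict] section and
// accepts "", "replace", "ignore" or "error" (case-insensitive):
//
//    [conflict]
//    strategy = "replace"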
// CompressionType is the config type of compression algorithm.
type CompressionType int
const (
// CompressionNone means no compression.
CompressionNone CompressionType = iota
// CompressionGzip means gzip compression.
CompressionGzip
)
// UnmarshalTOML implements toml.Unmarshaler.
func (t *CompressionType) UnmarshalTOML(v any) error {
if val, ok := v.(string); ok {
return t.FromStringValue(val)
}
return errors.Errorf("invalid compression-type '%v', please choose valid option between ['gzip']", v)
}
// MarshalText implements encoding.TextMarshaler.
func (t CompressionType) MarshalText() ([]byte, error) {
return []byte(t.String()), nil
}
// FromStringValue parses a string to CompressionType.
func (t *CompressionType) FromStringValue(s string) error {
switch strings.ToLower(s) {
case "":
*t = CompressionNone
case "gz", "gzip":
*t = CompressionGzip
default:
return errors.Errorf("invalid compression-type '%s', please choose valid option between ['gzip']", s)
}
return nil
}
// MarshalJSON implements json.Marshaler.
func (t *CompressionType) MarshalJSON() ([]byte, error) {
return []byte(`"` + t.String() + `"`), nil
}
// UnmarshalJSON implements json.Unmarshaler.
func (t *CompressionType) UnmarshalJSON(data []byte) error {
return t.FromStringValue(strings.Trim(string(data), `"`))
}
// String implements fmt.Stringer.
func (t CompressionType) String() string {
switch t {
case CompressionGzip:
return "gzip"
case CompressionNone:
return ""
default:
panic(fmt.Sprintf("invalid compression type '%d'", t))
}
}
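// For illustration only, compression is configured in the [tikv-importer] section;
// both "gzip" and "gz" select gzip, and an empty string disables compression:
//
//    [tikv-importer]
//    compress-kv-pairs = "gzip"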
// PostRestore has some options which will be executed after the KV data is restored.
type PostRestore struct {
Checksum PostOpLevel `toml:"checksum" json:"checksum"`
Analyze PostOpLevel `toml:"analyze" json:"analyze"`
Level1Compact bool `toml:"level-1-compact" json:"level-1-compact"`
PostProcessAtLast bool `toml:"post-process-at-last" json:"post-process-at-last"`
Compact bool `toml:"compact" json:"compact"`
ChecksumViaSQL bool `toml:"checksum-via-sql" json:"checksum-via-sql"`
}
// adjust assigns default values and checks illegal values. The input TikvImporter
// must be adjusted before calling this function.
func (p *PostRestore) adjust(i *TikvImporter) {
if i.Backend != BackendTiDB {
return
}
p.Checksum = OpLevelOff
p.Analyze = OpLevelOff
p.Compact = false
p.ChecksumViaSQL = false
}
// StringOrStringSlice can unmarshal a TOML string as string slice with one element.
type StringOrStringSlice []string
// UnmarshalTOML implements the toml.Unmarshaler interface.
func (s *StringOrStringSlice) UnmarshalTOML(in any) error {
switch v := in.(type) {
case string:
*s = []string{v}
case []any:
*s = make([]string, 0, len(v))
for _, vv := range v {
vs, ok := vv.(string)
if !ok {
return errors.Errorf("invalid string slice '%v'", in)
}
*s = append(*s, vs)
}
default:
return errors.Errorf("invalid string slice '%v'", in)
}
return nil
}
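// For illustration only, both forms below decode into the same StringOrStringSlice
// value (e.g. for `mydumper.csv.null`):
//
//    null = "NULL"            # decodes to ["NULL"]
//    null = ["NULL", '\N']    # decodes as-is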
// CSVConfig is the config for CSV files.
type CSVConfig struct {
// Separator, Delimiter and Terminator should all be in utf8mb4 encoding.
Separator string `toml:"separator" json:"separator"`
Delimiter string `toml:"delimiter" json:"delimiter"`
Terminator string `toml:"terminator" json:"terminator"`
Null StringOrStringSlice `toml:"null" json:"null"`
Header bool `toml:"header" json:"header"`
HeaderSchemaMatch bool `toml:"header-schema-match" json:"header-schema-match"`
TrimLastSep bool `toml:"trim-last-separator" json:"trim-last-separator"`
NotNull bool `toml:"not-null" json:"not-null"`
// deprecated, use `escaped-by` instead.
BackslashEscape bool `toml:"backslash-escape" json:"backslash-escape"`
// EscapedBy has higher priority than BackslashEscape, currently it must be a single character if set.
EscapedBy string `toml:"escaped-by" json:"escaped-by"`
// These options are hidden from the lightning configuration file; they can only be used by LOAD DATA.
// https://dev.mysql.com/doc/refman/8.0/en/load-data.html#load-data-field-line-handling
StartingBy string `toml:"-" json:"-"`
AllowEmptyLine bool `toml:"-" json:"-"`
// When Delimiter is non-empty (for example a quote), null elements inside quotes are not considered null,
// except for `\N` (when escaped-by is `\`). That is to say, `\N` is special because it always means null.
QuotedNullIsText bool `toml:"-" json:"-"`
// ref https://dev.mysql.com/doc/refman/8.0/en/load-data.html
// > If the field begins with the ENCLOSED BY character, instances of that character are recognized as terminating a
// > field value only if followed by the field or line TERMINATED BY sequence.
// This means we may encounter an unescaped quote inside a quoted field, e.g.
// > The "BIG" boss -> The "BIG" boss
// and an unescaped quote inside an unquoted field.
UnescapedQuote bool `toml:"-" json:"-"`
}
func (csv *CSVConfig) adjust() error {
if len(csv.Separator) == 0 {
return common.ErrInvalidConfig.GenWithStack("`mydumper.csv.separator` must not be empty")
}
if len(csv.Delimiter) > 0 && (strings.HasPrefix(csv.Separator, csv.Delimiter) || strings.HasPrefix(csv.Delimiter, csv.Separator)) {
return common.ErrInvalidConfig.GenWithStack("`mydumper.csv.separator` and `mydumper.csv.delimiter` must not be prefix of each other")
}
if len(csv.EscapedBy) > 1 {
return common.ErrInvalidConfig.GenWithStack("`mydumper.csv.escaped-by` must be empty or a single character")
}
if csv.BackslashEscape && csv.EscapedBy == "" {
csv.EscapedBy = `\`
}
if !csv.BackslashEscape && csv.EscapedBy == `\` {
csv.EscapedBy = ""
}
// keep compatibility with old behaviour
if !csv.NotNull && len(csv.Null) == 0 {
csv.Null = []string{""}
}
if len(csv.EscapedBy) > 0 {
if csv.Separator == csv.EscapedBy {
return common.ErrInvalidConfig.GenWithStack("cannot use '%s' both as CSV separator and `mydumper.csv.escaped-by`", csv.EscapedBy)
}
if csv.Delimiter == csv.EscapedBy {
return common.ErrInvalidConfig.GenWithStack("cannot use '%s' both as CSV delimiter and `mydumper.csv.escaped-by`", csv.EscapedBy)
}
if csv.Terminator == csv.EscapedBy {
return common.ErrInvalidConfig.GenWithStack("cannot use '%s' both as CSV terminator and `mydumper.csv.escaped-by`", csv.EscapedBy)
}
}
return nil
}
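// For illustration only, a [mydumper.csv] section that satisfies the checks in
// adjust above (non-empty separator, separator/delimiter not prefixes of each
// other, single-character escaped-by):
//
//    [mydumper.csv]
//    separator = ","
//    delimiter = '"'
//    terminator = "\r\n"
//    header = true
//    escaped-by = '\'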
// MydumperRuntime is the runtime config for mydumper.
type MydumperRuntime struct {
ReadBlockSize ByteSize `toml:"read-block-size" json:"read-block-size"`
BatchSize ByteSize `toml:"batch-size" json:"batch-size"`
BatchImportRatio float64 `toml:"batch-import-ratio" json:"batch-import-ratio"`
SourceID string `toml:"source-id" json:"source-id"`
SourceDir string `toml:"data-source-dir" json:"data-source-dir"`
CharacterSet string `toml:"character-set" json:"character-set"`
CSV CSVConfig `toml:"csv" json:"csv"`
MaxRegionSize ByteSize `toml:"max-region-size" json:"max-region-size"`
Filter []string `toml:"filter" json:"filter"`
FileRouters []*FileRouteRule `toml:"files" json:"files"`
// Deprecated: only used to keep the compatibility.
NoSchema bool `toml:"no-schema" json:"no-schema"`
CaseSensitive bool `toml:"case-sensitive" json:"case-sensitive"`
StrictFormat bool `toml:"strict-format" json:"strict-format"`
DefaultFileRules bool `toml:"default-file-rules" json:"default-file-rules"`
IgnoreColumns AllIgnoreColumns `toml:"ignore-data-columns" json:"ignore-data-columns"`
// DataCharacterSet is the character set of the source file. Only CSV files are supported now. The following options are supported.
// - utf8mb4
// - GB18030
// - GBK: an extension of the GB2312 character set and is also known as Code Page 936.
// - latin1: IANA Windows1252
// - binary: no attempt to convert the encoding.
// Leaving DataCharacterSet empty makes it use `binary` by default.
DataCharacterSet string `toml:"data-character-set" json:"data-character-set"`
// DataInvalidCharReplace is the replacement character for incompatible characters; it must not
// overlap with the separators or line breaks.
// Changing the default value will result in increased parsing time. Incompatible characters do not increase the error count.
DataInvalidCharReplace string `toml:"data-invalid-char-replace" json:"data-invalid-char-replace"`
}
func (m *MydumperRuntime) adjust() error {
if err := m.CSV.adjust(); err != nil {
return err
}
if m.StrictFormat && len(m.CSV.Terminator) == 0 {
return common.ErrInvalidConfig.GenWithStack(
`mydumper.strict-format can not be used with empty mydumper.csv.terminator. Please set mydumper.csv.terminator to a non-empty value like "\r\n"`)
}
for _, rule := range m.FileRouters {
if filepath.IsAbs(rule.Path) {
relPath, err := filepath.Rel(m.SourceDir, rule.Path)
if err != nil {
return common.ErrInvalidConfig.Wrap(err).
GenWithStack("cannot find relative path for file route path %s", rule.Path)
}
// ".." means that this path is not in source dir, so we should return an error
if strings.HasPrefix(relPath, "..") {
return common.ErrInvalidConfig.GenWithStack(
"file route path '%s' is not in source dir '%s'", rule.Path, m.SourceDir)
}
rule.Path = relPath
}
}
// enable default file route rule if no rules are set
if len(m.FileRouters) == 0 {
m.DefaultFileRules = true
}
if len(m.DataCharacterSet) == 0 {
m.DataCharacterSet = defaultCSVDataCharacterSet
}
charset, err1 := ParseCharset(m.DataCharacterSet)
if err1 != nil {
return common.ErrInvalidConfig.Wrap(err1).GenWithStack("invalid `mydumper.data-character-set`")
}
if charset == GBK || charset == GB18030 {
log.L().Warn(
"incompatible strings may be encountered during the transcoding process and will be replaced, please be aware of the risk of not being able to retain the original information",
zap.String("source-character-set", charset.String()),
zap.ByteString("invalid-char-replacement", []byte(m.DataInvalidCharReplace)))
}
if m.BatchImportRatio < 0.0 || m.BatchImportRatio >= 1.0 {
m.BatchImportRatio = DefaultBatchImportRatio
}
if m.ReadBlockSize <= 0 {
m.ReadBlockSize = ReadBlockSize
}
if len(m.CharacterSet) == 0 {
m.CharacterSet = "auto"
}
if len(m.IgnoreColumns) != 0 {
// Lowercase the columns because we use Name.L to compare columns in TiDB.
for _, ig := range m.IgnoreColumns {
cols := make([]string, len(ig.Columns))
for i, col := range ig.Columns {
cols[i] = strings.ToLower(col)
}
ig.Columns = cols
}
}
return m.adjustFilePath()
}
// adjustFilePath checks and adjusts the file path.
func (m *MydumperRuntime) adjustFilePath() error {
var u *url.URL
// An absolute Windows path like "C:\Users\XYZ" would be interpreted as
// a URL with scheme "C" and opaque data "\Users\XYZ".
// Therefore, we only perform URL parsing if we are sure the path is not
// an absolute Windows path.
// Here we use the `filepath.VolumeName` which can identify the "C:" part
// out of the path. On Linux this method always returns an empty string.
// On Windows, the drive letter can only be single letters from "A:" to "Z:",
// so this won't mistake "S3:" as a Windows path.
if len(filepath.VolumeName(m.SourceDir)) == 0 {
var err error
u, err = url.Parse(m.SourceDir)
if err != nil {
return common.ErrInvalidConfig.Wrap(err).GenWithStack("cannot parse `mydumper.data-source-dir` %s", m.SourceDir)
}
} else {
u = &url.URL{}
}
// convert a local path (absolute or relative) to a valid file:// URL
if u.Scheme == "" {
if m.SourceDir == "" {
return common.ErrInvalidConfig.GenWithStack("`mydumper.data-source-dir` is not set")
}
if !common.IsDirExists(m.SourceDir) {
return common.ErrInvalidConfig.GenWithStack("'%s': `mydumper.data-source-dir` does not exist", m.SourceDir)
}
absPath, err := filepath.Abs(m.SourceDir)
if err != nil {
return common.ErrInvalidConfig.Wrap(err).GenWithStack("failed to convert data-source-dir '%s' to an absolute path", m.SourceDir)
}
u.Path = filepath.ToSlash(absPath)
u.Scheme = "file"
m.SourceDir = u.String()
}
found := false
for _, t := range supportedStorageTypes {
if u.Scheme == t {
found = true
break
}
}
if !found {
return common.ErrInvalidConfig.GenWithStack(
"unsupported data-source-dir url '%s', supported storage types are %s",
m.SourceDir, strings.Join(supportedStorageTypes, ","))
}
return nil
}
// AllIgnoreColumns is a slice of IgnoreColumns.
type AllIgnoreColumns []*IgnoreColumns
// IgnoreColumns is the config for ignoring columns.
type IgnoreColumns struct {
DB string `toml:"db" json:"db"`
Table string `toml:"table" json:"table"`
TableFilter []string `toml:"table-filter" json:"table-filter"`
Columns []string `toml:"columns" json:"columns"`
}
// ColumnsMap returns a map of columns.
func (ic *IgnoreColumns) ColumnsMap() map[string]struct{} {
columnMap := make(map[string]struct{}, len(ic.Columns))
for _, c := range ic.Columns {
columnMap[c] = struct{}{}
}
return columnMap
}
// GetIgnoreColumns gets Ignore config by schema name/regex and table name/regex.
func (igCols AllIgnoreColumns) GetIgnoreColumns(db string, table string, caseSensitive bool) (*IgnoreColumns, error) {
if !caseSensitive {
db = strings.ToLower(db)
table = strings.ToLower(table)
}
for i, ig := range igCols {
if ig.DB == db && ig.Table == table {
return igCols[i], nil
}
f, err := filter.Parse(ig.TableFilter)
if err != nil {
return nil, common.ErrInvalidConfig.GenWithStack("invalid table filter %s in ignore columns", strings.Join(ig.TableFilter, ","))
}
if f.MatchTable(db, table) {
return igCols[i], nil
}
}
return &IgnoreColumns{Columns: make([]string, 0)}, nil
}
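// For illustration only, ignore rules are configured under
// [[mydumper.ignore-data-columns]]; either db/table or table-filter selects the
// tables whose listed columns are ignored:
//
//    [[mydumper.ignore-data-columns]]
//    db = "db1"
//    table = "tbl1"
//    columns = ["id", "created_at"]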
// FileRouteRule is the rule for routing files.
type FileRouteRule struct {
Pattern string `json:"pattern" toml:"pattern" yaml:"pattern"`
Path string `json:"path" toml:"path" yaml:"path"`
Schema string `json:"schema" toml:"schema" yaml:"schema"`
Table string `json:"table" toml:"table" yaml:"table"`
Type string `json:"type" toml:"type" yaml:"type"`
Key string `json:"key" toml:"key" yaml:"key"`
Compression string `json:"compression" toml:"compression" yaml:"compression"`
// Unescape indicates whether to unescape the schema/table name; it is only used in lightning's internal logic now.
Unescape bool `json:"-" toml:"-" yaml:"-"`
// TODO: DataCharacterSet here can override the same field in [mydumper.csv] with a higher level.
// This could provide users a more flexible usage to configure different files with
// different data charsets.
// DataCharacterSet string `toml:"data-character-set" json:"data-character-set"`
}
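// For illustration only, a custom file route under [[mydumper.files]]; the pattern
// and the schema/table names here are hypothetical:
//
//    [[mydumper.files]]
//    pattern = '.*\.mydata\.csv$'
//    schema = "mydb"
//    table = "mytable"
//    type = "csv"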
// TikvImporter is the config for tikv-importer.
type TikvImporter struct {
// Deprecated: only used to keep the compatibility.
Addr string `toml:"addr" json:"addr"`
Backend string `toml:"backend" json:"backend"`
// deprecated, use Conflict.Strategy instead.
OnDuplicate DuplicateResolutionAlgorithm `toml:"on-duplicate" json:"on-duplicate"`
MaxKVPairs int `toml:"max-kv-pairs" json:"max-kv-pairs"`
// deprecated
SendKVPairs int `toml:"send-kv-pairs" json:"send-kv-pairs"`
SendKVSize ByteSize `toml:"send-kv-size" json:"send-kv-size"`
CompressKVPairs CompressionType `toml:"compress-kv-pairs" json:"compress-kv-pairs"`
RegionSplitSize ByteSize `toml:"region-split-size" json:"region-split-size"`
RegionSplitKeys int `toml:"region-split-keys" json:"region-split-keys"`
RegionSplitBatchSize int `toml:"region-split-batch-size" json:"region-split-batch-size"`
RegionSplitConcurrency int `toml:"region-split-concurrency" json:"region-split-concurrency"`
RegionCheckBackoffLimit int `toml:"region-check-backoff-limit" json:"region-check-backoff-limit"`
SortedKVDir string `toml:"sorted-kv-dir" json:"sorted-kv-dir"`
DiskQuota ByteSize `toml:"disk-quota" json:"disk-quota"`
RangeConcurrency int `toml:"range-concurrency" json:"range-concurrency"`
// deprecated, use Conflict.Strategy instead.
DuplicateResolution DuplicateResolutionAlgorithm `toml:"duplicate-resolution" json:"duplicate-resolution"`
// deprecated, use ParallelImport instead.
IncrementalImport bool `toml:"incremental-import" json:"incremental-import"`
ParallelImport bool `toml:"parallel-import" json:"parallel-import"`
KeyspaceName string `toml:"keyspace-name" json:"keyspace-name"`
AddIndexBySQL bool `toml:"add-index-by-sql" json:"add-index-by-sql"`
EngineMemCacheSize ByteSize `toml:"engine-mem-cache-size" json:"engine-mem-cache-size"`
LocalWriterMemCacheSize ByteSize `toml:"local-writer-mem-cache-size" json:"local-writer-mem-cache-size"`
StoreWriteBWLimit ByteSize `toml:"store-write-bwlimit" json:"store-write-bwlimit"`
LogicalImportBatchSize ByteSize `toml:"logical-import-batch-size" json:"logical-import-batch-size"`
LogicalImportBatchRows int `toml:"logical-import-batch-rows" json:"logical-import-batch-rows"`
// The default is PausePDSchedulerScopeTable to be compatible with previous versions (>= 6.1).
PausePDSchedulerScope PausePDSchedulerScope `toml:"pause-pd-scheduler-scope" json:"pause-pd-scheduler-scope"`
BlockSize ByteSize `toml:"block-size" json:"block-size"`
}
func (t *TikvImporter) adjust() error {
if t.Backend == "" {
return common.ErrInvalidConfig.GenWithStack("tikv-importer.backend must not be empty!")
}
t.Backend = strings.ToLower(t.Backend)
// only need to assign t.IncrementalImport to t.ParallelImport when t.ParallelImport is false and t.IncrementalImport is true
if !t.ParallelImport && t.IncrementalImport {
t.ParallelImport = t.IncrementalImport
}
switch t.Backend {
case BackendTiDB:
if t.LogicalImportBatchSize <= 0 {
return common.ErrInvalidConfig.GenWithStack(
"`tikv-importer.logical-import-batch-size` got %d, should be larger than 0",
t.LogicalImportBatchSize)
}
if t.LogicalImportBatchRows <= 0 {
return common.ErrInvalidConfig.GenWithStack(
"`tikv-importer.logical-import-batch-rows` got %d, should be larger than 0",
t.LogicalImportBatchRows)
}
case BackendLocal:
if t.RegionSplitBatchSize <= 0 {
return common.ErrInvalidConfig.GenWithStack(
"`tikv-importer.region-split-batch-size` got %d, should be larger than 0",
t.RegionSplitBatchSize)
}
if t.RegionSplitConcurrency <= 0 {
return common.ErrInvalidConfig.GenWithStack(
"`tikv-importer.region-split-concurrency` got %d, should be larger than 0",
t.RegionSplitConcurrency)
}
if t.RangeConcurrency == 0 {
t.RangeConcurrency = DefaultRangeConcurrency
}
if t.EngineMemCacheSize == 0 {
t.EngineMemCacheSize = DefaultEngineMemCacheSize
}
if t.LocalWriterMemCacheSize == 0 {
t.LocalWriterMemCacheSize = DefaultLocalWriterMemCacheSize
}
if t.BlockSize == 0 {
t.BlockSize = DefaultBlockSize
}
if t.ParallelImport && t.AddIndexBySQL {
return common.ErrInvalidConfig.
GenWithStack("tikv-importer.add-index-by-sql cannot be used with tikv-importer.parallel-import")
}
if len(t.SortedKVDir) == 0 {
return common.ErrInvalidConfig.GenWithStack("tikv-importer.sorted-kv-dir must not be empty!")
}
storageSizeDir := filepath.Clean(t.SortedKVDir)
sortedKVDirInfo, err := os.Stat(storageSizeDir)
switch {
case os.IsNotExist(err):
case err == nil:
if !sortedKVDirInfo.IsDir() {
return common.ErrInvalidConfig.
GenWithStack("tikv-importer.sorted-kv-dir ('%s') is not a directory", storageSizeDir)
}
default:
return common.ErrInvalidConfig.Wrap(err).GenWithStack("invalid tikv-importer.sorted-kv-dir")
}
default:
return common.ErrInvalidConfig.GenWithStack(
"unsupported `tikv-importer.backend` (%s)",
t.Backend)
}
t.PausePDSchedulerScope = PausePDSchedulerScope(strings.ToLower(string(t.PausePDSchedulerScope)))
switch t.PausePDSchedulerScope {
case PausePDSchedulerScopeTable, PausePDSchedulerScopeGlobal:
default:
return common.ErrInvalidConfig.GenWithStack("pause-pd-scheduler-scope is invalid, allowed value include: table, global")
}
return nil
}
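// For illustration only, the minimal settings adjust above requires for the local
// backend; the directory path is a placeholder:
//
//    [tikv-importer]
//    backend = "local"
//    sorted-kv-dir = "/tmp/sorted-kv"
//
// For `backend = "tidb"`, sorted-kv-dir is not required, but
// logical-import-batch-size and logical-import-batch-rows must stay positive.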
// Checkpoint is the config for checkpoint.
type Checkpoint struct {
Schema string `toml:"schema" json:"schema"`
DSN string `toml:"dsn" json:"-"` // DSN may contain password, don't expose this to JSON.
MySQLParam *common.MySQLConnectParam `toml:"-" json:"-"` // For some security reason, we use MySQLParam instead of DSN.
Driver string `toml:"driver" json:"driver"`
Enable bool `toml:"enable" json:"enable"`
KeepAfterSuccess CheckpointKeepStrategy `toml:"keep-after-success" json:"keep-after-success"`
}
// adjust assigns default values and checks illegal values. The input DBStore
// must be adjusted before calling this function.
func (c *Checkpoint) adjust(t *DBStore) {
if len(c.Schema) == 0 {
c.Schema = "tidb_lightning_checkpoint"
}
if len(c.Driver) == 0 {
c.Driver = CheckpointDriverFile
}
if len(c.DSN) == 0 {
switch c.Driver {
case CheckpointDriverMySQL:
param := common.MySQLConnectParam{
Host: t.Host,
Port: t.Port,
User: t.User,
Password: t.Psw,
SQLMode: mysql.DefaultSQLMode,
MaxAllowedPacket: defaultMaxAllowedPacket,
TLSConfig: t.Security.TLSConfig,
AllowFallbackToPlaintext: t.Security.AllowFallbackToPlaintext,
}
c.MySQLParam = &param
case CheckpointDriverFile:
c.DSN = "/tmp/" + c.Schema + ".pb"
}
} else {
// try to remove allowAllFiles
mysqlCfg, err := gomysql.ParseDSN(c.DSN)
if err != nil {
return
}
mysqlCfg.AllowAllFiles = false
c.DSN = mysqlCfg.FormatDSN()
}
}
// Cron is the config for cron.
type Cron struct {
SwitchMode Duration `toml:"switch-mode" json:"switch-mode"`
LogProgress Duration `toml:"log-progress" json:"log-progress"`
CheckDiskQuota Duration `toml:"check-disk-quota" json:"check-disk-quota"`
}
// Security is the config for security.
type Security struct {
CAPath string `toml:"ca-path" json:"ca-path"`
CertPath string `toml:"cert-path" json:"cert-path"`
KeyPath string `toml:"key-path" json:"key-path"`
// RedactInfoLog indicates whether to enable the redact log.
RedactInfoLog bool `toml:"redact-info-log" json:"redact-info-log"`
TLSConfig *tls.Config `toml:"-" json:"-"`
AllowFallbackToPlaintext bool `toml:"-" json:"-"`
// When DM/engine uses lightning as a library, it can directly pass in the certificate/key content.
CABytes []byte `toml:"-" json:"-"`
CertBytes []byte `toml:"-" json:"-"`
KeyBytes []byte `toml:"-" json:"-"`
}
// BuildTLSConfig builds the TLS config which is used by the SQL driver later.
func (sec *Security) BuildTLSConfig() error {
if sec == nil || sec.TLSConfig != nil {
return nil
}
tlsConfig, err := util.NewTLSConfig(
util.WithCAPath(sec.CAPath),
util.WithCertAndKeyPath(sec.CertPath, sec.KeyPath),
util.WithCAContent(sec.CABytes),
util.WithCertAndKeyContent(sec.CertBytes, sec.KeyBytes),
)
if err != nil {
return errors.Trace(err)
}
sec.TLSConfig = tlsConfig
return nil
}
// Duration is a wrapper of time.Duration which can be deserialized from a TOML string.
// Implemented as https://github.com/BurntSushi/toml#using-the-encodingtextunmarshaler-interface
type Duration struct {
time.Duration
}
// UnmarshalText implements encoding.TextUnmarshaler.
func (d *Duration) UnmarshalText(text []byte) error {
var err error
d.Duration, err = time.ParseDuration(string(text))
return errors.Trace(err)
}
// MarshalText implements encoding.TextMarshaler.
func (d Duration) MarshalText() ([]byte, error) {
return []byte(d.String()), nil
}
// MarshalJSON implements json.Marshaler.
func (d *Duration) MarshalJSON() ([]byte, error) {
return []byte(fmt.Sprintf(`"%s"`, d.Duration)), nil
}
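// For illustration only, the [cron] durations are parsed with time.ParseDuration,
// so values like the defaults in NewConfig can be written as:
//
//    [cron]
//    switch-mode = "5m"
//    log-progress = "5m"
//    check-disk-quota = "1m"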
// Charset defines character set
type Charset int
// Charset constants
const (
Binary Charset = iota
UTF8MB4
GB18030
GBK
Latin1
ASCII
)
// String returns the string value of the charset.
func (c Charset) String() string {
switch c {
case Binary:
return "binary"
case UTF8MB4:
return "utf8mb4"
case GB18030:
return "gb18030"
case GBK:
return "gbk"
case Latin1:
return "latin1"
case ASCII:
return "ascii"
default:
return "unknown_charset"
}
}
// ParseCharset parses the character set from a string.
func ParseCharset(dataCharacterSet string) (Charset, error) {
switch strings.ToLower(dataCharacterSet) {
case "", "binary":
return Binary, nil
case "utf8", "utf8mb4":
return UTF8MB4, nil
case "gb18030":
return GB18030, nil
case "gbk":
return GBK, nil
case "latin1":
return Latin1, nil
case "ascii":
return ASCII, nil
default:
return Binary, errors.Errorf("found unsupported data-character-set: %s", dataCharacterSet)
}
}
// Conflict is the config section for PK/UK conflict related configurations.
type Conflict struct {
Strategy DuplicateResolutionAlgorithm `toml:"strategy" json:"strategy"`
PrecheckConflictBeforeImport bool `toml:"precheck-conflict-before-import" json:"precheck-conflict-before-import"`
Threshold int64 `toml:"threshold" json:"threshold"`
MaxRecordRows int64 `toml:"max-record-rows" json:"max-record-rows"`
}
// adjust assigns default values and checks illegal values. The arguments must be
// adjusted before calling this function.
func (c *Conflict) adjust(i *TikvImporter) error {
strategyConfigFrom := "conflict.strategy"
if c.Strategy == NoneOnDup {
if i.OnDuplicate == NoneOnDup && i.Backend == BackendTiDB {
c.Strategy = ErrorOnDup
}
if i.OnDuplicate != NoneOnDup {
strategyConfigFrom = "tikv-importer.on-duplicate"
c.Strategy = i.OnDuplicate
}
}
strategyFromDuplicateResolution := false
if c.Strategy == NoneOnDup && i.DuplicateResolution != NoneOnDup {
c.Strategy = i.DuplicateResolution
strategyFromDuplicateResolution = true
}
switch c.Strategy {
case ReplaceOnDup, IgnoreOnDup, ErrorOnDup, NoneOnDup:
default:
return common.ErrInvalidConfig.GenWithStack(
"unsupported `%s` (%s)", strategyConfigFrom, c.Strategy)
}
if !strategyFromDuplicateResolution && c.Strategy != NoneOnDup && i.DuplicateResolution != NoneOnDup {
return common.ErrInvalidConfig.GenWithStack(
"%s cannot be used with tikv-importer.duplicate-resolution",
strategyConfigFrom)
}
if c.Strategy == IgnoreOnDup && i.Backend == BackendLocal {
return common.ErrInvalidConfig.GenWithStack(
`%s cannot be set to "ignore" when use tikv-importer.backend = "local"`,
strategyConfigFrom)
}
if c.PrecheckConflictBeforeImport && i.Backend == BackendTiDB {
return common.ErrInvalidConfig.GenWithStack(
`conflict.precheck-conflict-before-import cannot be set to true when use tikv-importer.backend = "tidb"`)
}
if c.Threshold < 0 {
switch c.Strategy {
case ErrorOnDup, NoneOnDup:
c.Threshold = 0
case IgnoreOnDup, ReplaceOnDup:
c.Threshold = DefaultRecordDuplicateThreshold
}
}
if c.Threshold > 0 && c.Strategy == ErrorOnDup {
return common.ErrInvalidConfig.GenWithStack(
`conflict.threshold cannot be set when use conflict.strategy = "error"`)
}
if c.Strategy == ReplaceOnDup && i.Backend == BackendTiDB {
// Because we use batch insert, we can't know which row is duplicated.
if c.MaxRecordRows >= 0 {
// only warn when it is set by user.
log.L().Warn(`Cannot record duplication (conflict.max-record-rows > 0) when use tikv-importer.backend = "tidb" and conflict.strategy = "replace". The value of conflict.max-record-rows has been converted to 0.`)
}
c.MaxRecordRows = 0
} else {
if c.MaxRecordRows >= 0 {
// only warn when it is set by user.
log.L().Warn("Setting conflict.max-record-rows does not take effect. The value of conflict.max-record-rows has been converted to conflict.threshold.")
}
c.MaxRecordRows = c.Threshold
}
return nil
}
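// For illustration only, a [conflict] section consistent with the checks in adjust
// above (threshold must not be set with strategy = "error", and
// precheck-conflict-before-import cannot be used with the TiDB backend):
//
//    [conflict]
//    strategy = "replace"
//    precheck-conflict-before-import = false
//    threshold = 10000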
// NewConfig creates a new Config.
func NewConfig() *Config {
return &Config{
App: Lightning{
RegionConcurrency: runtime.NumCPU(),
TableConcurrency: 0,
IndexConcurrency: 0,
IOConcurrency: 5,
CheckRequirements: true,
TaskInfoSchemaName: defaultTaskInfoSchemaName,
},
Checkpoint: Checkpoint{
Enable: true,
},
TiDB: DBStore{
Host: "127.0.0.1",
User: "root",
StatusPort: 10080,
StrSQLMode: "ONLY_FULL_GROUP_BY,NO_AUTO_CREATE_USER",
MaxAllowedPacket: defaultMaxAllowedPacket,
BuildStatsConcurrency: defaultBuildStatsConcurrency,
DistSQLScanConcurrency: defaultDistSQLScanConcurrency,
IndexSerialScanConcurrency: defaultIndexSerialScanConcurrency,
ChecksumTableConcurrency: defaultChecksumTableConcurrency,
},
Cron: Cron{
SwitchMode: Duration{Duration: DefaultSwitchTiKVModeInterval},
LogProgress: Duration{Duration: 5 * time.Minute},
CheckDiskQuota: Duration{Duration: 1 * time.Minute},
},
Mydumper: MydumperRuntime{
ReadBlockSize: ReadBlockSize,
CSV: CSVConfig{
Separator: ",",
Delimiter: `"`,
Header: true,
HeaderSchemaMatch: true,
NotNull: false,
Null: []string{`\N`},
BackslashEscape: true,
EscapedBy: `\`,
TrimLastSep: false,
},
StrictFormat: false,
MaxRegionSize: MaxRegionSize,
Filter: GetDefaultFilter(),
DataCharacterSet: defaultCSVDataCharacterSet,
DataInvalidCharReplace: string(defaultCSVDataInvalidCharReplace),
},
TikvImporter: TikvImporter{
Backend: "",
MaxKVPairs: 4096,
SendKVPairs: 32768,
SendKVSize: KVWriteBatchSize,
RegionSplitSize: 0,
RegionSplitBatchSize: DefaultRegionSplitBatchSize,
RegionSplitConcurrency: runtime.GOMAXPROCS(0),
RegionCheckBackoffLimit: DefaultRegionCheckBackoffLimit,
DiskQuota: ByteSize(math.MaxInt64),
DuplicateResolution: NoneOnDup,
PausePDSchedulerScope: PausePDSchedulerScopeTable,
BlockSize: 16 * 1024,
LogicalImportBatchSize: ByteSize(defaultLogicalImportBatchSize),
LogicalImportBatchRows: defaultLogicalImportBatchRows,
},
PostRestore: PostRestore{
Checksum: OpLevelRequired,
Analyze: OpLevelOptional,
PostProcessAtLast: true,
ChecksumViaSQL: false,
},
Conflict: Conflict{
Strategy: NoneOnDup,
PrecheckConflictBeforeImport: false,
Threshold: -1,
MaxRecordRows: -1,
},
}
}
// LoadFromGlobal resets the current configuration to the global settings.
func (cfg *Config) LoadFromGlobal(global *GlobalConfig) error {
if err := cfg.LoadFromTOML(global.ConfigFileContent); err != nil {
return err
}
cfg.TiDB.Host = global.TiDB.Host
cfg.TiDB.Port = global.TiDB.Port
cfg.TiDB.User = global.TiDB.User
cfg.TiDB.Psw = global.TiDB.Psw
cfg.TiDB.StatusPort = global.TiDB.StatusPort
cfg.TiDB.PdAddr = global.TiDB.PdAddr
cfg.Mydumper.NoSchema = global.Mydumper.NoSchema
cfg.Mydumper.SourceDir = global.Mydumper.SourceDir
cfg.Mydumper.Filter = global.Mydumper.Filter
cfg.TikvImporter.Backend = global.TikvImporter.Backend
cfg.TikvImporter.SortedKVDir = global.TikvImporter.SortedKVDir
cfg.Checkpoint.Enable = global.Checkpoint.Enable
cfg.PostRestore.Checksum = global.PostRestore.Checksum
cfg.PostRestore.Analyze = global.PostRestore.Analyze
cfg.App.CheckRequirements = global.App.CheckRequirements
cfg.Security = global.Security
cfg.Mydumper.IgnoreColumns = global.Mydumper.IgnoreColumns
return nil
}
// LoadFromTOML overwrites the current configuration with the TOML data.
// If the data contains toml items that belong to neither Config nor GlobalConfig, an error is returned.
// If the data contains toml items that belong to GlobalConfig but not Config (and thus won't take effect), the user is warned.
func (cfg *Config) LoadFromTOML(data []byte) error {
// bothUnused saves toml items that belong to neither Config nor GlobalConfig
var bothUnused []string
// warnItems saves legal toml items that won't take effect
var warnItems []string
dataStr := string(data)
// Here we load the toml into cfg; the rest of the logic checks for unused keys
metaData, err := toml.Decode(dataStr, cfg)
if err != nil {
return errors.Trace(err)
}
unusedConfigKeys := metaData.Undecoded()
if len(unusedConfigKeys) == 0 {
return nil
}
// Now we deal with keys that are potentially unused by both the Config and GlobalConfig structs
metaDataGlobal, err := toml.Decode(dataStr, &GlobalConfig{})
if err != nil {
return errors.Trace(err)
}
// The Key type returned by metaData.Undecoded doesn't support equality comparison,
// so we convert the keys to strings instead; this conversion is one-to-one
unusedGlobalKeys := metaDataGlobal.Undecoded()
unusedGlobalKeyStrs := make(map[string]struct{})
for _, key := range unusedGlobalKeys {
unusedGlobalKeyStrs[key.String()] = struct{}{}
}
iterateUnusedKeys:
for _, key := range unusedConfigKeys {
keyStr := key.String()
switch keyStr {
// these keys are not counted as decoded by toml decoder, but actually they are decoded,
// because the corresponding unmarshal logic handles these key's decoding in a custom way
case "lightning.max-error.type",
"lightning.max-error.conflict":
continue iterateUnusedKeys
}
if _, found := unusedGlobalKeyStrs[keyStr]; found {
bothUnused = append(bothUnused, keyStr)
} else {
warnItems = append(warnItems, keyStr)
}
}
if len(bothUnused) > 0 {
return errors.Errorf("config file contained unknown configuration options: %s",
strings.Join(bothUnused, ", "))
}
// Warn that some legal fields of the config file won't be overridden, such as lightning.file
if len(warnItems) > 0 {
log.L().Warn("currently only per-task configuration can be applied, global configuration changes can only be made on startup",
zap.Strings("global config changes", warnItems))
}
return nil
}
// Adjust fixes the invalid or unspecified settings to reasonable valid values,
// and checks for illegal configuration.
func (cfg *Config) Adjust(ctx context.Context) error {
// note that the argument of `adjust` should be `adjust`ed before using it.
if err := cfg.TikvImporter.adjust(); err != nil {
return err
}
cfg.App.adjust(&cfg.TikvImporter)
if err := cfg.Mydumper.adjust(); err != nil {
return err
}
cfg.PostRestore.adjust(&cfg.TikvImporter)
tlsObj, err := cfg.ToTLS()
if err != nil {
return err
}
if err = cfg.TiDB.adjust(ctx, &cfg.TikvImporter, &cfg.Security, tlsObj); err != nil {
return err
}
cfg.Checkpoint.adjust(&cfg.TiDB)
if err = cfg.Routes.adjust(&cfg.Mydumper); err != nil {
return err
}
return cfg.Conflict.adjust(&cfg.TikvImporter)
}
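// For illustration only, a plausible way for a caller to build and validate a
// configuration with this package; `sampleTOML` is a hypothetical variable holding
// the file content:
//
//    cfg := NewConfig()
//    if err := cfg.LoadFromTOML([]byte(sampleTOML)); err != nil {
//        return nil, err
//    }
//    if err := cfg.Adjust(context.Background()); err != nil {
//        return nil, err
//    }
//    return cfg, nil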