176 lines
4.9 KiB
Go
176 lines
4.9 KiB
Go
// Copyright 2025 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
package importsdk
|
|
|
|
import (
	"path/filepath"
	"strings"
	"unicode/utf8"

	"github.com/pingcap/errors"
	"github.com/pingcap/tidb/pkg/lightning/mydump"
)
|
|
|
|
// generateWildcardPath creates a wildcard pattern path that matches only this table's files
|
|
func generateWildcardPath(
|
|
files []mydump.FileInfo,
|
|
allFiles map[string]mydump.FileInfo,
|
|
) (string, error) {
|
|
tableFiles := make(map[string]struct{}, len(files))
|
|
for _, df := range files {
|
|
tableFiles[df.FileMeta.Path] = struct{}{}
|
|
}
|
|
|
|
if len(files) == 0 {
|
|
return "", errors.Annotate(ErrNoTableDataFiles, "cannot generate wildcard pattern because the table has no data files")
|
|
}
|
|
|
|
// If there's only one file, we can just return its path
|
|
if len(files) == 1 {
|
|
return files[0].FileMeta.Path, nil
|
|
}
|
|
|
|
// Try Mydumper-specific pattern first
|
|
p := generateMydumperPattern(files[0])
|
|
if p != "" && isValidPattern(p, tableFiles, allFiles) {
|
|
return p, nil
|
|
}
|
|
|
|
// Fallback to generic prefix/suffix pattern
|
|
paths := make([]string, 0, len(files))
|
|
for _, file := range files {
|
|
paths = append(paths, file.FileMeta.Path)
|
|
}
|
|
p = generatePrefixSuffixPattern(paths)
|
|
if p != "" && isValidPattern(p, tableFiles, allFiles) {
|
|
return p, nil
|
|
}
|
|
return "", errors.Annotatef(ErrWildcardNotSpecific, "failed to find a wildcard that matches all and only the table's files.")
|
|
}
|
|
|
|
// isValidPattern checks if a wildcard pattern matches only the table's files
|
|
func isValidPattern(pattern string, tableFiles map[string]struct{}, allFiles map[string]mydump.FileInfo) bool {
|
|
if pattern == "" {
|
|
return false
|
|
}
|
|
|
|
for path := range allFiles {
|
|
isMatch, err := filepath.Match(pattern, path)
|
|
if err != nil {
|
|
return false // Invalid pattern
|
|
}
|
|
_, isTableFile := tableFiles[path]
|
|
|
|
// If pattern matches a file that's not from our table, it's invalid
|
|
if isMatch && !isTableFile {
|
|
return false
|
|
}
|
|
|
|
// If pattern doesn't match our table's file, it's also invalid
|
|
if !isMatch && isTableFile {
|
|
return false
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// generateMydumperPattern generates a wildcard pattern for Mydumper-formatted data files
|
|
// belonging to a specific table, based on their naming convention.
|
|
// It returns a pattern string that matches all data files for the table, or an empty string if not applicable.
|
|
func generateMydumperPattern(file mydump.FileInfo) string {
|
|
dbName, tableName := file.TableName.Schema, file.TableName.Name
|
|
if dbName == "" || tableName == "" {
|
|
return ""
|
|
}
|
|
|
|
// compute dirPrefix and basename
|
|
full := file.FileMeta.Path
|
|
dirPrefix, name := "", full
|
|
if idx := strings.LastIndex(full, "/"); idx >= 0 {
|
|
dirPrefix = full[:idx+1]
|
|
name = full[idx+1:]
|
|
}
|
|
|
|
// compression ext from filename when compression exists (last suffix like .gz/.zst)
|
|
compExt := ""
|
|
if file.FileMeta.Compression != mydump.CompressionNone {
|
|
compExt = filepath.Ext(name)
|
|
}
|
|
|
|
// data ext after stripping compression ext
|
|
base := strings.TrimSuffix(name, compExt)
|
|
dataExt := filepath.Ext(base)
|
|
return dirPrefix + dbName + "." + tableName + ".*" + dataExt + compExt
|
|
}
|
|
|
|
// longestCommonPrefix returns the longest string that is a prefix of every
// string in strs, or "" when strs is empty or the strings share no prefix.
//
// The comparison is done byte by byte, but the result is never cut in the
// middle of a multi-byte UTF-8 sequence: if the first differing byte falls
// inside a rune, the prefix is shortened to the start of that rune. For valid
// UTF-8 inputs the result is therefore valid UTF-8 as well, which keeps a
// pattern built from it well-formed.
func longestCommonPrefix(strs []string) string {
	if len(strs) == 0 {
		return ""
	}

	prefix := strs[0]
	for _, s := range strs[1:] {
		i := 0
		for i < len(prefix) && i < len(s) && prefix[i] == s[i] {
			i++
		}

		// prefix[i] is the first byte that is NOT kept. If it is a UTF-8
		// continuation byte, the cut would split a rune; back off until the
		// cut sits on a rune boundary.
		if i < len(prefix) {
			for i > 0 && !utf8.RuneStart(prefix[i]) {
				i--
			}
		}

		prefix = prefix[:i]
		if prefix == "" {
			break
		}
	}

	return prefix
}
|
|
|
|
// longestCommonSuffix returns the longest string that is a suffix of every
// string in strs, considering only the part of each string that follows its
// first prefixLen bytes. Skipping those bytes guarantees the returned suffix
// never overlaps a prefix of that length.
//
// It returns "" when strs is empty or there is no common suffix. A negative
// prefixLen is treated as 0, and a string shorter than prefixLen contributes
// an empty remainder (so the result is ""), instead of causing a panic.
//
// The comparison is done byte by byte, but the result never starts in the
// middle of a multi-byte UTF-8 sequence: leading continuation bytes are
// dropped so that the suffix begins on a rune boundary.
func longestCommonSuffix(strs []string, prefixLen int) string {
	if len(strs) == 0 {
		return ""
	}
	if prefixLen < 0 {
		prefixLen = 0
	}

	// tail returns the part of s after the first prefixLen bytes, or "" when
	// s is not longer than that.
	tail := func(s string) string {
		if prefixLen >= len(s) {
			return ""
		}
		return s[prefixLen:]
	}

	suffix := tail(strs[0])
	for _, s := range strs[1:] {
		remaining := tail(s)

		// Count how many trailing bytes the two strings share.
		i := 0
		for i < len(suffix) && i < len(remaining) && suffix[len(suffix)-i-1] == remaining[len(remaining)-i-1] {
			i++
		}

		// suffix[len(suffix)-i] is the first byte that is kept. If it is a
		// UTF-8 continuation byte the suffix would start inside a rune;
		// shrink it until it starts on a rune boundary.
		for i > 0 && !utf8.RuneStart(suffix[len(suffix)-i]) {
			i--
		}

		suffix = suffix[len(suffix)-i:]
		if suffix == "" {
			break
		}
	}

	return suffix
}
|
|
|
|
// generatePrefixSuffixPattern returns a wildcard pattern that matches all and only the given paths
|
|
// by finding the longest common prefix and suffix among them, and placing a '*' wildcard in between.
|
|
func generatePrefixSuffixPattern(paths []string) string {
|
|
if len(paths) == 0 {
|
|
return ""
|
|
}
|
|
if len(paths) == 1 {
|
|
return paths[0]
|
|
}
|
|
|
|
prefix := longestCommonPrefix(paths)
|
|
suffix := longestCommonSuffix(paths, len(prefix))
|
|
|
|
return prefix + "*" + suffix
|
|
}
|