603 lines
21 KiB
Go
603 lines
21 KiB
Go
// Copyright 2023 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package loadremotetest
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"github.com/fsouza/fake-gcs-server/fakestorage"
|
|
"github.com/pingcap/tidb/pkg/testkit"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func (s *mockGCSSuite) TestLoadCSV() {
|
|
s.tk.MustExec("DROP DATABASE IF EXISTS load_csv;")
|
|
s.tk.MustExec("CREATE DATABASE load_csv;")
|
|
s.tk.MustExec("CREATE TABLE load_csv.t (i INT, s varchar(32));")
|
|
|
|
// no-new-line-at-end
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-load-csv",
|
|
Name: "no-new-line-at-end.csv",
|
|
},
|
|
Content: []byte(`i,s
|
|
100,"test100"
|
|
101,"\""
|
|
102,"😄😄😄😄😄"
|
|
104,""`),
|
|
})
|
|
|
|
sql := fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load-csv/no-new-line-at-end.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
|
|
LINES TERMINATED BY '\n' IGNORE 1 LINES;`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows(
|
|
"100 test100",
|
|
"101 \"",
|
|
"102 😄😄😄😄😄",
|
|
"104 ",
|
|
))
|
|
s.tk.MustExec("TRUNCATE TABLE load_csv.t;")
|
|
|
|
// new-line-at-end
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-load-csv",
|
|
Name: "new-line-at-end.csv",
|
|
},
|
|
Content: []byte(`i,s
|
|
100,"test100"
|
|
101,"\""
|
|
102,"😄😄😄😄😄"
|
|
104,""
|
|
`),
|
|
})
|
|
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load-csv/new-line-at-end.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
|
|
LINES TERMINATED BY '\n' IGNORE 1 LINES;`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows(
|
|
"100 test100",
|
|
"101 \"",
|
|
"102 😄😄😄😄😄",
|
|
"104 ",
|
|
))
|
|
s.tk.MustExec("TRUNCATE TABLE load_csv.t;")
|
|
|
|
// can't read file at tidb-server
|
|
sql = "LOAD DATA INFILE '/etc/passwd' INTO TABLE load_csv.t;"
|
|
s.tk.MustContainErrMsg(sql, "Don't support load data from tidb-server's disk. Or if you want to load local data via client, the path of INFILE '/etc/passwd' needs to specify the clause of LOCAL first")
|
|
}
|
|
|
|
func (s *mockGCSSuite) TestLoadCsvInTransaction() {
|
|
s.tk.MustExec("DROP DATABASE IF EXISTS load_csv;")
|
|
s.tk.MustExec("CREATE DATABASE load_csv;")
|
|
s.tk.MustExec("CREATE TABLE load_csv.t (i INT, s varchar(32));")
|
|
|
|
s.server.CreateObject(
|
|
fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-load-csv",
|
|
Name: "data.csv",
|
|
},
|
|
Content: []byte("100, test100\n101, hello\n102, 😄😄😄😄😄\n104, bye"),
|
|
},
|
|
)
|
|
|
|
s.tk.MustExec("begin pessimistic")
|
|
sql := fmt.Sprintf(
|
|
`LOAD DATA INFILE 'gs://test-load-csv/data.csv?endpoint=%s' INTO TABLE load_csv.t `+
|
|
"FIELDS TERMINATED BY ','",
|
|
s.GetGCSEndpoint(),
|
|
)
|
|
// test: load data stmt doesn't commit it
|
|
s.tk.MustExec("insert into load_csv.t values (1, 'a')")
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("select i from load_csv.t order by i").Check(
|
|
testkit.Rows(
|
|
"1", "100", "101",
|
|
"102", "104",
|
|
),
|
|
)
|
|
// load data can be rolled back
|
|
s.tk.MustExec("rollback")
|
|
s.tk.MustQuery("select * from load_csv.t").Check(testkit.Rows())
|
|
|
|
// load data commit
|
|
s.tk.MustExec("begin pessimistic")
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustExec("commit")
|
|
s.tk.MustQuery("select i from load_csv.t").Check(testkit.Rows("100", "101", "102", "104"))
|
|
}
|
|
|
|
func (s *mockGCSSuite) TestIgnoreNLines() {
|
|
s.tk.MustExec("DROP DATABASE IF EXISTS load_csv;")
|
|
s.tk.MustExec("CREATE DATABASE load_csv;")
|
|
s.tk.MustExec("CREATE TABLE load_csv.t (s varchar(32), i INT);")
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-bucket",
|
|
Name: "ignore-lines-bad-syntax.csv",
|
|
},
|
|
Content: []byte(`"bad syntax"1
|
|
"b",2
|
|
"c",3
|
|
`),
|
|
})
|
|
|
|
sql := fmt.Sprintf(`LOAD DATA INFILE 'gcs://test-bucket/ignore-lines-bad-syntax.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
|
|
LINES TERMINATED BY '\n' IGNORE 1 LINES;`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows(
|
|
"b 2",
|
|
"c 3",
|
|
))
|
|
s.tk.MustExec("TRUNCATE TABLE load_csv.t;")
|
|
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gcs://test-bucket/ignore-lines-bad-syntax.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
|
|
LINES TERMINATED BY '\n' IGNORE 100 LINES;`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows())
|
|
s.tk.MustExec("TRUNCATE TABLE load_csv.t;")
|
|
|
|
// test IGNORE N LINES will directly find (line) terminator without checking it's inside quotes
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-bucket",
|
|
Name: "count-terminator-inside-quotes.csv",
|
|
},
|
|
Content: []byte(`"a
|
|
",1
|
|
"b
|
|
",2
|
|
"c",3
|
|
`),
|
|
})
|
|
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gcs://test-bucket/count-terminator-inside-quotes.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
|
|
LINES TERMINATED BY '\n' IGNORE 2 LINES;`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows(
|
|
"b\n 2",
|
|
"c 3",
|
|
))
|
|
s.tk.MustExec("TRUNCATE TABLE load_csv.t;")
|
|
}
|
|
|
|
func (s *mockGCSSuite) TestCustomizeNULL() {
|
|
s.tk.MustExec("DROP DATABASE IF EXISTS load_csv;")
|
|
s.tk.MustExec("CREATE DATABASE load_csv;")
|
|
s.tk.MustExec("CREATE TABLE load_csv.t (c varchar(32), c2 varchar(32));")
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-bucket",
|
|
Name: "customize-null.csv",
|
|
},
|
|
Content: []byte(`\N,"\N"
|
|
!N,"!N"
|
|
NULL,"NULL"
|
|
mynull,"mynull"
|
|
`),
|
|
})
|
|
|
|
sql := fmt.Sprintf(`LOAD DATA INFILE 'gcs://test-bucket/customize-null.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ','
|
|
LINES TERMINATED BY '\n';`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows(
|
|
`<nil> "N"`,
|
|
`!N "!N"`,
|
|
`NULL "NULL"`,
|
|
`mynull "mynull"`,
|
|
))
|
|
s.tk.MustExec("TRUNCATE TABLE load_csv.t;")
|
|
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gcs://test-bucket/customize-null.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' ESCAPED BY '\\'
|
|
LINES TERMINATED BY '\n';`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows(
|
|
`<nil> "N"`,
|
|
`!N "!N"`,
|
|
`NULL "NULL"`,
|
|
`mynull "mynull"`,
|
|
))
|
|
s.tk.MustExec("TRUNCATE TABLE load_csv.t;")
|
|
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gcs://test-bucket/customize-null.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '!'
|
|
LINES TERMINATED BY '\n';`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows(
|
|
`\N \N`,
|
|
`<nil> <nil>`,
|
|
`<nil> NULL`,
|
|
`mynull mynull`,
|
|
))
|
|
s.tk.MustExec("TRUNCATE TABLE load_csv.t;")
|
|
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gcs://test-bucket/customize-null.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\' DEFINED NULL BY 'NULL'
|
|
LINES TERMINATED BY '\n';`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows(
|
|
`<nil> <nil>`,
|
|
`!N !N`,
|
|
`<nil> NULL`,
|
|
`mynull mynull`,
|
|
))
|
|
s.tk.MustExec("TRUNCATE TABLE load_csv.t;")
|
|
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gcs://test-bucket/customize-null.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\' DEFINED NULL BY 'NULL' OPTIONALLY ENCLOSED
|
|
LINES TERMINATED BY '\n'`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows(
|
|
`<nil> <nil>`,
|
|
`!N !N`,
|
|
`<nil> <nil>`,
|
|
`mynull mynull`,
|
|
))
|
|
s.tk.MustExec("TRUNCATE TABLE load_csv.t;")
|
|
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gcs://test-bucket/customize-null.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '!' DEFINED NULL BY 'mynull' OPTIONALLY ENCLOSED
|
|
LINES TERMINATED BY '\n'`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows(
|
|
`\N \N`,
|
|
`<nil> <nil>`,
|
|
`NULL NULL`,
|
|
`<nil> <nil>`,
|
|
))
|
|
s.tk.MustExec("TRUNCATE TABLE load_csv.t;")
|
|
|
|
ascii0 := string([]byte{0})
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-bucket",
|
|
Name: "ascii-0.csv",
|
|
},
|
|
Content: fmt.Appendf(nil, `\0,"\0"
|
|
%s,"%s"`, ascii0, ascii0),
|
|
})
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gcs://test-bucket/ascii-0.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '' DEFINED NULL BY x'00' OPTIONALLY ENCLOSED
|
|
LINES TERMINATED BY '\n';`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows(
|
|
`\0 \0`,
|
|
`<nil> <nil>`,
|
|
))
|
|
s.tk.MustExec("TRUNCATE TABLE load_csv.t;")
|
|
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gcs://test-bucket/ascii-0.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\' DEFINED NULL BY x'00'
|
|
LINES TERMINATED BY '\n';`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows(
|
|
"<nil> \000",
|
|
"<nil> \000",
|
|
))
|
|
s.tk.MustExec("TRUNCATE TABLE load_csv.t;")
|
|
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gcs://test-bucket/customize-null.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' DEFINED NULL BY 'mynull' OPTIONALLY ENCLOSED
|
|
LINES TERMINATED BY '\n';`, s.GetGCSEndpoint())
|
|
s.tk.MustMatchErrMsg(sql, `must specify FIELDS \[OPTIONALLY\] ENCLOSED BY`)
|
|
}
|
|
|
|
func (s *mockGCSSuite) TestGeneratedColumns() {
|
|
s.tk.MustExec("DROP DATABASE IF EXISTS load_csv;")
|
|
s.tk.MustExec("CREATE DATABASE load_csv;")
|
|
s.tk.MustExec("USE load_csv;")
|
|
s.tk.MustExec("set @@sql_mode = ''")
|
|
s.tk.MustExec(`CREATE TABLE load_csv.t_gen1 (a int, b int generated ALWAYS AS (a+1));`)
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-bucket",
|
|
Name: "generated_columns.csv",
|
|
},
|
|
Content: []byte("1\t2\n2\t3"),
|
|
})
|
|
|
|
s.tk.MustExec(fmt.Sprintf("LOAD DATA INFILE 'gcs://test-bucket/generated_columns.csv?endpoint=%s'"+
|
|
" INTO TABLE load_csv.t_gen1", s.GetGCSEndpoint()))
|
|
s.tk.MustQuery("select * from t_gen1").Check(testkit.Rows("1 2", "2 3"))
|
|
s.tk.MustExec("delete from t_gen1")
|
|
|
|
// Specify the column, this should also work.
|
|
s.tk.MustExec(fmt.Sprintf("LOAD DATA INFILE 'gcs://test-bucket/generated_columns.csv?endpoint=%s'"+
|
|
" INTO TABLE load_csv.t_gen1 (a)", s.GetGCSEndpoint()))
|
|
s.tk.MustQuery("select * from t_gen1").Check(testkit.Rows("1 2", "2 3"))
|
|
|
|
// Swap the column and test again.
|
|
s.tk.MustExec(`create table t_gen2 (a int generated ALWAYS AS (b+1), b int);`)
|
|
s.tk.MustExec(fmt.Sprintf("LOAD DATA INFILE 'gcs://test-bucket/generated_columns.csv?endpoint=%s'"+
|
|
" INTO TABLE load_csv.t_gen2", s.GetGCSEndpoint()))
|
|
s.tk.MustQuery("select * from t_gen2").Check(testkit.Rows("3 2", "4 3"))
|
|
s.tk.MustExec(`delete from t_gen2`)
|
|
|
|
// Specify the column b
|
|
s.tk.MustExec(fmt.Sprintf("LOAD DATA INFILE 'gcs://test-bucket/generated_columns.csv?endpoint=%s'"+
|
|
" INTO TABLE load_csv.t_gen2 (b)", s.GetGCSEndpoint()))
|
|
s.tk.MustQuery("select * from t_gen2").Check(testkit.Rows("2 1", "3 2"))
|
|
s.tk.MustExec(`delete from t_gen2`)
|
|
|
|
// Specify the column a
|
|
s.tk.MustExec(fmt.Sprintf("LOAD DATA INFILE 'gcs://test-bucket/generated_columns.csv?endpoint=%s'"+
|
|
" INTO TABLE load_csv.t_gen2 (a)", s.GetGCSEndpoint()))
|
|
s.tk.MustQuery("select * from t_gen2").Check(testkit.Rows("<nil> <nil>", "<nil> <nil>"))
|
|
}
|
|
|
|
func (s *mockGCSSuite) TestMultiValueIndex() {
|
|
s.tk.MustExec("DROP DATABASE IF EXISTS load_csv;")
|
|
s.tk.MustExec("CREATE DATABASE load_csv;")
|
|
s.tk.MustExec(`CREATE TABLE load_csv.t (
|
|
i INT, j JSON,
|
|
KEY idx ((cast(j as signed array)))
|
|
);`)
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-load-csv",
|
|
Name: "1.csv",
|
|
},
|
|
Content: []byte(`i,s
|
|
1,"[1,2,3]"
|
|
2,"[2,3,4]"`),
|
|
})
|
|
|
|
sql := fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load-csv/1.csv?endpoint=%s' INTO TABLE load_csv.t
|
|
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
|
|
LINES TERMINATED BY '\n' IGNORE 1 LINES;`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_csv.t;").Check(testkit.Rows(
|
|
"1 [1, 2, 3]",
|
|
"2 [2, 3, 4]",
|
|
))
|
|
}
|
|
|
|
func (s *mockGCSSuite) TestGBK() {
|
|
s.tk.MustExec("DROP DATABASE IF EXISTS load_charset;")
|
|
s.tk.MustExec("CREATE DATABASE load_charset;")
|
|
s.tk.MustExec(`CREATE TABLE load_charset.gbk (
|
|
i INT, j VARCHAR(255)
|
|
) CHARACTER SET gbk;`)
|
|
s.tk.MustExec(`CREATE TABLE load_charset.utf8mb4 (
|
|
i INT, j VARCHAR(255)
|
|
) CHARACTER SET utf8mb4;`)
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-load",
|
|
Name: "gbk.tsv",
|
|
},
|
|
Content: []byte{
|
|
// 1 一丁丂七丄丅丆万丈三上下丌不与丏
|
|
0x31, 0x09, 0xd2, 0xbb, 0xb6, 0xa1, 0x81, 0x40, 0xc6, 0xdf, 0x81,
|
|
0x41, 0x81, 0x42, 0x81, 0x43, 0xcd, 0xf2, 0xd5, 0xc9, 0xc8, 0xfd,
|
|
0xc9, 0xcf, 0xcf, 0xc2, 0xd8, 0xa2, 0xb2, 0xbb, 0xd3, 0xeb, 0x81,
|
|
0x44, 0x0a,
|
|
// 2 丐丑丒专且丕世丗丘丙业丛东丝丞丢
|
|
0x32, 0x09, 0xd8, 0xa4, 0xb3, 0xf3, 0x81, 0x45, 0xd7, 0xa8, 0xc7,
|
|
0xd2, 0xd8, 0xa7, 0xca, 0xc0, 0x81, 0x46, 0xc7, 0xf0, 0xb1, 0xfb,
|
|
0xd2, 0xb5, 0xb4, 0xd4, 0xb6, 0xab, 0xcb, 0xbf, 0xd8, 0xa9, 0xb6,
|
|
0xaa,
|
|
},
|
|
})
|
|
|
|
sql := fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/gbk.tsv?endpoint=%s'
|
|
INTO TABLE load_charset.gbk CHARACTER SET gbk`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_charset.gbk;").Check(testkit.Rows(
|
|
"1 一丁丂七丄丅丆万丈三上下丌不与丏",
|
|
"2 丐丑丒专且丕世丗丘丙业丛东丝丞丢",
|
|
))
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/gbk.tsv?endpoint=%s'
|
|
INTO TABLE load_charset.utf8mb4 CHARACTER SET gbk`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_charset.utf8mb4;").Check(testkit.Rows(
|
|
"1 一丁丂七丄丅丆万丈三上下丌不与丏",
|
|
"2 丐丑丒专且丕世丗丘丙业丛东丝丞丢",
|
|
))
|
|
|
|
s.tk.MustExec("TRUNCATE TABLE load_charset.utf8mb4;")
|
|
s.tk.MustExec("SET SESSION character_set_database = 'gbk';")
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/gbk.tsv?endpoint=%s'
|
|
INTO TABLE load_charset.utf8mb4`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_charset.utf8mb4;").Check(testkit.Rows(
|
|
"1 一丁丂七丄丅丆万丈三上下丌不与丏",
|
|
"2 丐丑丒专且丕世丗丘丙业丛东丝丞丢",
|
|
))
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-load",
|
|
Name: "utf8mb4.tsv",
|
|
},
|
|
Content: []byte("1\t一丁丂七丄丅丆万丈三上下丌不与丏\n" +
|
|
"2\t丐丑丒专且丕世丗丘丙业丛东丝丞丢"),
|
|
})
|
|
|
|
s.tk.MustExec("TRUNCATE TABLE load_charset.gbk;")
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/utf8mb4.tsv?endpoint=%s'
|
|
INTO TABLE load_charset.gbk CHARACTER SET utf8mb4`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_charset.gbk;").Check(testkit.Rows(
|
|
"1 一丁丂七丄丅丆万丈三上下丌不与丏",
|
|
"2 丐丑丒专且丕世丗丘丙业丛东丝丞丢",
|
|
))
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-load",
|
|
Name: "emoji.tsv",
|
|
},
|
|
Content: []byte("1\t一丁丂七😀😁😂😃\n" +
|
|
"2\t丐丑丒专😄😅😆😇"),
|
|
})
|
|
|
|
s.tk.MustExec("TRUNCATE TABLE load_charset.gbk;")
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/emoji.tsv?endpoint=%s'
|
|
INTO TABLE load_charset.gbk CHARACTER SET utf8mb4`, s.GetGCSEndpoint())
|
|
err := s.tk.ExecToErr(sql)
|
|
checkClientErrorMessage(s.T(), err, `ERROR 1366 (HY000): Incorrect string value '\xF0\x9F\x98\x80' for column 'j'`)
|
|
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/emoji.tsv?endpoint=%s'
|
|
IGNORE INTO TABLE load_charset.gbk CHARACTER SET utf8mb4`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
require.Equal(s.T(), "Records: 2 Deleted: 0 Skipped: 0 Warnings: 2", s.tk.Session().GetSessionVars().StmtCtx.GetMessage())
|
|
s.tk.MustQuery("SELECT HEX(j) FROM load_charset.gbk;").Check(testkit.Rows(
|
|
"D2BBB6A18140C6DF3F3F3F3F",
|
|
"D8A4B3F38145D7A83F3F3F3F",
|
|
))
|
|
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/gbk.tsv?endpoint=%s'
|
|
INTO TABLE load_charset.utf8mb4 CHARACTER SET unknown`, s.GetGCSEndpoint())
|
|
err = s.tk.ExecToErr(sql)
|
|
require.ErrorContains(s.T(), err, "Unknown character set: 'unknown'")
|
|
}
|
|
|
|
func (s *mockGCSSuite) TestOtherCharset() {
|
|
s.tk.MustExec("DROP DATABASE IF EXISTS load_charset;")
|
|
s.tk.MustExec("CREATE DATABASE load_charset;")
|
|
s.tk.MustExec(`CREATE TABLE load_charset.utf8 (
|
|
i INT, j VARCHAR(255)
|
|
) CHARACTER SET utf8;`)
|
|
s.tk.MustExec(`CREATE TABLE load_charset.utf8mb4 (
|
|
i INT, j VARCHAR(255)
|
|
) CHARACTER SET utf8mb4;`)
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-load",
|
|
Name: "utf8.tsv",
|
|
},
|
|
Content: []byte("1\tကခဂဃ\n2\tငစဆဇ"),
|
|
})
|
|
|
|
sql := fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/utf8.tsv?endpoint=%s'
|
|
INTO TABLE load_charset.utf8 CHARACTER SET utf8`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_charset.utf8;").Check(testkit.Rows(
|
|
"1 ကခဂဃ",
|
|
"2 ငစဆဇ",
|
|
))
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/utf8.tsv?endpoint=%s'
|
|
INTO TABLE load_charset.utf8mb4 CHARACTER SET utf8`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_charset.utf8mb4;").Check(testkit.Rows(
|
|
"1 ကခဂဃ",
|
|
"2 ငစဆဇ",
|
|
))
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-load",
|
|
Name: "latin1.tsv",
|
|
},
|
|
// "1\t‘’“”\n2\t¡¢£¤"
|
|
Content: []byte{0x31, 0x09, 0x91, 0x92, 0x93, 0x94, 0x0a, 0x32, 0x09, 0xa1, 0xa2, 0xa3, 0xa4},
|
|
})
|
|
s.tk.MustExec(`CREATE TABLE load_charset.latin1 (
|
|
i INT, j VARCHAR(255)
|
|
) CHARACTER SET latin1;`)
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/latin1.tsv?endpoint=%s'
|
|
INTO TABLE load_charset.latin1 CHARACTER SET latin1`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_charset.latin1;").Check(testkit.Rows(
|
|
"1 ‘’“”",
|
|
"2 ¡¢£¤",
|
|
))
|
|
|
|
s.tk.MustExec("TRUNCATE TABLE load_charset.utf8mb4;")
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/latin1.tsv?endpoint=%s'
|
|
INTO TABLE load_charset.utf8mb4 CHARACTER SET latin1`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_charset.utf8mb4;").Check(testkit.Rows(
|
|
"1 ‘’“”",
|
|
"2 ¡¢£¤",
|
|
))
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{
|
|
BucketName: "test-load",
|
|
Name: "ascii.tsv",
|
|
},
|
|
Content: []byte{0, 1, 2, 3, 4, 5, 6, 7},
|
|
})
|
|
s.tk.MustExec(`CREATE TABLE load_charset.ascii (
|
|
j VARCHAR(255)
|
|
) CHARACTER SET ascii;`)
|
|
s.tk.MustExec(`CREATE TABLE load_charset.binary (
|
|
j VARCHAR(255)
|
|
) CHARACTER SET binary;`)
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/ascii.tsv?endpoint=%s'
|
|
INTO TABLE load_charset.ascii CHARACTER SET ascii`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT HEX(j) FROM load_charset.ascii;").Check(testkit.Rows(
|
|
"0001020304050607",
|
|
))
|
|
sql = fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/ascii.tsv?endpoint=%s'
|
|
INTO TABLE load_charset.binary CHARACTER SET binary`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT HEX(j) FROM load_charset.binary;").Check(testkit.Rows(
|
|
"0001020304050607",
|
|
))
|
|
}
|
|
|
|
func (s *mockGCSSuite) TestColumnsAndUserVars() {
|
|
s.tk.MustExec("DROP DATABASE IF EXISTS load_data;")
|
|
s.tk.MustExec("CREATE DATABASE load_data;")
|
|
s.tk.MustExec(`CREATE TABLE load_data.cols_and_vars (a INT, b INT, c int);`)
|
|
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{BucketName: "test-load", Name: "cols_and_vars-1.tsv"},
|
|
Content: []byte("1,11,111\n2,22,222\n3,33,333\n4,44,444\n5,55,555\n"),
|
|
})
|
|
s.server.CreateObject(fakestorage.Object{
|
|
ObjectAttrs: fakestorage.ObjectAttrs{BucketName: "test-load", Name: "cols_and_vars-2.tsv"},
|
|
Content: []byte("6,66,666\n7,77,777\n8,88,888\n9,99,999\n"),
|
|
})
|
|
sql := fmt.Sprintf(`LOAD DATA INFILE 'gs://test-load/cols_and_vars-*.tsv?endpoint=%s'
|
|
INTO TABLE load_data.cols_and_vars fields terminated by ','
|
|
(@V1, @v2, @v3) set a=@V1, b=@V2*10, c=123`, s.GetGCSEndpoint())
|
|
s.tk.MustExec(sql)
|
|
s.tk.MustQuery("SELECT * FROM load_data.cols_and_vars;").Sort().Check(testkit.Rows(
|
|
"1 110 123",
|
|
"2 220 123",
|
|
"3 330 123",
|
|
"4 440 123",
|
|
"5 550 123",
|
|
"6 660 123",
|
|
"7 770 123",
|
|
"8 880 123",
|
|
"9 990 123",
|
|
))
|
|
}
|