[improvement](Load) Make broker load support the properties of trim_double_quotes and skip_lines (#16622)

`trim_double_quotes` and `skip_lines` were already supported in stream load.
This change makes broker load support them too.
This commit is contained in:
huangzhaowei
2023-02-12 16:52:59 +08:00
committed by GitHub
parent 6a8fc35b78
commit 78a958467f
6 changed files with 59 additions and 0 deletions

View File

@ -477,6 +477,24 @@ WITH BROKER broker_name
)
```
12. Load CSV data, trimming double quotes and skipping the first 5 lines
```SQL
LOAD LABEL example_db.label12
(
DATA INFILE("cosn://my_bucket/input/file.csv")
INTO TABLE `my_table`
(k1, k2, k3)
PROPERTIES("trim_double_quotes" = "true", "skip_lines" = "5")
)
WITH BROKER "broker_name"
(
"fs.cosn.userinfo.secretId" = "xxx",
"fs.cosn.userinfo.secretKey" = "xxxx",
"fs.cosn.bucket.endpoint_suffix" = "cos.xxxxxxxxx.myqcloud.com"
)
```
### Keywords
BROKER, LOAD

View File

@ -182,6 +182,8 @@ ERRORS:
25. trim_double_quotes: Boolean type, the default value is false. True means that the outermost double quotes of each field in the csv file are trimmed.
26. skip_lines: <version since="dev" type="inline"> Integer type, the default value is 0. It skips the given number of lines at the head of the csv file. It has no effect when the format is `csv_with_names` or `csv_with_names_and_types`. </version>
### Example
1. Import the data in the local file 'testData' into the table 'testTbl' in the database 'testDb', and use Label for deduplication. Specify a timeout of 100 seconds

View File

@ -475,6 +475,24 @@ WITH BROKER broker_name
)
```
12. 导入CSV数据时去掉双引号, 并跳过前5行。
```SQL
LOAD LABEL example_db.label12
(
DATA INFILE("cosn://my_bucket/input/file.csv")
INTO TABLE `my_table`
(k1, k2, k3)
PROPERTIES("trim_double_quotes" = "true", "skip_lines" = "5")
)
WITH BROKER "broker_name"
(
"fs.cosn.userinfo.secretId" = "xxx",
"fs.cosn.userinfo.secretKey" = "xxxx",
"fs.cosn.bucket.endpoint_suffix" = "cos.xxxxxxxxx.myqcloud.com"
)
```
### Keywords
BROKER, LOAD

View File

@ -179,6 +179,7 @@ ERRORS:
25. trim_double_quotes: 布尔类型,默认值为 false,为 true 时表示裁剪掉 csv 文件每个字段最外层的双引号。
26. skip_lines: <version since="dev" type="inline"> 整数类型, 默认值为0, 含义为跳过csv文件的前几行. 当 format 设置为 `csv_with_names` 或 `csv_with_names_and_types` 时, 该参数会失效. </version>
### Example
1. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表,使用Label用于去重。指定超时时间为 100 秒

View File

@ -938,6 +938,13 @@ public class DataDescription {
if (analysisMap.containsKey(LoadStmt.KEY_IN_PARAM_NUM_AS_STRING)) {
numAsString = Boolean.parseBoolean(analysisMap.get(LoadStmt.KEY_IN_PARAM_NUM_AS_STRING));
}
if (analysisMap.containsKey(LoadStmt.KEY_TRIM_DOUBLE_QUOTES)) {
trimDoubleQuotes = Boolean.parseBoolean(analysisMap.get(LoadStmt.KEY_TRIM_DOUBLE_QUOTES));
}
if (analysisMap.containsKey(LoadStmt.KEY_SKIP_LINES)) {
skipLines = Integer.parseInt(analysisMap.get(LoadStmt.KEY_SKIP_LINES));
}
}
private void checkLoadPriv(String fullDbName) throws AnalysisException {

View File

@ -118,6 +118,7 @@ public class LoadStmt extends DdlStmt {
public static final String KEY_IN_PARAM_SEQUENCE_COL = "sequence_col";
public static final String KEY_IN_PARAM_BACKEND_ID = "backend_id";
// Property key for the number of leading csv lines to skip; its value is parsed as an integer.
public static final String KEY_SKIP_LINES = "skip_lines";
// Property key for trimming the outermost double quotes of csv fields; its value is parsed as a boolean.
public static final String KEY_TRIM_DOUBLE_QUOTES = "trim_double_quotes";
private final LabelName label;
private final List<DataDescription> dataDescriptions;
@ -192,6 +193,18 @@ public class LoadStmt extends DdlStmt {
return Boolean.valueOf(s);
}
})
.put(KEY_SKIP_LINES, new Function<String, Integer>() {
@Override
public @Nullable Integer apply(@Nullable String s) {
return Integer.valueOf(s);
}
})
.put(KEY_TRIM_DOUBLE_QUOTES, new Function<String, Boolean>() {
@Override
public @Nullable Boolean apply(@Nullable String s) {
return Boolean.valueOf(s);
}
})
.build();
public LoadStmt(DataDescription dataDescription, Map<String, String> properties) {