[Enhancement](Outfile/Export) Export data to csv file format with BOM (#30533)
UTF-8 text files on Windows systems commonly begin with a BOM (byte order mark).
We add a new user property, `with_bom`, to `Outfile/Export`. When exporting Doris data, users can therefore choose whether to write a BOM at the beginning of the CSV file.
**Usage:**
```sql
-- outfile:
select * from demo.student
into outfile "file:///xxx/export/exp_"
format as csv
properties(
"column_separator" = ",",
"with_bom" = "true"
);
-- Export:
EXPORT TABLE student TO "file:///xx/tmpdata/export/exp_"
PROPERTIES(
"format" = "csv",
"with_bom" = "true"
);
```
This commit is contained in:
@ -103,6 +103,7 @@ public class ExportStmt extends StatementBase {
|
||||
|
||||
private String maxFileSize;
|
||||
private String deleteExistingFiles;
|
||||
private String withBom;
|
||||
private SessionVariable sessionVariables;
|
||||
|
||||
private String qualifiedUser;
|
||||
@ -228,6 +229,7 @@ public class ExportStmt extends StatementBase {
|
||||
exportJob.setParallelism(this.parallelism);
|
||||
exportJob.setMaxFileSize(this.maxFileSize);
|
||||
exportJob.setDeleteExistingFiles(this.deleteExistingFiles);
|
||||
exportJob.setWithBom(this.withBom);
|
||||
|
||||
if (columns != null) {
|
||||
Splitter split = Splitter.on(',').trimResults().omitEmptyStrings();
|
||||
@ -354,6 +356,9 @@ public class ExportStmt extends StatementBase {
|
||||
// generate a random label
|
||||
this.label = "export_" + UUID.randomUUID();
|
||||
}
|
||||
|
||||
// with bom
|
||||
this.withBom = properties.getOrDefault(OutFileClause.PROP_WITH_BOM, "false");
|
||||
}
|
||||
|
||||
private void checkColumns() throws DdlException {
|
||||
|
||||
@ -136,6 +136,7 @@ public class OutFileClause {
|
||||
private static final String PROP_SUCCESS_FILE_NAME = "success_file_name";
|
||||
public static final String PROP_DELETE_EXISTING_FILES = "delete_existing_files";
|
||||
public static final String PROP_FILE_SUFFIX = "file_suffix";
|
||||
public static final String PROP_WITH_BOM = "with_bom";
|
||||
|
||||
private static final String PARQUET_PROP_PREFIX = "parquet.";
|
||||
private static final String SCHEMA = "schema";
|
||||
@ -155,6 +156,7 @@ public class OutFileClause {
|
||||
private long maxFileSizeBytes = DEFAULT_MAX_FILE_SIZE_BYTES;
|
||||
private boolean deleteExistingFiles = false;
|
||||
private String fileSuffix = "";
|
||||
private boolean withBom = false;
|
||||
private BrokerDesc brokerDesc = null;
|
||||
// True if result is written to local disk.
|
||||
// If set to true, the brokerDesc must be null.
|
||||
@ -566,6 +568,11 @@ public class OutFileClause {
|
||||
processedPropKeys.add(PROP_FILE_SUFFIX);
|
||||
}
|
||||
|
||||
if (properties.containsKey(PROP_WITH_BOM)) {
|
||||
withBom = Boolean.valueOf(properties.get(PROP_WITH_BOM)).booleanValue();
|
||||
processedPropKeys.add(PROP_WITH_BOM);
|
||||
}
|
||||
|
||||
if (properties.containsKey(PROP_SUCCESS_FILE_NAME)) {
|
||||
successFileName = properties.get(PROP_SUCCESS_FILE_NAME);
|
||||
FeNameFormat.checkOutfileSuccessFileName("file name", successFileName);
|
||||
@ -805,6 +812,7 @@ public class OutFileClause {
|
||||
sinkOptions.setMaxFileSizeBytes(maxFileSizeBytes);
|
||||
sinkOptions.setDeleteExistingFiles(deleteExistingFiles);
|
||||
sinkOptions.setFileSuffix(fileSuffix);
|
||||
sinkOptions.setWithBom(withBom);
|
||||
|
||||
if (brokerDesc != null) {
|
||||
sinkOptions.setBrokerProperties(brokerDesc.getProperties());
|
||||
|
||||
@ -166,6 +166,8 @@ public class ExportJob implements Writable {
|
||||
|
||||
@SerializedName("tabletsNum")
|
||||
private Integer tabletsNum;
|
||||
@SerializedName("withBom")
|
||||
private String withBom;
|
||||
|
||||
private TableRef tableRef;
|
||||
|
||||
@ -219,6 +221,7 @@ public class ExportJob implements Writable {
|
||||
this.columnSeparator = "\t";
|
||||
this.lineDelimiter = "\n";
|
||||
this.columns = "";
|
||||
this.withBom = "false";
|
||||
}
|
||||
|
||||
public ExportJob(long jobId) {
|
||||
@ -554,6 +557,7 @@ public class ExportJob implements Writable {
|
||||
if (!deleteExistingFiles.isEmpty()) {
|
||||
outfileProperties.put(OutFileClause.PROP_DELETE_EXISTING_FILES, deleteExistingFiles);
|
||||
}
|
||||
outfileProperties.put(OutFileClause.PROP_WITH_BOM, withBom);
|
||||
|
||||
// broker properties
|
||||
// outfile clause's broker properties need 'broker.' prefix
|
||||
|
||||
@ -349,6 +349,7 @@ public class ExportMgr {
|
||||
infoMap.put("broker", job.getBrokerDesc().getName());
|
||||
infoMap.put("column_separator", job.getColumnSeparator());
|
||||
infoMap.put("format", job.getFormat());
|
||||
infoMap.put("with_bom", job.getWithBom());
|
||||
infoMap.put("line_delimiter", job.getLineDelimiter());
|
||||
infoMap.put("columns", job.getColumns());
|
||||
infoMap.put("tablet_num", job.getTabletsNum());
|
||||
|
||||
@ -40,7 +40,6 @@ import org.apache.doris.scheduler.executor.TransientTaskExecutor;
|
||||
import org.apache.doris.thrift.TUniqueId;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import lombok.Setter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.util.List;
|
||||
@ -56,7 +55,6 @@ public class ExportTaskExecutor implements TransientTaskExecutor {
|
||||
|
||||
ExportJob exportJob;
|
||||
|
||||
@Setter
|
||||
Long taskId;
|
||||
|
||||
private StmtExecutor stmtExecutor;
|
||||
@ -205,4 +203,8 @@ public class ExportTaskExecutor implements TransientTaskExecutor {
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
public void setTaskId(Long taskId) {
|
||||
this.taskId = taskId;
|
||||
}
|
||||
}
|
||||
|
||||
@ -88,6 +88,7 @@ public class ExportCommand extends Command implements ForwardWithSync {
|
||||
.add(PropertyAnalyzer.PROPERTIES_LINE_DELIMITER)
|
||||
.add(PropertyAnalyzer.PROPERTIES_TIMEOUT)
|
||||
.add("format")
|
||||
.add(OutFileClause.PROP_WITH_BOM)
|
||||
.build();
|
||||
|
||||
private final List<String> nameParts;
|
||||
@ -267,6 +268,9 @@ public class ExportCommand extends Command implements ForwardWithSync {
|
||||
exportJob.setFormat(fileProperties.getOrDefault(LoadStmt.KEY_IN_PARAM_FORMAT_TYPE, "csv")
|
||||
.toLowerCase());
|
||||
|
||||
// set withBom
|
||||
exportJob.setWithBom(fileProperties.getOrDefault(OutFileClause.PROP_WITH_BOM, "false"));
|
||||
|
||||
// set parallelism
|
||||
int parallelism;
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user