[Enhancement](csv-reader) Optimize csv_reader _split_value and fix json_reader case sensitivity (#17093)

1. Enhancement:
    For a single-character column separator, csv_reader uses a different `split value` method.
2. BugFix
    Make `json` file format loading case-sensitive.
This commit is contained in:
Tiewei Fang
2023-02-26 09:03:04 +08:00
committed by GitHub
parent c43e521d29
commit f6ce072297
4 changed files with 76 additions and 14 deletions

View File

@ -973,7 +973,7 @@ public class DataDescription {
// Change all column names to lower case, because Doris column names are case-insensitive.
private void columnsNameToLowerCase(List<String> columns) {
if (columns == null || columns.isEmpty()) {
if (columns == null || columns.isEmpty() || "json".equals(this.fileFormat)) {
return;
}
for (int i = 0; i < columns.size(); i++) {
@ -1081,7 +1081,11 @@ public class DataDescription {
if (!mappingColNames.contains(column.getName())) {
parsedColumnExprList.add(new ImportColumnDesc(column.getName(), null));
}
fileFieldNames.add(column.getName().toLowerCase());
if ("json".equals(this.fileFormat)) {
fileFieldNames.add(column.getName());
} else {
fileFieldNames.add(column.getName().toLowerCase());
}
}
LOG.debug("after fill column info. columns: {}, parsed column exprs: {}", fileFieldNames, parsedColumnExprList);

View File

@ -603,7 +603,12 @@ public class Load {
if (hasSequenceCol && column.isSequenceColumn()) {
continue;
}
ImportColumnDesc columnDesc = new ImportColumnDesc(column.getName().toLowerCase());
ImportColumnDesc columnDesc = null;
if (formatType == TFileFormatType.FORMAT_JSON) {
columnDesc = new ImportColumnDesc(column.getName());
} else {
columnDesc = new ImportColumnDesc(column.getName().toLowerCase());
}
LOG.debug("add base column {} to stream load task", column.getName());
copiedColumnExprs.add(columnDesc);
}