[Fix](spark-load) ignore column name case in spark load (#23947)

Doris treats column names as case-insensitive, so during a Spark load all column names can be converted to lowercase before they are matched and loaded.
This commit is contained in:
wuwenchi
2023-09-10 19:45:01 +08:00
committed by GitHub
parent 8e171f5cbf
commit 102abff071
3 changed files with 14 additions and 4 deletions

View File

@ -103,6 +103,7 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
@ -406,7 +407,13 @@ public class SparkLoadJob extends BulkLoadJob {
private PushBrokerReaderParams getPushBrokerReaderParams(OlapTable table, long indexId) throws UserException {
if (!indexToPushBrokerReaderParams.containsKey(indexId)) {
PushBrokerReaderParams pushBrokerReaderParams = new PushBrokerReaderParams();
pushBrokerReaderParams.init(table.getSchemaByIndexId(indexId), brokerDesc);
List<Column> columns = new ArrayList<>();
table.getSchemaByIndexId(indexId).forEach(col -> {
Column column = new Column(col);
column.setName(col.getName().toLowerCase(Locale.ROOT));
columns.add(column);
});
pushBrokerReaderParams.init(columns, brokerDesc);
indexToPushBrokerReaderParams.put(indexId, pushBrokerReaderParams);
}
return indexToPushBrokerReaderParams.get(indexId);
@ -463,7 +470,9 @@ public class SparkLoadJob extends BulkLoadJob {
List<TColumn> columnsDesc = new ArrayList<TColumn>();
for (Column column : olapTable.getSchemaByIndexId(indexId)) {
columnsDesc.add(column.toThrift());
TColumn tColumn = column.toThrift();
tColumn.setColumnName(tColumn.getColumnName().toLowerCase(Locale.ROOT));
columnsDesc.add(tColumn);
}
int bucket = 0;

View File

@ -71,6 +71,7 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
@ -281,7 +282,7 @@ public class SparkLoadPendingTask extends LoadTask {
private EtlColumn createEtlColumn(Column column) {
// column name
String name = column.getName();
String name = column.getName().toLowerCase(Locale.ROOT);
// column type
PrimitiveType type = column.getDataType();
String columnType = column.getDataType().toString();