[feature](stream load) (step one) Add arrow data type for stream load (#26709)

By using the Arrow data format, we can reduce the amount of data transferred during stream load and improve data import performance.
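A minimal client-side sketch (not part of this patch) of what the feature enables: serialize rows as an Arrow IPC stream with the Arrow Java library and send the bytes as the stream load request body. Selecting the format via a "format: arrow" stream load header is an assumption inferred from the "arrow" constant added below; consult the stream load documentation for the exact property name.

    import java.io.ByteArrayOutputStream;
    import java.nio.charset.StandardCharsets;

    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.IntVector;
    import org.apache.arrow.vector.VarCharVector;
    import org.apache.arrow.vector.VectorSchemaRoot;
    import org.apache.arrow.vector.ipc.ArrowStreamWriter;

    public class ArrowStreamLoadBody {
        // Build an Arrow IPC stream for two example rows of a hypothetical
        // table (id INT, name VARCHAR). The resulting bytes would be sent as
        // the HTTP body of a stream load request with the arrow format
        // selected (header name assumed, see note above).
        public static byte[] build() throws Exception {
            try (RootAllocator allocator = new RootAllocator();
                 IntVector id = new IntVector("id", allocator);
                 VarCharVector name = new VarCharVector("name", allocator)) {
                id.allocateNew(2);
                name.allocateNew();
                id.set(0, 1);
                id.set(1, 2);
                name.setSafe(0, "alice".getBytes(StandardCharsets.UTF_8));
                name.setSafe(1, "bob".getBytes(StandardCharsets.UTF_8));

                VectorSchemaRoot root = VectorSchemaRoot.of(id, name);
                root.setRowCount(2);

                ByteArrayOutputStream out = new ByteArrayOutputStream();
                try (ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out)) {
                    writer.start();      // writes the schema message
                    writer.writeBatch(); // writes the record batch (binary, columnar)
                    writer.end();
                }
                return out.toByteArray();
            }
        }
    }

Because the batch is binary and columnar, the same rows are typically smaller on the wire than their CSV or JSON text encoding, which is where the transfer savings mentioned above come from.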
@@ -357,6 +357,9 @@ public class DataDescription implements InsertStmt.DataDesc {
             case FORMAT_WAL:
                 this.fileFormat = "wal";
                 break;
+            case FORMAT_ARROW:
+                this.fileFormat = "arrow";
+                break;
             default:
                 this.fileFormat = "unknown";
                 break;
@@ -1124,6 +1127,7 @@ public class DataDescription implements InsertStmt.DataDesc {
                 && !fileFormat.equalsIgnoreCase(FileFormatConstants.FORMAT_ORC)
                 && !fileFormat.equalsIgnoreCase(FileFormatConstants.FORMAT_JSON)
                 && !fileFormat.equalsIgnoreCase(FileFormatConstants.FORMAT_WAL)
+                && !fileFormat.equalsIgnoreCase(FileFormatConstants.FORMAT_ARROW)
                 && !fileFormat.equalsIgnoreCase(FileFormatConstants.FORMAT_HIVE_TEXT)) {
             throw new AnalysisException("File Format Type " + fileFormat + " is invalid.");
         }
@@ -33,6 +33,7 @@ public class FileFormatConstants {
     public static final String FORMAT_JSON = "json";
     public static final String FORMAT_AVRO = "avro";
     public static final String FORMAT_WAL = "wal";
+    public static final String FORMAT_ARROW = "arrow";

     public static final String PROP_FORMAT = "format";
     public static final String PROP_COLUMN_SEPARATOR = "column_separator";
@@ -564,8 +564,10 @@ public class Util {
         // TODO: Add TEXTFILE to TFileFormatType to Support hive text file format.
                 || lowerFileFormat.equals(FileFormatConstants.FORMAT_HIVE_TEXT)) {
             return TFileFormatType.FORMAT_CSV_PLAIN;
-        } else if (lowerFileFormat.equals("wal")) {
+        } else if (lowerFileFormat.equals(FileFormatConstants.FORMAT_WAL)) {
             return TFileFormatType.FORMAT_WAL;
+        } else if (lowerFileFormat.equals(FileFormatConstants.FORMAT_ARROW)) {
+            return TFileFormatType.FORMAT_ARROW;
         } else {
             return TFileFormatType.FORMAT_UNKNOWN;
         }
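Two changes in this hunk: the hard-coded "wal" literal becomes the FORMAT_WAL constant, and a new branch maps "arrow" to TFileFormatType.FORMAT_ARROW, the format tag carried in the load plan so downstream readers know how to parse the body. TFileFormatType is a generated Thrift enum shared between frontend and backend, so the new FORMAT_ARROW value must also be added on the Thrift side; that part of the change is outside this excerpt.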
@@ -344,10 +344,12 @@ public class Load {
         for (ImportColumnDesc importColumnDesc : copiedColumnExprs) {
             columnExprMap.put(importColumnDesc.getColumnName(), importColumnDesc.getExpr());
         }
+        HashMap<String, Type> colToType = new HashMap<>();

         // check default value and auto-increment column
         for (Column column : tbl.getBaseSchema()) {
             String columnName = column.getName();
+            colToType.put(columnName, column.getType());
             if (columnExprMap.containsKey(columnName)) {
                 continue;
             }
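The colToType map built here records the real type of every base-schema column while the loop is already walking the schema; the next hunk consumes it to type the source slot for Arrow input, as shown below.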
@@ -427,9 +429,15 @@ public class Load {
                 exprsByName.put(realColName, expr);
             } else {
                 SlotDescriptor slotDesc = analyzer.getDescTbl().addSlotDescriptor(srcTupleDesc);
-                // columns default be varchar type
-                slotDesc.setType(ScalarType.createType(PrimitiveType.VARCHAR));
-                slotDesc.setColumn(new Column(realColName, PrimitiveType.VARCHAR));
+
+                if (formatType == TFileFormatType.FORMAT_ARROW) {
+                    slotDesc.setColumn(new Column(realColName, colToType.get(realColName)));
+                } else {
+                    // columns default be varchar type
+                    slotDesc.setType(ScalarType.createType(PrimitiveType.VARCHAR));
+                    slotDesc.setColumn(new Column(realColName, PrimitiveType.VARCHAR));
+                }
+
                 // ISSUE A: src slot should be nullable even if the column is not nullable.
                 // because src slot is what we read from file, not represent to real column value.
                 // If column is not nullable, error will be thrown when filling the dest slot,
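Why the Arrow branch can keep the table's real column types: an Arrow IPC stream is self-describing, so typed columns arrive together with their schema, whereas CSV/JSON fields arrive as text and are funneled through VARCHAR source slots and cast later. A minimal sketch with the Arrow Java API (illustrative only; the Doris backend parses Arrow in C++, not through this path):

    import java.io.ByteArrayInputStream;

    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.VectorSchemaRoot;
    import org.apache.arrow.vector.ipc.ArrowStreamReader;

    public class ArrowSchemaPeek {
        // Read back an Arrow IPC stream: the typed schema is available before
        // any row data is touched, which is what lets the planner assign real
        // column types to source slots instead of defaulting to VARCHAR.
        public static void peek(byte[] body) throws Exception {
            try (RootAllocator allocator = new RootAllocator();
                 ArrowStreamReader reader =
                         new ArrowStreamReader(new ByteArrayInputStream(body), allocator)) {
                VectorSchemaRoot root = reader.getVectorSchemaRoot();
                System.out.println(root.getSchema()); // e.g. Schema<id: Int(32, true), name: Utf8>
                while (reader.loadNextBatch()) {
                    System.out.println("rows in batch: " + root.getRowCount());
                }
            }
        }
    }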