[Fix](Outfile) Fix the column type mapping in the orc/parquet file format (#32281)
| Doris Type | Orc Type | Parquet Type |
|---------------------|--------------------|------------------------|
| Date | Long (logical: DATE) | int32 (Logical: Date) |
| DateTime | TIMESTAMP (logical: TIMESTAMP) | int96 |
This commit is contained in:
@ -297,15 +297,21 @@ public class OutFileClause {
|
||||
}
|
||||
orcType = "string";
|
||||
break;
|
||||
case DATEV2:
|
||||
orcType = "date";
|
||||
break;
|
||||
case DATETIMEV2:
|
||||
orcType = "timestamp";
|
||||
break;
|
||||
case CHAR:
|
||||
orcType = "char(" + dorisType.getLength() + ")";
|
||||
break;
|
||||
case VARCHAR:
|
||||
orcType = "varchar(" + dorisType.getLength() + ")";
|
||||
break;
|
||||
case LARGEINT:
|
||||
case DATE:
|
||||
case DATETIME:
|
||||
case DATEV2:
|
||||
case CHAR:
|
||||
case VARCHAR:
|
||||
orcType = "string";
|
||||
break;
|
||||
case DECIMALV2:
|
||||
@ -402,74 +408,50 @@ public class OutFileClause {
|
||||
case FLOAT:
|
||||
case DOUBLE:
|
||||
case STRING:
|
||||
if (!schema.second.equals(resultType.getPrimitiveType().toString().toLowerCase())) {
|
||||
throw new AnalysisException("project field type is " + resultType.getPrimitiveType().toString()
|
||||
+ ", should use " + resultType.getPrimitiveType().toString() + ","
|
||||
+ " but the type of column " + i + " is " + schema.second);
|
||||
}
|
||||
checkOrcType(schema.second, resultType.getPrimitiveType().toString().toLowerCase(), true,
|
||||
resultType.getPrimitiveType().toString());
|
||||
break;
|
||||
case DATEV2:
|
||||
checkOrcType(schema.second, "date", true, resultType.getPrimitiveType().toString());
|
||||
break;
|
||||
case DATETIMEV2:
|
||||
if (!schema.second.equals("timestamp")) {
|
||||
throw new AnalysisException("project field type is " + resultType.getPrimitiveType().toString()
|
||||
+ ", should use timestamp, but the definition type of column " + i + " is "
|
||||
+ schema.second);
|
||||
}
|
||||
checkOrcType(schema.second, "timestamp", true, resultType.getPrimitiveType().toString());
|
||||
break;
|
||||
case CHAR:
|
||||
checkOrcType(schema.second, "char", false, resultType.getPrimitiveType().toString());
|
||||
break;
|
||||
case VARCHAR:
|
||||
checkOrcType(schema.second, "varchar", false, resultType.getPrimitiveType().toString());
|
||||
break;
|
||||
case LARGEINT:
|
||||
case DATE:
|
||||
case DATETIME:
|
||||
case DATEV2:
|
||||
case CHAR:
|
||||
case VARCHAR:
|
||||
if (!schema.second.equals("string")) {
|
||||
throw new AnalysisException("project field type is " + resultType.getPrimitiveType().toString()
|
||||
+ ", should use string, but the definition type of column " + i + " is "
|
||||
+ schema.second);
|
||||
}
|
||||
checkOrcType(schema.second, "string", true, resultType.getPrimitiveType().toString());
|
||||
break;
|
||||
case DECIMAL32:
|
||||
case DECIMAL64:
|
||||
case DECIMAL128:
|
||||
case DECIMALV2:
|
||||
if (!schema.second.startsWith("decimal")) {
|
||||
throw new AnalysisException("project field type is " + resultType.getPrimitiveType().toString()
|
||||
+ ", should use string, but the definition type of column " + i + " is "
|
||||
+ schema.second);
|
||||
}
|
||||
checkOrcType(schema.second, "decimal", false, resultType.getPrimitiveType().toString());
|
||||
break;
|
||||
case HLL:
|
||||
case BITMAP:
|
||||
if (ConnectContext.get() != null && ConnectContext.get()
|
||||
.getSessionVariable().isReturnObjectDataAsBinary()) {
|
||||
if (!schema.second.equals("string")) {
|
||||
throw new AnalysisException("project field type is HLL/BITMAP, should use string, "
|
||||
+ "but the definition type of column " + i + " is " + schema.second);
|
||||
}
|
||||
checkOrcType(schema.second, "string", true, resultType.getPrimitiveType().toString());
|
||||
} else {
|
||||
throw new AnalysisException("Orc format does not support column type: "
|
||||
+ resultType.getPrimitiveType());
|
||||
}
|
||||
break;
|
||||
case STRUCT:
|
||||
if (!schema.second.startsWith("struct")) {
|
||||
throw new AnalysisException("project field type is " + resultType.getPrimitiveType().toString()
|
||||
+ ", should use struct, but the definition type of column " + i + " is "
|
||||
+ schema.second);
|
||||
}
|
||||
checkOrcType(schema.second, "struct", false, resultType.getPrimitiveType().toString());
|
||||
break;
|
||||
case MAP:
|
||||
if (!schema.second.startsWith("map")) {
|
||||
throw new AnalysisException("project field type is " + resultType.getPrimitiveType().toString()
|
||||
+ ", should use map, but the definition type of column " + i + " is "
|
||||
+ schema.second);
|
||||
}
|
||||
checkOrcType(schema.second, "map", false, resultType.getPrimitiveType().toString());
|
||||
break;
|
||||
case ARRAY:
|
||||
if (!schema.second.startsWith("array")) {
|
||||
throw new AnalysisException("project field type is " + resultType.getPrimitiveType().toString()
|
||||
+ ", should use array, but the definition type of column " + i + " is "
|
||||
+ schema.second);
|
||||
}
|
||||
checkOrcType(schema.second, "array", false, resultType.getPrimitiveType().toString());
|
||||
break;
|
||||
default:
|
||||
throw new AnalysisException("Orc format does not support column type: "
|
||||
@ -478,6 +460,22 @@ public class OutFileClause {
|
||||
}
|
||||
}
|
||||
|
||||
private void checkOrcType(String orcType, String expectType, boolean isEqual, String dorisType)
|
||||
throws AnalysisException {
|
||||
if (isEqual) {
|
||||
if (orcType.equals(expectType)) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if (orcType.startsWith(expectType)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
throw new AnalysisException("project field type is " + dorisType
|
||||
+ ", should use " + expectType + ", but the definition type is " + orcType);
|
||||
}
|
||||
|
||||
|
||||
private void analyzeForParquetFormat(List<Expr> resultExprs, List<String> colLabels) throws AnalysisException {
|
||||
if (this.parquetSchemas.isEmpty()) {
|
||||
genParquetColumnName(resultExprs, colLabels);
|
||||
|
||||
Reference in New Issue
Block a user