[fix](iceberg) iceberg uses a custom method to encode special characters in field names (#27108)
Fix two bugs: 1. The missing-column check is case sensitive; change the column names to lower case in FE for hive/iceberg/hudi. 2. Iceberg uses a custom method to encode special characters in column names; decode the column name so that it matches the right column in the parquet reader.
This commit is contained in:
@ -435,7 +435,7 @@ public class HMSExternalTable extends ExternalTable {
|
||||
} else {
|
||||
List<Column> tmpSchema = Lists.newArrayListWithCapacity(schema.size());
|
||||
for (FieldSchema field : schema) {
|
||||
tmpSchema.add(new Column(field.getName(),
|
||||
tmpSchema.add(new Column(field.getName().toLowerCase(Locale.ROOT),
|
||||
HiveMetaStoreClientHelper.hiveTypeToDorisType(field.getType()), true, null,
|
||||
true, field.getComment(), true, -1));
|
||||
}
|
||||
@ -484,7 +484,7 @@ public class HMSExternalTable extends ExternalTable {
|
||||
Schema schema = icebergTable.schema();
|
||||
List<Column> tmpSchema = Lists.newArrayListWithCapacity(hmsSchema.size());
|
||||
for (FieldSchema field : hmsSchema) {
|
||||
tmpSchema.add(new Column(field.getName(),
|
||||
tmpSchema.add(new Column(field.getName().toLowerCase(Locale.ROOT),
|
||||
HiveMetaStoreClientHelper.hiveTypeToDorisType(field.getType(),
|
||||
IcebergExternalTable.ICEBERG_DATETIME_SCALE_MS),
|
||||
true, null, true, false, null, field.getComment(), true, null,
|
||||
@ -500,7 +500,7 @@ public class HMSExternalTable extends ExternalTable {
|
||||
for (String partitionKey : partitionKeys) {
|
||||
// Do not use "getColumn()", which will cause dead loop
|
||||
for (Column column : schema) {
|
||||
if (partitionKey.equals(column.getName())) {
|
||||
if (partitionKey.equalsIgnoreCase(column.getName())) {
|
||||
// For partition column, if it is string type, change it to varchar(65535)
|
||||
// to be same as doris managed table.
|
||||
// This is to avoid some unexpected behavior such as different partition pruning result
|
||||
@ -524,7 +524,7 @@ public class HMSExternalTable extends ExternalTable {
|
||||
return getHiveColumnStats(colName);
|
||||
case ICEBERG:
|
||||
return StatisticsUtil.getIcebergColumnStats(colName,
|
||||
Env.getCurrentEnv().getExtMetaCacheMgr().getIcebergMetadataCache().getIcebergTable(this));
|
||||
Env.getCurrentEnv().getExtMetaCacheMgr().getIcebergMetadataCache().getIcebergTable(this));
|
||||
default:
|
||||
LOG.warn("get column stats for dlaType {} is not supported.", dlaType);
|
||||
}
|
||||
|
||||
@ -36,6 +36,7 @@ import org.apache.iceberg.types.Types;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Optional;
|
||||
|
||||
public class IcebergExternalTable extends ExternalTable {
|
||||
@ -66,7 +67,7 @@ public class IcebergExternalTable extends ExternalTable {
|
||||
List<Types.NestedField> columns = schema.columns();
|
||||
List<Column> tmpSchema = Lists.newArrayListWithCapacity(columns.size());
|
||||
for (Types.NestedField field : columns) {
|
||||
tmpSchema.add(new Column(field.name(),
|
||||
tmpSchema.add(new Column(field.name().toLowerCase(Locale.ROOT),
|
||||
icebergTypeToDorisType(field.type()), true, null, true, field.doc(), true,
|
||||
schema.caseInsensitiveFindField(field.name()).fieldId()));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user