[fix](iceberg) iceberg uses custom method to encode special characters of field name (#27108)

Fix two bugs:
1. Missing column matching is case sensitive; change the column names to lower case in FE for hive/iceberg/hudi.
2. Iceberg uses a custom method to encode special characters in column names. Decode the column name so that it matches the right column in the parquet reader.
This commit is contained in:
Ashin Gau
2023-11-17 18:38:55 +08:00
committed by GitHub
parent f8b61d3d8e
commit 52995c528e
12 changed files with 113 additions and 23 deletions

View File

@ -435,7 +435,7 @@ public class HMSExternalTable extends ExternalTable {
} else {
List<Column> tmpSchema = Lists.newArrayListWithCapacity(schema.size());
for (FieldSchema field : schema) {
tmpSchema.add(new Column(field.getName(),
tmpSchema.add(new Column(field.getName().toLowerCase(Locale.ROOT),
HiveMetaStoreClientHelper.hiveTypeToDorisType(field.getType()), true, null,
true, field.getComment(), true, -1));
}
@ -484,7 +484,7 @@ public class HMSExternalTable extends ExternalTable {
Schema schema = icebergTable.schema();
List<Column> tmpSchema = Lists.newArrayListWithCapacity(hmsSchema.size());
for (FieldSchema field : hmsSchema) {
tmpSchema.add(new Column(field.getName(),
tmpSchema.add(new Column(field.getName().toLowerCase(Locale.ROOT),
HiveMetaStoreClientHelper.hiveTypeToDorisType(field.getType(),
IcebergExternalTable.ICEBERG_DATETIME_SCALE_MS),
true, null, true, false, null, field.getComment(), true, null,
@ -500,7 +500,7 @@ public class HMSExternalTable extends ExternalTable {
for (String partitionKey : partitionKeys) {
// Do not use "getColumn()", which will cause dead loop
for (Column column : schema) {
if (partitionKey.equals(column.getName())) {
if (partitionKey.equalsIgnoreCase(column.getName())) {
// For partition column, if it is string type, change it to varchar(65535)
// to be same as doris managed table.
// This is to avoid some unexpected behavior such as different partition pruning result
@ -524,7 +524,7 @@ public class HMSExternalTable extends ExternalTable {
return getHiveColumnStats(colName);
case ICEBERG:
return StatisticsUtil.getIcebergColumnStats(colName,
Env.getCurrentEnv().getExtMetaCacheMgr().getIcebergMetadataCache().getIcebergTable(this));
Env.getCurrentEnv().getExtMetaCacheMgr().getIcebergMetadataCache().getIcebergTable(this));
default:
LOG.warn("get column stats for dlaType {} is not supported.", dlaType);
}

View File

@ -36,6 +36,7 @@ import org.apache.iceberg.types.Types;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
public class IcebergExternalTable extends ExternalTable {
@ -66,7 +67,7 @@ public class IcebergExternalTable extends ExternalTable {
List<Types.NestedField> columns = schema.columns();
List<Column> tmpSchema = Lists.newArrayListWithCapacity(columns.size());
for (Types.NestedField field : columns) {
tmpSchema.add(new Column(field.name(),
tmpSchema.add(new Column(field.name().toLowerCase(Locale.ROOT),
icebergTypeToDorisType(field.type()), true, null, true, field.doc(), true,
schema.caseInsensitiveFindField(field.name()).fieldId()));
}