[opt](hive) Support ORC files generated by Hive 1.x in all file scan nodes (#28806)

This commit is contained in:
Mingyu Chen
2024-01-06 17:33:16 +08:00
committed by GitHub
parent 720bee7c1e
commit 2adb0fcc50
4 changed files with 59 additions and 34 deletions

View File

@ -275,6 +275,9 @@ public abstract class FileQueryScanNode extends FileScanNode {
return;
}
TFileFormatType fileFormatType = getFileFormatType();
if (fileFormatType == TFileFormatType.FORMAT_ORC) {
genSlotToSchemaIdMapForOrc();
}
params.setFormatType(fileFormatType);
boolean isCsvOrJson = Util.isCsvFormat(fileFormatType) || fileFormatType == TFileFormatType.FORMAT_JSON;
boolean isWal = fileFormatType == TFileFormatType.FORMAT_WAL;
@ -463,6 +466,25 @@ public abstract class FileQueryScanNode extends FileScanNode {
return rangeDesc;
}
/**
 * Builds the mapping from slot column name to that column's position in the
 * table's base schema and stores it on the scan params.
 *
 * <p>This supports ORC files written by Hive 1.x, which use internal column
 * names such as {@code _col0, _col1, _col2...}; for those files the mapping
 * from slot name to schema position has to be saved.
 *
 * <p>Slots whose column name is not present in the base schema are left out
 * of the mapping. {@code params} must already be set when this is called.
 */
protected void genSlotToSchemaIdMapForOrc() {
    Preconditions.checkNotNull(params);
    List<Column> baseSchema = desc.getTable().getBaseSchema();

    // Index the base schema once instead of rescanning it for every slot.
    // putIfAbsent keeps the first position when a name occurs more than once,
    // which is what a first-match linear scan would return.
    Map<String, Integer> schemaPositions = Maps.newHashMap();
    int idx = 0;
    for (Column col : baseSchema) {
        schemaPositions.putIfAbsent(col.getName(), idx);
        idx += 1;
    }

    Map<String, Integer> columnNameToPosition = Maps.newHashMap();
    for (SlotDescriptor slot : desc.getSlots()) {
        String slotColumnName = slot.getColumn().getName();
        Integer position = schemaPositions.get(slotColumnName);
        if (position != null) {
            columnNameToPosition.put(slotColumnName, position);
        }
    }
    params.setSlotNameToSchemaPos(columnNameToPosition);
}
// Returns a TFileType; every concrete subclass must provide the implementation.
// NOTE(review): presumably the storage/file-system type of the location being scanned - confirm against implementations.
protected abstract TFileType getLocationType() throws UserException;
// Overload that takes an explicit location string; may fail with UserException.
// NOTE(review): presumably derives the type from the given location - confirm against implementations.
protected abstract TFileType getLocationType(String location) throws UserException;

View File

@ -18,7 +18,6 @@
package org.apache.doris.planner.external;
import org.apache.doris.analysis.FunctionCallExpr;
import org.apache.doris.analysis.SlotDescriptor;
import org.apache.doris.analysis.TupleDescriptor;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
@ -39,7 +38,6 @@ import org.apache.doris.datasource.hive.HiveMetaStoreCache;
import org.apache.doris.datasource.hive.HiveMetaStoreCache.FileCacheValue;
import org.apache.doris.datasource.hive.HivePartition;
import org.apache.doris.datasource.hive.HiveTransaction;
import org.apache.doris.datasource.hive.HiveVersionUtil;
import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
import org.apache.doris.planner.ListPartitionPrunerV2;
import org.apache.doris.planner.PlanNodeId;
@ -55,7 +53,6 @@ import org.apache.doris.thrift.TFileType;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import lombok.Setter;
import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
@ -117,9 +114,6 @@ public class HiveScanNode extends FileQueryScanNode {
@Override
protected void doInitialize() throws UserException {
super.doInitialize();
if (HiveVersionUtil.isHive1(hmsTable.getHiveVersion())) {
genSlotToSchemaIdMap();
}
if (hmsTable.isHiveTransactionalTable()) {
this.hiveTransaction = new HiveTransaction(DebugUtil.printId(ConnectContext.get().queryId()),
@ -396,23 +390,6 @@ public class HiveScanNode extends FileQueryScanNode {
return fileAttributes;
}
/**
 * Builds the mapping from slot column name to that column's position in the
 * table's base schema and stores it on the scan params.
 *
 * <p>This supports ORC files written by Hive 1.x, which use internal column
 * names such as {@code _col0, _col1, _col2...}.
 *
 * <p>Slots whose column name is not present in the base schema are left out
 * of the mapping.
 */
private void genSlotToSchemaIdMap() {
    List<Column> baseSchema = desc.getTable().getBaseSchema();

    // One pass over the schema builds a name -> position index, so each slot
    // becomes a single lookup instead of a fresh scan of every column.
    // putIfAbsent keeps the first position for a repeated name, the same
    // result a first-match scan would give.
    Map<String, Integer> positionByColumnName = Maps.newHashMap();
    int position = 0;
    for (Column col : baseSchema) {
        positionByColumnName.putIfAbsent(col.getName(), position);
        position += 1;
    }

    Map<String, Integer> columnNameToPosition = Maps.newHashMap();
    for (SlotDescriptor slot : desc.getSlots()) {
        String slotColumnName = slot.getColumn().getName();
        Integer schemaPos = positionByColumnName.get(slotColumnName);
        if (schemaPos != null) {
            columnNameToPosition.put(slotColumnName, schemaPos);
        }
    }
    params.setSlotNameToSchemaPos(columnNameToPosition);
}
@Override
public boolean pushDownAggNoGrouping(FunctionCallExpr aggExpr) {