[Fix](orc-reader) Fix `Wrong data type for column` error when the column order in the hive table differs from the orc file schema. (#21306)

A `Wrong data type for column` error is reported when the column order in the Hive table is not the same as the column order in the ORC file schema.

The root cause is logic that was introduced to handle the following case:

ORC-format tables from Hive 1.x may have system column names such as `_col0`, `_col1`, `_col2`... in the underlying ORC file schema, so the column names from the Hive table must be used for mapping.

### Solution
For now, this issue is fixed by applying the special handling described above only when the Hive version is specified as 1.x.x in the Hive catalog configuration.

```sql
CREATE CATALOG hive PROPERTIES (
    'hive.version' = '1.x.x'
);
```
This commit is contained in:
Qi Chen
2023-07-03 09:32:55 +08:00
committed by GitHub
parent ca0953ea51
commit 124516c1ea
11 changed files with 161 additions and 2 deletions

View File

@ -341,6 +341,10 @@ public class HMSExternalTable extends ExternalTable {
return ((HMSExternalCatalog) catalog).getHiveMetastoreUris();
}
/**
 * Returns the Hive version reported by this table's catalog.
 *
 * <p>The catalog is cast to {@code HMSExternalCatalog} and the call is delegated to its
 * {@code getHiveVersion()}.
 *
 * @return the Hive version string as returned by the catalog
 */
public String getHiveVersion() {
    HMSExternalCatalog hmsCatalog = (HMSExternalCatalog) catalog;
    return hmsCatalog.getHiveVersion();
}
/**
 * Returns the properties of the catalog this table belongs to.
 *
 * @return the map obtained from {@code catalog.getProperties()}
 */
public Map<String, String> getCatalogProperties() {
    Map<String, String> catalogProperties = catalog.getProperties();
    return catalogProperties;
}

View File

@ -124,6 +124,10 @@ public class HMSExternalCatalog extends ExternalCatalog {
return catalogProperty.getOrDefault(HMSProperties.HIVE_METASTORE_URIS, "");
}
/**
 * Returns the value of the {@code HMSProperties.HIVE_VERSION} catalog property.
 *
 * <p>An empty string is passed to {@code getOrDefault} as the fallback value.
 *
 * @return the configured Hive version, or the empty-string fallback
 */
public String getHiveVersion() {
    String hiveVersion = catalogProperty.getOrDefault(HMSProperties.HIVE_VERSION, "");
    return hiveVersion;
}
/**
 * Lists the database names of this catalog.
 *
 * @return the result of {@code client.getAllDatabases()}
 */
protected List<String> listDatabaseNames() {
    List<String> databaseNames = client.getAllDatabases();
    return databaseNames;
}

View File

@ -72,4 +72,26 @@ public class HiveVersionUtil {
return DEFAULT_HIVE_VERSION;
}
}
/**
 * Tells whether the given version string denotes a Hive 1.x release.
 *
 * <p>The string is split on {@code '.'}; it must yield at least two parts, and the first
 * part must parse as the integer {@code 1}. The remaining parts are not inspected, so a
 * value such as {@code "1.x.x"} is accepted.
 *
 * @param version the Hive version string; may be {@code null} or empty
 * @return {@code true} if the major version is 1; {@code false} for a null/empty string,
 *         a malformed version (a warning is logged), or any other major version
 */
public static boolean isHive1(String version) {
    if (Strings.isNullOrEmpty(version)) {
        // Nothing configured: not Hive 1, and not worth a warning.
        return false;
    }
    String[] segments = version.split("\\.");
    if (segments.length >= 2) {
        try {
            return Integer.parseInt(segments[0]) == 1;
        } catch (NumberFormatException e) {
            // Non-numeric major component: handled by the shared warning below.
        }
    }
    // Reached only for malformed input: too few segments or a non-numeric major.
    LOG.warn("invalid hive version: " + version);
    return false;
}
}

View File

@ -37,6 +37,7 @@ import org.apache.doris.datasource.hive.HiveMetaStoreCache;
import org.apache.doris.datasource.hive.HiveMetaStoreCache.FileCacheValue;
import org.apache.doris.datasource.hive.HivePartition;
import org.apache.doris.datasource.hive.HiveTransaction;
import org.apache.doris.datasource.hive.HiveVersionUtil;
import org.apache.doris.planner.ListPartitionPrunerV2;
import org.apache.doris.planner.PlanNodeId;
import org.apache.doris.planner.external.HiveSplit.HiveSplitCreator;
@ -91,7 +92,9 @@ public class HiveScanNode extends FileQueryScanNode {
@Override
protected void doInitialize() throws UserException {
super.doInitialize();
genSlotToSchemaIdMap();
if (HiveVersionUtil.isHive1(hmsTable.getHiveVersion())) {
genSlotToSchemaIdMap();
}
String inputFormat = hmsTable.getRemoteTable().getSd().getInputFormat();
if (inputFormat.contains("TextInputFormat")) {
for (SlotDescriptor slot : desc.getSlots()) {