branch-2.1: [opt](hive) add option to get schema from table object #50038 (#50269)

Cherry-picked from #50038

Co-authored-by: Mingyu Chen (Rayner) <morningman@163.com>
This commit is contained in:
github-actions[bot]
2025-04-22 14:25:03 +08:00
committed by GitHub
parent 34cb4f05e8
commit 1efe62c7ba
4 changed files with 144 additions and 3 deletions

View File

@ -75,6 +75,13 @@ public class HMSExternalCatalog extends ExternalCatalog {
public static final String FILE_META_CACHE_TTL_SECOND = "file.meta.cache.ttl-second";
// broker name for file split and query scan.
public static final String BIND_BROKER_NAME = "broker.name";
// Default is false. If set to true, the table schema is taken from the "remoteTable" object
// instead of being fetched from the hive metastore.
// This works around a forward compatibility issue of the hive metastore, where a
// "storage schema reading not support" error may be thrown.
// Setting this to true avoids that error.
// But notice that if set to true, the default values of columns will be ignored, because the
// default values cannot be obtained from the remoteTable object.
public static final String GET_SCHEMA_FROM_TABLE = "get_schema_from_table";
// -1 means file cache no ttl set
public static final int FILE_META_CACHE_NO_TTL = -1;

View File

@ -75,6 +75,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.logging.log4j.LogManager;
@ -573,9 +574,18 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI
}
private Optional<SchemaCacheValue> getHiveSchema() {
HMSCachedClient client = ((HMSExternalCatalog) catalog).getClient();
List<FieldSchema> schema = client.getSchema(dbName, name);
Map<String, String> colDefaultValues = client.getDefaultColumnValues(dbName, name);
boolean getFromTable = catalog.getCatalogProperty()
.getOrDefault(HMSExternalCatalog.GET_SCHEMA_FROM_TABLE, "false")
.equalsIgnoreCase("true");
List<FieldSchema> schema = null;
Map<String, String> colDefaultValues = Maps.newHashMap();
if (getFromTable) {
schema = getSchemaFromRemoteTable(remoteTable);
} else {
HMSCachedClient client = ((HMSExternalCatalog) catalog).getClient();
schema = client.getSchema(dbName, name);
colDefaultValues = client.getDefaultColumnValues(dbName, name);
}
List<Column> columns = Lists.newArrayListWithCapacity(schema.size());
for (FieldSchema field : schema) {
String fieldName = field.getName().toLowerCase(Locale.ROOT);
@ -588,6 +598,13 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI
return Optional.of(new HMSSchemaCacheValue(columns, partitionColumns));
}
/**
 * Builds the field list of a table directly from the given metastore {@code Table} object.
 *
 * <p>The result is a newly created list holding the columns of the table's storage
 * descriptor first, followed by the table's partition keys.
 *
 * @param table the metastore table object to read the fields from
 * @return a new list with the storage descriptor columns followed by the partition keys
 */
private static List<FieldSchema> getSchemaFromRemoteTable(Table table) {
    // Copy the storage descriptor columns into a fresh list so the table object is not modified.
    List<FieldSchema> allFields = Lists.newArrayList(table.getSd().getCols());
    // Partition keys are kept separately on the table object; append them after the data columns.
    List<FieldSchema> partitionKeys = table.getPartitionKeys();
    allFields.addAll(partitionKeys);
    return allFields;
}
@Override
public long fetchRowCount() {
makeSureInitialized();