[fix](hive) support finding serde info from both table properties and serde properties (#37043) (#37188)

Backport of #37043.
This commit is contained in:
Mingyu Chen
2024-07-04 13:55:38 +08:00
committed by GitHub
parent 5f3e1e44b2
commit 3613413a54
6 changed files with 120 additions and 25 deletions

View File

@ -51,6 +51,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@ -79,6 +80,7 @@ import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@ -847,4 +849,28 @@ public class HiveMetaStoreClientHelper {
}
return conf;
}
/**
 * Looks up a serde-related property of a Hive table.
 *
 * <p>The key is searched in the table-level parameters and in the parameters of the
 * storage descriptor's serde info. A non-empty value from the table parameters takes
 * precedence; otherwise the value from the serde info is used.
 *
 * <p>A missing storage descriptor, serde info, or parameter map is treated the same
 * as a missing key rather than raising a {@link NullPointerException}.
 *
 * @param table the metastore table to inspect
 * @param key the property name to look up
 * @return the first non-empty value found, or {@link Optional#empty()} if neither source has one
 */
public static Optional<String> getSerdeProperty(Table table, String key) {
    // NOTE(review): the metastore getters are presumed to return null for unset fields,
    // so each step of the chain is walked null-safely.
    String valueFromSd = Optional.ofNullable(table.getSd())
            .map(sd -> sd.getSerdeInfo())
            .map(serdeInfo -> serdeInfo.getParameters())
            .map(params -> params.get(key))
            .orElse(null);
    String valueFromTbl = Optional.ofNullable(table.getParameters())
            .map(params -> params.get(key))
            .orElse(null);
    // Table-level value is listed first so that it wins over the serde-level one.
    return firstNonNullable(valueFromTbl, valueFromSd);
}
/**
 * Picks the first candidate that is neither {@code null} nor the empty string.
 *
 * @param values candidates, in priority order
 * @return the first usable candidate, or {@link Optional#empty()} when there is none
 */
private static Optional<String> firstNonNullable(String... values) {
    for (int i = 0; i < values.length; i++) {
        String candidate = values[i];
        if (Strings.isNullOrEmpty(candidate)) {
            // Unusable entry; keep scanning the lower-priority candidates.
            continue;
        }
        return Optional.of(candidate);
    }
    return Optional.empty();
}
/**
 * Returns the value of the first present {@link Optional}, falling back to a default.
 *
 * <p>The candidates are examined in argument order. {@code null} entries (and a
 * {@code null} array) are skipped instead of causing a {@link NullPointerException}.
 *
 * @param defaultValue the value to return when no candidate is present; may be {@code null}
 * @param values the candidates, in priority order
 * @return the value of the first present candidate, or {@code defaultValue} if none is present
 */
@SafeVarargs // the varargs array is only read, never stored or exposed
public static String firstPresentOrDefault(String defaultValue, Optional<String>... values) {
    if (values == null) {
        return defaultValue;
    }
    for (Optional<String> value : values) {
        if (value != null && value.isPresent()) {
            return value.get();
        }
    }
    return defaultValue;
}
}

View File

@ -85,7 +85,7 @@ public class HiveScanNode extends FileQueryScanNode {
public static final String PROP_LINE_DELIMITER = "line.delim";
public static final String DEFAULT_LINE_DELIMITER = "\n";
public static final String PROP_SEPARATOR_CHAR = "separatorChar";
public static final String PROP_QUOTA_CHAR = "quoteChar";
public static final String PROP_QUOTE_CHAR = "quoteChar";
public static final String PROP_COLLECTION_DELIMITER_HIVE2 = "colelction.delim";
public static final String PROP_COLLECTION_DELIMITER_HIVE3 = "collection.delim";
@ -445,32 +445,37 @@ public class HiveScanNode extends FileQueryScanNode {
@Override
protected TFileAttributes getFileAttributes() throws UserException {
TFileTextScanRangeParams textParams = new TFileTextScanRangeParams();
java.util.Map<String, String> delimiter = hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters();
if (delimiter.containsKey(PROP_FIELD_DELIMITER)) {
if (delimiter.get(PROP_FIELD_DELIMITER).length() == 0) {
textParams.setColumnSeparator(DEFAULT_FIELD_DELIMITER);
} else {
textParams.setColumnSeparator(delimiter.get(PROP_FIELD_DELIMITER));
}
} else if (delimiter.containsKey(PROP_SEPARATOR_CHAR)) {
textParams.setColumnSeparator(delimiter.get(PROP_SEPARATOR_CHAR));
} else {
textParams.setColumnSeparator(DEFAULT_FIELD_DELIMITER);
}
if (delimiter.containsKey(PROP_QUOTA_CHAR)) {
textParams.setEnclose(delimiter.get(PROP_QUOTA_CHAR).getBytes()[0]);
}
textParams.setLineDelimiter(delimiter.getOrDefault(PROP_LINE_DELIMITER, DEFAULT_LINE_DELIMITER));
textParams.setMapkvDelimiter(delimiter.getOrDefault(PROP_MAP_KV_DELIMITER, DEFAULT_MAP_KV_DELIMITER));
// textParams.collection_delimiter field is map, array and struct delimiter;
if (delimiter.get(PROP_COLLECTION_DELIMITER_HIVE2) != null) {
textParams.setCollectionDelimiter(delimiter.get(PROP_COLLECTION_DELIMITER_HIVE2));
} else if (delimiter.get(PROP_COLLECTION_DELIMITER_HIVE3) != null) {
textParams.setCollectionDelimiter(delimiter.get(PROP_COLLECTION_DELIMITER_HIVE3));
} else {
textParams.setCollectionDelimiter(DEFAULT_COLLECTION_DELIMITER);
// 1. set column separator
Optional<String> fieldDelim =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_FIELD_DELIMITER);
Optional<String> columnSeparator =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_SEPARATOR_CHAR);
textParams.setColumnSeparator(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_FIELD_DELIMITER, fieldDelim, columnSeparator));
// 2. set line delimiter
Optional<String> lineDelim =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_LINE_DELIMITER);
textParams.setLineDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_LINE_DELIMITER, lineDelim));
// 3. set mapkv delimiter
Optional<String> mapkvDelim =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_MAP_KV_DELIMITER);
textParams.setMapkvDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_MAP_KV_DELIMITER, mapkvDelim));
// 4. set collection delimiter
Optional<String> collectionDelimHive2 =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_COLLECTION_DELIMITER_HIVE2);
Optional<String> collectionDelimHive3 =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(), PROP_COLLECTION_DELIMITER_HIVE3);
textParams.setCollectionDelimiter(HiveMetaStoreClientHelper.firstPresentOrDefault(
DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2, collectionDelimHive3));
// 5. set quote char
Map<String, String> serdeParams = hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters();
if (serdeParams.containsKey(PROP_QUOTE_CHAR)) {
textParams.setEnclose(serdeParams.get(PROP_QUOTE_CHAR).getBytes()[0]);
}
TFileAttributes fileAttributes = new TFileAttributes();
fileAttributes.setTextParams(textParams);
fileAttributes.setHeaderType("");
@ -502,3 +507,4 @@ public class HiveScanNode extends FileQueryScanNode {
return compressType;
}
}