[Fix](multi-catalog) Fix query hms tbl with compressed data files. (#19557)

If an HMS table's file format is CSV, uncompressed data files may coexist with compressed data files, so we need to set compressType separately for each file split.
This commit is contained in:
Xiangyu Wang
2023-05-16 08:49:45 +08:00
committed by GitHub
parent e48524009d
commit 8284c342cb

View File

@ -217,7 +217,8 @@ public abstract class FileQueryScanNode extends FileScanNode {
params.setFormatType(fileFormatType);
TFileCompressType fileCompressType = getFileCompressType(inputSplit);
params.setCompressType(fileCompressType);
if (Util.isCsvFormat(fileFormatType) || fileFormatType == TFileFormatType.FORMAT_JSON) {
boolean isCsvOrJson = Util.isCsvFormat(fileFormatType) || fileFormatType == TFileFormatType.FORMAT_JSON;
if (isCsvOrJson) {
params.setFileAttributes(getFileAttributes());
}
@ -242,9 +243,19 @@ public abstract class FileQueryScanNode extends FileScanNode {
List<String> pathPartitionKeys = getPathPartitionKeys();
for (Split split : inputSplits) {
TScanRangeLocations curLocations = newLocations(params, backendPolicy);
FileSplit fileSplit = (FileSplit) split;
TFileScanRangeParams scanRangeParams;
if (!isCsvOrJson) {
scanRangeParams = params;
} else {
// If fileFormatType is csv/json, uncompressed files may coexist with compressed files,
// so we need to set compressType separately for each split.
scanRangeParams = new TFileScanRangeParams(params);
scanRangeParams.setCompressType(getFileCompressType(fileSplit));
}
TScanRangeLocations curLocations = newLocations(scanRangeParams, backendPolicy);
// If fileSplit has partition values, use the values collected from hive partitions.
// Otherwise, use the values in file path.
List<String> partitionValuesFromPath = fileSplit.getPartitionValues() == null