[Fix](multi-catalog) Fix querying HMS tables with compressed data files. (#19557)

If an HMS table's file format is CSV, uncompressed data files may coexist with compressed data files, so we need to set compressType separately for each file split.
2023-05-16 08:49:45 +08:00
parent e48524009d
commit 8284c342cb
1 changed files with 13 additions and 2 deletions
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
@ -217,7 +217,8 @@ public abstract class FileQueryScanNode extends FileScanNode {
        params.setFormatType(fileFormatType);
        TFileCompressType fileCompressType = getFileCompressType(inputSplit);
        params.setCompressType(fileCompressType);
-        if (Util.isCsvFormat(fileFormatType) || fileFormatType == TFileFormatType.FORMAT_JSON) {
+        boolean isCsvOrJson = Util.isCsvFormat(fileFormatType) || fileFormatType == TFileFormatType.FORMAT_JSON;
+        if (isCsvOrJson) {
            params.setFileAttributes(getFileAttributes());
        }

@ -242,9 +243,19 @@ public abstract class FileQueryScanNode extends FileScanNode {

        List<String> pathPartitionKeys = getPathPartitionKeys();
        for (Split split : inputSplits) {
-            TScanRangeLocations curLocations = newLocations(params, backendPolicy);
            FileSplit fileSplit = (FileSplit) split;

+            TFileScanRangeParams scanRangeParams;
+            if (!isCsvOrJson) {
+                scanRangeParams = params;
+            } else {
+                // If fileFormatType is csv/json, uncompressed files may coexist with compressed files,
+                // so we need to set compressType separately for each split.
+                scanRangeParams = new TFileScanRangeParams(params);
+                scanRangeParams.setCompressType(getFileCompressType(fileSplit));
+            }
+            TScanRangeLocations curLocations = newLocations(scanRangeParams, backendPolicy);
+
            // If fileSplit has partition values, use the values collected from hive partitions.
            // Otherwise, use the values in file path.
            List<String> partitionValuesFromPath = fileSplit.getPartitionValues() == null