From 8284c342cbc062db1b3c06b41e51d65f075c35f0 Mon Sep 17 00:00:00 2001 From: Xiangyu Wang Date: Tue, 16 May 2023 08:49:45 +0800 Subject: [PATCH] [Fix](multi-catalog) Fix query hms tbl with compressed data files. (#19557) If a hms table's file format is csv, uncompressed data files may be coexists with compressed data files, so we need to set compressType separately. --- .../doris/planner/external/FileQueryScanNode.java | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java index fc5f6721dc..3b5b1f8a3d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java @@ -217,7 +217,8 @@ public abstract class FileQueryScanNode extends FileScanNode { params.setFormatType(fileFormatType); TFileCompressType fileCompressType = getFileCompressType(inputSplit); params.setCompressType(fileCompressType); - if (Util.isCsvFormat(fileFormatType) || fileFormatType == TFileFormatType.FORMAT_JSON) { + boolean isCsvOrJson = Util.isCsvFormat(fileFormatType) || fileFormatType == TFileFormatType.FORMAT_JSON; + if (isCsvOrJson) { params.setFileAttributes(getFileAttributes()); } @@ -242,9 +243,19 @@ public abstract class FileQueryScanNode extends FileScanNode { List pathPartitionKeys = getPathPartitionKeys(); for (Split split : inputSplits) { - TScanRangeLocations curLocations = newLocations(params, backendPolicy); FileSplit fileSplit = (FileSplit) split; + TFileScanRangeParams scanRangeParams; + if (!isCsvOrJson) { + scanRangeParams = params; + } else { + // If fileFormatType is csv/json format, uncompressed files may be coexists with compressed files + // So we need set compressType separately + scanRangeParams = new TFileScanRangeParams(params); + scanRangeParams.setCompressType(getFileCompressType(fileSplit)); + } + TScanRangeLocations curLocations = newLocations(scanRangeParams, backendPolicy); + // If fileSplit has partition values, use the values collected from hive partitions. // Otherwise, use the values in file path. List partitionValuesFromPath = fileSplit.getPartitionValues() == null