diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java index 857e7991a2..6b073b05e2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java @@ -24,6 +24,7 @@ import org.apache.doris.common.Config; import org.apache.doris.common.FeNameFormat; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.qe.ConnectContext; +import org.apache.doris.thrift.TFileCompressType; import org.apache.doris.thrift.TFileFormatType; import com.google.common.base.Preconditions; @@ -526,6 +527,8 @@ public class Util { return TFileFormatType.FORMAT_CSV_LZ4FRAME; } else if (lowerCasePath.endsWith(".lzo")) { return TFileFormatType.FORMAT_CSV_LZOP; + } else if (lowerCasePath.endsWith(".lzo_deflate")) { + return TFileFormatType.FORMAT_CSV_LZO; } else if (lowerCasePath.endsWith(".deflate")) { return TFileFormatType.FORMAT_CSV_DEFLATE; } else { @@ -533,11 +536,33 @@ public class Util { } } + @NotNull + public static TFileCompressType getFileCompressType(String path) { + String lowerCasePath = path.toLowerCase(); + if (lowerCasePath.endsWith(".gz")) { + return TFileCompressType.GZ; + } else if (lowerCasePath.endsWith(".bz2")) { + return TFileCompressType.BZ2; + } else if (lowerCasePath.endsWith(".lz4")) { + return TFileCompressType.LZ4FRAME; + } else if (lowerCasePath.endsWith(".lzo")) { + return TFileCompressType.LZOP; + } else if (lowerCasePath.endsWith(".lzo_deflate")) { + return TFileCompressType.LZO; + } else if (lowerCasePath.endsWith(".deflate")) { + return TFileCompressType.DEFLATE; + } else { + return TFileCompressType.PLAIN; + } + } + public static boolean isCsvFormat(TFileFormatType fileFormatType) { - return fileFormatType == TFileFormatType.FORMAT_CSV_BZ2 || fileFormatType == TFileFormatType.FORMAT_CSV_DEFLATE + return fileFormatType == TFileFormatType.FORMAT_CSV_BZ2 + || fileFormatType == TFileFormatType.FORMAT_CSV_DEFLATE || fileFormatType == TFileFormatType.FORMAT_CSV_GZ || fileFormatType == TFileFormatType.FORMAT_CSV_LZ4FRAME - || fileFormatType == TFileFormatType.FORMAT_CSV_LZO || fileFormatType == TFileFormatType.FORMAT_CSV_LZOP + || fileFormatType == TFileFormatType.FORMAT_CSV_LZO + || fileFormatType == TFileFormatType.FORMAT_CSV_LZOP || fileFormatType == TFileFormatType.FORMAT_CSV_PLAIN; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java index a36369325e..fc5f6721dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java @@ -32,6 +32,7 @@ import org.apache.doris.common.FeConstants; import org.apache.doris.common.NotImplementedException; import org.apache.doris.common.UserException; import org.apache.doris.common.util.BrokerUtil; +import org.apache.doris.common.util.Util; import org.apache.doris.nereids.glue.translator.PlanTranslatorContext; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.planner.external.iceberg.IcebergScanNode; @@ -41,6 +42,7 @@ import org.apache.doris.statistics.StatisticalType; import org.apache.doris.system.Backend; import org.apache.doris.thrift.TExternalScanRange; import org.apache.doris.thrift.TFileAttributes; +import org.apache.doris.thrift.TFileCompressType; import org.apache.doris.thrift.TFileFormatType; import org.apache.doris.thrift.TFileRangeDesc; import org.apache.doris.thrift.TFileScanRange; @@ -212,8 +214,10 @@ public abstract class FileQueryScanNode extends FileScanNode { TFileType locationType = getLocationType(); params.setFileType(locationType); TFileFormatType fileFormatType = getFileFormatType(); - params.setFormatType(getFileFormatType()); - if (fileFormatType == TFileFormatType.FORMAT_CSV_PLAIN || fileFormatType == TFileFormatType.FORMAT_JSON) { + params.setFormatType(fileFormatType); + TFileCompressType fileCompressType = getFileCompressType(inputSplit); + params.setCompressType(fileCompressType); + if (Util.isCsvFormat(fileFormatType) || fileFormatType == TFileFormatType.FORMAT_JSON) { params.setFileAttributes(getFileAttributes()); } @@ -318,6 +322,10 @@ public abstract class FileQueryScanNode extends FileScanNode { throw new NotImplementedException(""); } + protected TFileCompressType getFileCompressType(FileSplit fileSplit) throws UserException { + return Util.getFileCompressType(fileSplit.getPath().toString()); + } + protected TFileAttributes getFileAttributes() throws UserException { throw new NotImplementedException(""); } diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index b2ac960a10..ba97ae77e5 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -127,7 +127,8 @@ enum TFileCompressType { LZO, BZ2, LZ4FRAME, - DEFLATE + DEFLATE, + LZOP, } struct THdfsConf {