From 06026b2f46c61ff048d2c94ed5dda29b9dfeb617 Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Thu, 21 Mar 2024 16:41:41 +0800 Subject: [PATCH] [opt](paimon) add split num and partition num (#32597) 1. Add selected partition number indicator in PaimonScanNode in explain result. 2. Add raw file split and total split indicator in PaimonScanNode in explain result. 3. Optimize the error message of table valued function: print the error message first to avoid the long message being truncated. --- .../paimon/source/PaimonScanNode.java | 26 ++++++++++++++++--- .../functions/table/TableValuedFunction.java | 4 ++- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java index 787168fdf7..733565e706 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java @@ -34,6 +34,7 @@ import org.apache.doris.planner.PlanNodeId; import org.apache.doris.qe.ConnectContext; import org.apache.doris.spi.Split; import org.apache.doris.statistics.StatisticalType; +import org.apache.doris.thrift.TExplainLevel; import org.apache.doris.thrift.TFileFormatType; import org.apache.doris.thrift.TFileRangeDesc; import org.apache.doris.thrift.TFileType; @@ -42,9 +43,11 @@ import org.apache.doris.thrift.TScanRangeLocations; import org.apache.doris.thrift.TTableFormatFileDesc; import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; import org.apache.hadoop.fs.Path; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.paimon.data.BinaryRow; import org.apache.paimon.predicate.Predicate; import org.apache.paimon.table.AbstractFileStoreTable; import org.apache.paimon.table.source.DataSplit; @@ -66,6 +69,8 @@ public class PaimonScanNode 
extends FileQueryScanNode { private static final Logger LOG = LogManager.getLogger(PaimonScanNode.class); private PaimonSource source = null; private List predicates; + private int rawFileSplitNum = 0; + private int paimonSplitNum = 0; public PaimonScanNode(PlanNodeId id, TupleDescriptor desc, boolean needCheckColumnPriv) { super(id, desc, "PAIMON_SCAN_NODE", StatisticalType.PAIMON_SCAN_NODE, needCheckColumnPriv); @@ -144,12 +149,16 @@ public class PaimonScanNode extends FileQueryScanNode { .withProjection(projected) .newScan().plan().splits(); boolean supportNative = supportNativeReader(); + // Just for counting the number of selected partitions for this paimon table + Set selectedPartitionValues = Sets.newHashSet(); for (org.apache.paimon.table.source.Split split : paimonSplits) { if (!forceJniScanner && supportNative && split instanceof DataSplit) { DataSplit dataSplit = (DataSplit) split; - Optional> optRowFiles = dataSplit.convertToRawFiles(); - if (optRowFiles.isPresent()) { - List rawFiles = optRowFiles.get(); + BinaryRow partitionValue = dataSplit.partition(); + selectedPartitionValues.add(partitionValue); + Optional> optRawFiles = dataSplit.convertToRawFiles(); + if (optRawFiles.isPresent()) { + List rawFiles = optRawFiles.get(); for (RawFile file : rawFiles) { LocationPath locationPath = new LocationPath(file.path(), source.getCatalog().getProperties()); Path finalDataFilePath = locationPath.toScanRangeLocation(); @@ -164,17 +173,22 @@ public class PaimonScanNode extends FileQueryScanNode { true, null, PaimonSplit.PaimonSplitCreator.DEFAULT)); + ++rawFileSplitNum; } catch (IOException e) { throw new UserException("Paimon error to split file: " + e.getMessage(), e); } } } else { splits.add(new PaimonSplit(split)); + ++paimonSplitNum; } } else { splits.add(new PaimonSplit(split)); + ++paimonSplitNum; } } + this.readPartitionNum = selectedPartitionValues.size(); + // TODO: get total partition number return splits; } @@ -244,4 +258,10 @@ public class 
PaimonScanNode extends FileQueryScanNode { return map; } + @Override + public String getNodeExplainString(String prefix, TExplainLevel detailLevel) { + return super.getNodeExplainString(prefix, detailLevel) + + String.format("%spaimonNativeReadSplits=%d/%d\n", + prefix, rawFileSplitNum, (paimonSplitNum + rawFileSplitNum)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/TableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/TableValuedFunction.java index c0969362ed..e3e2548169 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/TableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/TableValuedFunction.java @@ -53,7 +53,9 @@ public abstract class TableValuedFunction extends BoundFunction implements Unary } catch (AnalysisException e) { throw e; } catch (Throwable t) { - throw new AnalysisException("Can not build FunctionGenTable by " + this + ": " + t.getMessage(), t); + // Do not print the whole stmt, it is too long and may contain sensitive information + throw new AnalysisException( + "Can not build FunctionGenTable '" + this.getName() + "'. error: " + t.getMessage(), t); } });