[opt](paimon) add split num and partition num (#32597)

1. And selected partition number indicator in PaimonScanNode in explain result.
2. Add raw file split and total split indicator in PaimonScanNode in explain result.
3. Opt the error msg of table valued function, print the error msg first to avoid the long msg be truncated.
This commit is contained in:
Mingyu Chen
2024-03-21 16:41:41 +08:00
committed by yiguolei
parent 8d6561b32d
commit 06026b2f46
2 changed files with 26 additions and 4 deletions

View File

@ -34,6 +34,7 @@ import org.apache.doris.planner.PlanNodeId;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.spi.Split;
import org.apache.doris.statistics.StatisticalType;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TFileFormatType;
import org.apache.doris.thrift.TFileRangeDesc;
import org.apache.doris.thrift.TFileType;
@ -42,9 +43,11 @@ import org.apache.doris.thrift.TScanRangeLocations;
import org.apache.doris.thrift.TTableFormatFileDesc;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import org.apache.hadoop.fs.Path;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.paimon.data.BinaryRow;
import org.apache.paimon.predicate.Predicate;
import org.apache.paimon.table.AbstractFileStoreTable;
import org.apache.paimon.table.source.DataSplit;
@ -66,6 +69,8 @@ public class PaimonScanNode extends FileQueryScanNode {
private static final Logger LOG = LogManager.getLogger(PaimonScanNode.class);
private PaimonSource source = null;
private List<Predicate> predicates;
private int rawFileSplitNum = 0;
private int paimonSplitNum = 0;
public PaimonScanNode(PlanNodeId id, TupleDescriptor desc, boolean needCheckColumnPriv) {
super(id, desc, "PAIMON_SCAN_NODE", StatisticalType.PAIMON_SCAN_NODE, needCheckColumnPriv);
@ -144,12 +149,16 @@ public class PaimonScanNode extends FileQueryScanNode {
.withProjection(projected)
.newScan().plan().splits();
boolean supportNative = supportNativeReader();
// Just for counting the number of selected partitions for this paimon table
Set<BinaryRow> selectedPartitionValues = Sets.newHashSet();
for (org.apache.paimon.table.source.Split split : paimonSplits) {
if (!forceJniScanner && supportNative && split instanceof DataSplit) {
DataSplit dataSplit = (DataSplit) split;
Optional<List<RawFile>> optRowFiles = dataSplit.convertToRawFiles();
if (optRowFiles.isPresent()) {
List<RawFile> rawFiles = optRowFiles.get();
BinaryRow partitionValue = dataSplit.partition();
selectedPartitionValues.add(partitionValue);
Optional<List<RawFile>> optRawFiles = dataSplit.convertToRawFiles();
if (optRawFiles.isPresent()) {
List<RawFile> rawFiles = optRawFiles.get();
for (RawFile file : rawFiles) {
LocationPath locationPath = new LocationPath(file.path(), source.getCatalog().getProperties());
Path finalDataFilePath = locationPath.toScanRangeLocation();
@ -164,17 +173,22 @@ public class PaimonScanNode extends FileQueryScanNode {
true,
null,
PaimonSplit.PaimonSplitCreator.DEFAULT));
++rawFileSplitNum;
} catch (IOException e) {
throw new UserException("Paimon error to split file: " + e.getMessage(), e);
}
}
} else {
splits.add(new PaimonSplit(split));
++paimonSplitNum;
}
} else {
splits.add(new PaimonSplit(split));
++paimonSplitNum;
}
}
this.readPartitionNum = selectedPartitionValues.size();
// TODO: get total partition number
return splits;
}
@ -244,4 +258,10 @@ public class PaimonScanNode extends FileQueryScanNode {
return map;
}
@Override
public String getNodeExplainString(String prefix, TExplainLevel detailLevel) {
return super.getNodeExplainString(prefix, detailLevel)
+ String.format("%spaimonNativeReadSplits=%d/%d\n",
prefix, rawFileSplitNum, (paimonSplitNum + rawFileSplitNum));
}
}

View File

@ -53,7 +53,9 @@ public abstract class TableValuedFunction extends BoundFunction implements Unary
} catch (AnalysisException e) {
throw e;
} catch (Throwable t) {
throw new AnalysisException("Can not build FunctionGenTable by " + this + ": " + t.getMessage(), t);
// Do not print the whole stmt, it is too long and may contain sensitive information
throw new AnalysisException(
"Can not build FunctionGenTable '" + this.getName() + "'. error: " + t.getMessage(), t);
}
});