[feature](sql-block-rule) sql block rule support external table (#37041) (#37765)

bp #37041
Author: Mingyu Chen
Date: 2024-08-08 11:50:52 +08:00
Committed by: GitHub
Parent: 357ec97851
Commit: c94b7377f1
15 changed files with 150 additions and 39 deletions
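Summary: this backport lets SQL block rules limit scans over external tables, not only internal OLAP tables. The per-scan counters are hoisted out of OlapScanNode and FileScanNode into their common base class ScanNode as selectedPartitionNum and selectedSplitNum, and StmtExecutor applies the limitation check to both node types. As a rough illustration of the user-facing effect (the rule name and threshold values below are invented; the property names follow the documented CREATE SQL_BLOCK_RULE syntax), a scan-limit rule like this is now also enforced against external-table scans, with partition_num checked against the scan's selected partitions and tablet_num against its selected tablets or file splits:

CREATE SQL_BLOCK_RULE external_scan_guard
PROPERTIES (
    "partition_num" = "30",        -- reject if more than 30 partitions are selected
    "tablet_num" = "1000",         -- reject if more than 1000 tablets/splits are selected
    "cardinality" = "100000000",   -- reject if estimated scanned rows exceed 100M
    "global" = "true",
    "enable" = "true"
);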

File: FileQueryScanNode.java

@@ -312,7 +312,7 @@ public abstract class FileQueryScanNode extends FileScanNode {
         if (splitAssignment.getSampleSplit() == null && !(getLocationType() == TFileType.FILE_STREAM)) {
             return;
         }
-        inputSplitsNum = numApproximateSplits();
+        selectedSplitNum = numApproximateSplits();
         TFileType locationType;
         FileSplit fileSplit = (FileSplit) splitAssignment.getSampleSplit();
@@ -322,7 +322,7 @@ public abstract class FileQueryScanNode extends FileScanNode {
         } else {
             locationType = getLocationType(fileSplit.getPath().toString());
         }
-        totalFileSize = fileSplit.getLength() * inputSplitsNum;
+        totalFileSize = fileSplit.getLength() * selectedSplitNum;
         long maxWaitTime = ConnectContext.get().getSessionVariable().getFetchSplitsMaxWaitTime();
         // Not accurate, only used to estimate concurrency.
         int numSplitsPerBE = numApproximateSplits() / backendPolicy.numBackends();
@@ -350,7 +350,7 @@ public abstract class FileQueryScanNode extends FileScanNode {
         if (ConnectContext.get().getExecutor() != null) {
             ConnectContext.get().getExecutor().getSummaryProfile().setGetSplitsFinishTime();
         }
-        inputSplitsNum = inputSplits.size();
+        selectedSplitNum = inputSplits.size();
         if (inputSplits.isEmpty() && !(getLocationType() == TFileType.FILE_STREAM)) {
             return;
         }

File: FileScanNode.java

@@ -65,10 +65,8 @@ public abstract class FileScanNode extends ExternalScanNode {
     public static final long DEFAULT_SPLIT_SIZE = 64 * 1024 * 1024; // 64MB
     // For explain
-    protected long inputSplitsNum = 0;
     protected long totalFileSize = 0;
     protected long totalPartitionNum = 0;
-    protected long readPartitionNum = 0;
     protected long fileSplitSize;
     protected boolean isSplitSizeSetBySession = false;
@@ -127,9 +125,9 @@ public abstract class FileScanNode extends ExternalScanNode {
         if (isBatchMode()) {
             output.append("(approximate)");
         }
-        output.append("inputSplitNum=").append(inputSplitsNum).append(", totalFileSize=")
+        output.append("inputSplitNum=").append(selectedSplitNum).append(", totalFileSize=")
                 .append(totalFileSize).append(", scanRanges=").append(scanRangeLocations.size()).append("\n");
-        output.append(prefix).append("partition=").append(readPartitionNum).append("/").append(totalPartitionNum)
+        output.append(prefix).append("partition=").append(selectedPartitionNum).append("/").append(totalPartitionNum)
                 .append("\n");
         if (detailLevel == TExplainLevel.VERBOSE) {
@@ -299,8 +297,4 @@ public abstract class FileScanNode extends ExternalScanNode {
         long fileLength = last.getOffset() + last.getLength() - 1L;
         throw new IllegalArgumentException(String.format("Offset %d is outside of file (0..%d)", offset, fileLength));
     }
-
-    public long getReadPartitionNum() {
-        return this.readPartitionNum;
-    }
 }

File: HiveScanNode.java

@@ -181,7 +181,7 @@ public class HiveScanNode extends FileQueryScanNode {
             partitionItems = selectedPartitions.selectedPartitions.values();
         }
         Preconditions.checkNotNull(partitionItems);
-        this.readPartitionNum = partitionItems.size();
+        this.selectedPartitionNum = partitionItems.size();
         // get partitions from cache
         List<List<String>> partitionValuesList = Lists.newArrayListWithCapacity(partitionItems.size());
@@ -198,7 +198,7 @@ public class HiveScanNode extends FileQueryScanNode {
                     hmsTable.getRemoteTable().getSd().getInputFormat(),
                     hmsTable.getRemoteTable().getSd().getLocation(), null, Maps.newHashMap());
             this.totalPartitionNum = 1;
-            this.readPartitionNum = 1;
+            this.selectedPartitionNum = 1;
             resPartitions.add(dummyPartition);
         }
         if (ConnectContext.get().getExecutor() != null) {

File: HudiScanNode.java

@@ -285,7 +285,7 @@ public class HudiScanNode extends HiveScanNode {
                     partitionValues.getSingleColumnRangeMap(),
                     true);
             Collection<Long> filteredPartitionIds = pruner.prune();
-            this.readPartitionNum = filteredPartitionIds.size();
+            this.selectedPartitionNum = filteredPartitionIds.size();
             // 3. get partitions from cache
             String dbName = hmsTable.getDbName();
             String tblName = hmsTable.getName();
@@ -310,7 +310,7 @@ public class HudiScanNode extends HiveScanNode {
                     hmsTable.getRemoteTable().getSd().getInputFormat(),
                     hmsTable.getRemoteTable().getSd().getLocation(), null, Maps.newHashMap());
             this.totalPartitionNum = 1;
-            this.readPartitionNum = 1;
+            this.selectedPartitionNum = 1;
             return Lists.newArrayList(dummyPartition);
         }
@@ -502,7 +502,7 @@ public class HudiScanNode extends HiveScanNode {
             return super.getNodeExplainString(prefix, detailLevel);
         } else {
             return super.getNodeExplainString(prefix, detailLevel)
-                    + String.format("%shudiNativeReadSplits=%d/%d\n", prefix, noLogsSplitNum.get(), inputSplitsNum);
+                    + String.format("%shudiNativeReadSplits=%d/%d\n", prefix, noLogsSplitNum.get(), selectedSplitNum);
         }
     }
 }

File: IcebergScanNode.java

@@ -266,7 +266,7 @@ public class IcebergScanNode extends FileQueryScanNode {
             return splits.isEmpty() ? splits : Collections.singletonList(splits.get(0));
         }
-        readPartitionNum = partitionPathSet.size();
+        selectedPartitionNum = partitionPathSet.size();
         return splits;
     }

File: MaxComputeScanNode.java

@@ -197,7 +197,7 @@ public class MaxComputeScanNode extends FileQueryScanNode {
                 partitionValues.getSingleColumnRangeMap(),
                 false);
         Collection<Long> filteredPartitionIds = pruner.prune();
-        this.readPartitionNum = filteredPartitionIds.size();
+        this.selectedPartitionNum = filteredPartitionIds.size();
         // get partitions from cache
         Map<Long, String> partitionIdToNameMap = partitionValues.getPartitionIdToNameMap();
         filteredPartitionIds.forEach(id -> result.add(partitionIdToNameMap.get(id)));

File: PaimonScanNode.java

@@ -270,7 +270,7 @@ public class PaimonScanNode extends FileQueryScanNode {
             }
             splitStats.add(splitStat);
         }
-        this.readPartitionNum = selectedPartitionValues.size();
+        this.selectedPartitionNum = selectedPartitionValues.size();
         // TODO: get total partition number
         return splits;
     }

File: FileLoadScanNode.java

@@ -205,7 +205,7 @@ public class FileLoadScanNode extends FileScanNode {
             LoadScanProvider scanProvider = scanProviders.get(i);
             finalizeParamsForLoad(context, analyzer);
             createScanRangeLocations(context, scanProvider, localBackendPolicy);
-            this.inputSplitsNum += scanProvider.getInputSplitNum();
+            this.selectedSplitNum += scanProvider.getInputSplitNum();
             this.totalFileSize += scanProvider.getInputFileSize();
         }
     }

File: OlapScanNode.java

@@ -164,10 +164,8 @@ public class OlapScanNode extends ScanNode {
     private boolean canTurnOnPreAggr = true;
     private boolean forceOpenPreAgg = false;
     private OlapTable olapTable = null;
-    private long selectedTabletsNum = 0;
     private long totalTabletsNum = 0;
     private long selectedIndexId = -1;
-    private int selectedPartitionNum = 0;
     private Collection<Long> selectedPartitionIds = Lists.newArrayList();
     private long totalBytes = 0;
@@ -299,14 +297,6 @@
         this.forceOpenPreAgg = forceOpenPreAgg;
     }
-
-    public Integer getSelectedPartitionNum() {
-        return selectedPartitionNum;
-    }
-
-    public Long getSelectedTabletsNum() {
-        return selectedTabletsNum;
-    }
 
     public SortInfo getSortInfo() {
         return sortInfo;
     }
@@ -1175,7 +1165,7 @@
             }
             totalTabletsNum += selectedTable.getTablets().size();
-            selectedTabletsNum += tablets.size();
+            selectedSplitNum += tablets.size();
             addScanRangeLocations(partition, tablets);
         }
     }
@@ -1337,7 +1327,7 @@
                 .collect(Collectors.joining(","));
         output.append(prefix).append(String.format("partitions=%s/%s (%s)", selectedPartitionNum,
                 olapTable.getPartitions().size(), selectedPartitions)).append("\n");
-        output.append(prefix).append(String.format("tablets=%s/%s", selectedTabletsNum, totalTabletsNum));
+        output.append(prefix).append(String.format("tablets=%s/%s", selectedSplitNum, totalTabletsNum));
         // We print up to 3 tablet, and we print "..." if the number is more than 3
         if (scanTabletIds.size() > 3) {
             List<Long> firstTenTabletIds = scanTabletIds.subList(0, 3);

File: ScanNode.java

@@ -94,6 +94,9 @@ public abstract class ScanNode extends PlanNode implements SplitGenerator {
     protected PartitionInfo partitionsInfo = null;
     protected SplitAssignment splitAssignment = null;
+    protected long selectedPartitionNum = 0;
+    protected long selectedSplitNum = 0;
+
     // create a mapping between output slot's id and project expr
     Map<SlotId, Expr> outputSlotToProjectExpr = new HashMap<>();
@@ -741,4 +744,12 @@
         long limitRowsForSingleInstance = ctx == null ? 10000 : ctx.getSessionVariable().limitRowsForSingleInstance;
         return hasLimit() && getLimit() < limitRowsForSingleInstance && conjuncts.isEmpty();
     }
+
+    public long getSelectedPartitionNum() {
+        return selectedPartitionNum;
+    }
+
+    public long getSelectedSplitNum() {
+        return selectedSplitNum;
+    }
 }

File: StmtExecutor.java

@@ -118,6 +118,7 @@ import org.apache.doris.common.util.ProfileManager.ProfileType;
 import org.apache.doris.common.util.SqlParserUtils;
 import org.apache.doris.common.util.TimeUtils;
 import org.apache.doris.common.util.Util;
+import org.apache.doris.datasource.FileScanNode;
 import org.apache.doris.datasource.jdbc.client.JdbcClientException;
 import org.apache.doris.datasource.tvf.source.TVFScanNode;
 import org.apache.doris.load.EtlJobType;
@@ -630,13 +631,13 @@
         }
         List<ScanNode> scanNodeList = planner.getScanNodes();
         for (ScanNode scanNode : scanNodeList) {
-            if (scanNode instanceof OlapScanNode) {
-                OlapScanNode olapScanNode = (OlapScanNode) scanNode;
+            if (scanNode instanceof OlapScanNode || scanNode instanceof FileScanNode) {
                 Env.getCurrentEnv().getSqlBlockRuleMgr().checkLimitations(
-                        olapScanNode.getSelectedPartitionNum().longValue(),
-                        olapScanNode.getSelectedTabletsNum(),
-                        olapScanNode.getCardinality(),
+                        scanNode.getSelectedPartitionNum(),
+                        scanNode.getSelectedSplitNum(),
+                        scanNode.getCardinality(),
                         context.getQualifiedUser());
             }
         }
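This hunk is the heart of the change: the limits are now read through the shared ScanNode accessors, so FileScanNode-based scans (Hive, Hudi, Iceberg, Paimon, MaxCompute, and file load) are checked exactly like OLAP scans. A hypothetical illustration of the effect (the catalog, schema, and table names are invented):

-- Assuming a scan-limit rule like external_scan_guard above is enabled,
-- a query whose external scan selects more partitions or splits than the
-- rule allows is now rejected by the block rule instead of being executed.
SELECT * FROM hive_catalog.warehouse.events;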

File: CacheAnalyzer.java

@@ -714,7 +714,7 @@
     private CacheTable buildCacheTableForHiveScanNode(HiveScanNode node) {
         CacheTable cacheTable = new CacheTable();
         cacheTable.table = node.getTargetTable();
-        cacheTable.partitionNum = node.getReadPartitionNum();
+        cacheTable.partitionNum = node.getSelectedPartitionNum();
         cacheTable.latestPartitionTime = cacheTable.table.getUpdateTime();
         TableIf tableIf = cacheTable.table;
         DatabaseIf database = tableIf.getDatabase();