[opt](split) add max wait time of getting splits (#36842)

bp: #36843
This commit is contained in:
Ashin Gau
2024-07-01 22:05:25 +08:00
committed by GitHub
parent 72c20d3ccc
commit e686e85f27
9 changed files with 41 additions and 24 deletions

View File

@ -337,10 +337,11 @@ public abstract class FileQueryScanNode extends FileScanNode {
locationType = getLocationType(fileSplit.getPath().toString());
}
totalFileSize = fileSplit.getLength() * inputSplitsNum;
long maxWaitTime = ConnectContext.get().getSessionVariable().getFetchSplitsMaxWaitTime();
// Not accurate, only used to estimate concurrency.
int numSplitsPerBE = numApproximateSplits() / backendPolicy.numBackends();
for (Backend backend : backendPolicy.getBackends()) {
SplitSource splitSource = new SplitSource(backend, splitAssignment);
SplitSource splitSource = new SplitSource(backend, splitAssignment, maxWaitTime);
splitSources.add(splitSource);
Env.getCurrentEnv().getSplitSourceManager().registerSplitSource(splitSource);
TScanRangeLocations curLocations = newLocations();

View File

@ -44,17 +44,18 @@ import java.util.concurrent.atomic.AtomicLong;
public class SplitSource {
private static final AtomicLong UNIQUE_ID_GENERATOR = new AtomicLong(0);
private static final long WAIT_TIME_OUT = 100; // 100ms
private static final long MAX_WAIT_TIME_OUT = 500; // 500ms
private final long uniqueId;
private final Backend backend;
private final SplitAssignment splitAssignment;
private final AtomicBoolean isLastBatch;
private final long maxWaitTime;
public SplitSource(Backend backend, SplitAssignment splitAssignment) {
/**
 * Creates a split source bound to a single backend.
 *
 * <p>The source takes the next value of the class-wide id counter as its unique id and
 * registers that id with {@code splitAssignment}.
 *
 * @param backend the backend whose assigned splits are fetched through this source
 * @param splitAssignment the assignment that the splits are polled from
 * @param maxWaitTime time limit in milliseconds; while collecting a batch it is compared
 *     against differences of {@code System.currentTimeMillis()}
 */
public SplitSource(Backend backend, SplitAssignment splitAssignment, long maxWaitTime) {
this.uniqueId = UNIQUE_ID_GENERATOR.getAndIncrement();
this.backend = backend;
this.splitAssignment = splitAssignment;
this.maxWaitTime = maxWaitTime;
this.isLastBatch = new AtomicBoolean(false);
// Registration uses the id assigned above, so it must stay after the uniqueId assignment.
splitAssignment.registerSource(uniqueId);
}
@ -71,7 +72,7 @@ public class SplitSource {
return Collections.emptyList();
}
List<TScanRangeLocations> scanRanges = Lists.newArrayListWithExpectedSize(maxBatchSize);
long maxTimeOut = 0;
long startTime = System.currentTimeMillis();
while (scanRanges.size() < maxBatchSize) {
BlockingQueue<Collection<TScanRangeLocations>> splits = splitAssignment.getAssignedSplits(backend);
if (splits == null) {
@ -81,18 +82,19 @@ public class SplitSource {
while (scanRanges.size() < maxBatchSize) {
try {
Collection<TScanRangeLocations> splitCollection = splits.poll(WAIT_TIME_OUT, TimeUnit.MILLISECONDS);
if (splitCollection != null) {
scanRanges.addAll(splitCollection);
}
if (!scanRanges.isEmpty() && System.currentTimeMillis() - startTime > maxWaitTime) {
return scanRanges;
}
if (splitCollection == null) {
maxTimeOut += WAIT_TIME_OUT;
break;
}
scanRanges.addAll(splitCollection);
} catch (InterruptedException e) {
throw new UserException("Failed to get next batch of splits", e);
}
}
if (maxTimeOut >= MAX_WAIT_TIME_OUT && !scanRanges.isEmpty()) {
break;
}
}
return scanRanges;
}

View File

@ -419,6 +419,8 @@ public class SessionVariable implements Serializable, Writable {
public static final String NUM_PARTITIONS_IN_BATCH_MODE = "num_partitions_in_batch_mode";
public static final String FETCH_SPLITS_MAX_WAIT_TIME = "fetch_splits_max_wait_time_ms";
/**
* use insert stmt as the unified backend for all loads
*/
@ -1471,6 +1473,13 @@ public class SessionVariable implements Serializable, Writable {
needForward = true)
public int numPartitionsInBatchMode = 1024;
// Session variable "fetch_splits_max_wait_time_ms": the value is in milliseconds (default 4000).
// FileQueryScanNode reads it via getFetchSplitsMaxWaitTime() and passes it to each SplitSource.
@VariableMgr.VarAttr(
name = FETCH_SPLITS_MAX_WAIT_TIME,
description = {"batch方式中BE获取splits的最大等待时间",
"The max wait time of getting splits in batch mode."},
needForward = true)
public long fetchSplitsMaxWaitTime = 4000;
@VariableMgr.VarAttr(
name = ENABLE_PARQUET_LAZY_MAT,
description = {"控制 parquet reader 是否启用延迟物化技术。默认为 true。",
@ -2731,6 +2740,14 @@ public class SessionVariable implements Serializable, Writable {
this.numPartitionsInBatchMode = numPartitionsInBatchMode;
}
/**
 * Returns the current value of the {@code fetch_splits_max_wait_time_ms} session variable.
 *
 * @return the maximum wait time for getting splits in batch mode, in milliseconds
 */
public long getFetchSplitsMaxWaitTime() {
return fetchSplitsMaxWaitTime;
}
/**
 * Sets the {@code fetch_splits_max_wait_time_ms} session variable.
 *
 * @param fetchSplitsMaxWaitTime the new maximum wait time for getting splits in batch mode,
 *     in milliseconds; the value is stored as given, without validation
 */
public void setFetchSplitsMaxWaitTime(long fetchSplitsMaxWaitTime) {
this.fetchSplitsMaxWaitTime = fetchSplitsMaxWaitTime;
}
/**
 * Returns the value of the {@code enableParquetLazyMat} field.
 *
 * <p>NOTE(review): presumably this field backs the ENABLE_PARQUET_LAZY_MAT session variable
 * (lazy materialization in the parquet reader) — confirm against the field declaration.
 *
 * @return the current value of {@code enableParquetLazyMat}
 */
public boolean isEnableParquetLazyMat() {
return enableParquetLazyMat;
}