[opt](scan) read scan ranges in the order of partitions (#33515) (#33657)

backport: #33515
This commit is contained in:
Ashin Gau
2024-04-15 16:20:12 +08:00
committed by yiguolei
parent d0394b7f89
commit 2cd4012541
4 changed files with 79 additions and 43 deletions

View File

@ -211,24 +211,18 @@ public class FederationBackendPolicy {
this.enableSplitsRedistribution = enableSplitsRedistribution;
}
/**
* Assign splits to each backend. Ensure that each backend receives a similar amount of data.
* In order to make sure backends utilize the os page cache as much as possible, and all backends read splits
* in the order of partitions(reading data in partition order can reduce the memory usage of backends),
* splits should be sorted by path.
* Fortunately, the process of obtaining splits ensures that the splits have been sorted according to the path.
* If the splits are unordered, it is strongly recommended to sort them before calling this function.
*/
public Multimap<Backend, Split> computeScanRangeAssignment(List<Split> splits) throws UserException {
// Sorting splits is to ensure that the same query utilizes the os page cache as much as possible.
splits.sort((split1, split2) -> {
int pathComparison = split1.getPathString().compareTo(split2.getPathString());
if (pathComparison != 0) {
return pathComparison;
}
int startComparison = Long.compare(split1.getStart(), split2.getStart());
if (startComparison != 0) {
return startComparison;
}
return Long.compare(split1.getLength(), split2.getLength());
});
ListMultimap<Backend, Split> assignment = ArrayListMultimap.create();
List<Split> remainingSplits = null;
List<Split> remainingSplits;
List<Backend> backends = new ArrayList<>();
for (List<Backend> backendList : backendMap.values()) {
@ -242,8 +236,7 @@ public class FederationBackendPolicy {
// locality information
if (Config.split_assigner_optimized_local_scheduling) {
remainingSplits = new ArrayList<>(splits.size());
for (int i = 0; i < splits.size(); ++i) {
Split split = splits.get(i);
for (Split split : splits) {
if (split.isRemotelyAccessible() && (split.getHosts() != null && split.getHosts().length > 0)) {
List<Backend> candidateNodes = selectExactNodes(backendMap, split.getHosts());

View File

@ -288,6 +288,21 @@ public class FederationBackendPolicyTest {
}
public static void sortSplits(List<Split> splits) {
splits.sort((split1, split2) -> {
int pathComparison = split1.getPathString().compareTo(split2.getPathString());
if (pathComparison != 0) {
return pathComparison;
}
int startComparison = Long.compare(split1.getStart(), split2.getStart());
if (startComparison != 0) {
return startComparison;
}
return Long.compare(split1.getLength(), split2.getLength());
});
}
@Test
public void testGenerateRandomly() throws UserException {
SystemInfoService service = new SystemInfoService();
@ -367,7 +382,7 @@ public class FederationBackendPolicyTest {
List<Split> totalSplits = new ArrayList<>();
totalSplits.addAll(remoteSplits);
totalSplits.addAll(localSplits);
Collections.shuffle(totalSplits);
sortSplits(totalSplits);
Multimap<Backend, Split> assignment = policy.computeScanRangeAssignment(totalSplits);
if (i == 0) {
result = ArrayListMultimap.create(assignment);
@ -489,7 +504,7 @@ public class FederationBackendPolicyTest {
List<Split> totalSplits = new ArrayList<>();
totalSplits.addAll(remoteSplits);
totalSplits.addAll(localSplits);
Collections.shuffle(totalSplits);
sortSplits(totalSplits);
Multimap<Backend, Split> assignment = policy.computeScanRangeAssignment(totalSplits);
if (i == 0) {
result = ArrayListMultimap.create(assignment);