[opt](nereids)adjust distribution cost for better choice of broadcast join and shuffle join (#27113)

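Add lower and upper bounds to the distribution cost factor: Statistics.dataSizeFactor() is now clamped to [0.03, 0.07]. Also, no broadcast penalty is applied (buildSideFactor = 1.0) when the build side's computeSize() is below 1024 * 1024.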
This commit is contained in:
minghong
2023-11-28 10:41:16 +08:00
committed by GitHub
parent d1e163126c
commit 9903c30591
240 changed files with 4580 additions and 4836 deletions

View File

@ -218,7 +218,7 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
}
// any
// cost of randome shuffle is lower than hash shuffle.
// cost of random shuffle is lower than hash shuffle.
return CostV1.of(context.getSessionVariable(),
0,
0,
@ -290,8 +290,13 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
int parallelInstance = Math.max(1, context.getSessionVariable().getParallelExecInstanceNum());
int totalInstanceNumber = parallelInstance * beNumber;
if (buildSideFactor <= 1.0) {
// use totalInstanceNumber to the power of 2 as the default factor value
buildSideFactor = Math.pow(totalInstanceNumber, 0.5);
if (buildStats.computeSize() < 1024 * 1024) {
// no penalty to broadcast if build side is small
buildSideFactor = 1.0;
} else {
// use the square root of totalInstanceNumber (power of 0.5) as the default factor value
buildSideFactor = Math.pow(totalInstanceNumber, 0.5);
}
}
return CostV1.of(context.getSessionVariable(),
leftRowCount + rightRowCount * buildSideFactor + outputRowCount * probeSideFactor,

View File

@ -129,7 +129,9 @@ public class Statistics {
}
public double dataSizeFactor() {
return computeTupleSize() / K_BYTES;
double lowerBound = 0.03;
double upperBound = 0.07;
return Math.min(Math.max(computeTupleSize() / K_BYTES, lowerBound), upperBound);
}
@Override