[Fix](Nereids) fix column statistic derive in outer join estimation (#25586)

Problem:
During join estimation, the ndv statistics of the output slots passed to the upper join could be derived incorrectly.
Example:
We have two tables:
tableA (a1[ndv = 10.0]) tableB(b1[ndv = 0.0], b2[ndv = 10.0])
tableA left join tableB on A.a1 = B.b1, where B.b1 has an ndv of zero.
The problem is that after join estimation, the ndv of B.b2 is changed to 1.0.
Reason:
When estimating an outer join, we can assume its row count behaves like that of an inner join. But we also derived its output column statistics the same way an inner join does, which is wrong for outer joins.
Solved:
When estimating an outer join, the output slot statistics are now updated separately from the inner join estimation.
This commit is contained in:
LiBinfeng
2023-10-24 22:25:20 +08:00
committed by GitHub
parent 88dd480c2e
commit 440345169a
2 changed files with 62 additions and 3 deletions

View File

@ -294,6 +294,11 @@ public class JoinEstimation {
*/
public static Statistics estimate(Statistics leftStats, Statistics rightStats, Join join) {
JoinType joinType = join.getJoinType();
Statistics crossJoinStats = new StatisticsBuilder()
.setRowCount(Math.max(1, leftStats.getRowCount()) * Math.max(1, rightStats.getRowCount()))
.putColumnStatistics(leftStats.columnStatistics())
.putColumnStatistics(rightStats.columnStatistics())
.build();
if (joinType.isSemiOrAntiJoin()) {
return estimateSemiOrAnti(leftStats, rightStats, join);
} else if (joinType == JoinType.INNER_JOIN) {
@ -304,15 +309,15 @@ public class JoinEstimation {
Statistics innerJoinStats = estimateInnerJoin(leftStats, rightStats, join);
double rowCount = Math.max(leftStats.getRowCount(), innerJoinStats.getRowCount());
rowCount = Math.max(leftStats.getRowCount(), rowCount);
return innerJoinStats.withRowCountAndEnforceValid(rowCount);
return crossJoinStats.withRowCountAndEnforceValid(rowCount);
} else if (joinType == JoinType.RIGHT_OUTER_JOIN) {
Statistics innerJoinStats = estimateInnerJoin(leftStats, rightStats, join);
double rowCount = Math.max(rightStats.getRowCount(), innerJoinStats.getRowCount());
rowCount = Math.max(rowCount, rightStats.getRowCount());
return innerJoinStats.withRowCountAndEnforceValid(rowCount);
return crossJoinStats.withRowCountAndEnforceValid(rowCount);
} else if (joinType == JoinType.FULL_OUTER_JOIN) {
Statistics innerJoinStats = estimateInnerJoin(leftStats, rightStats, join);
return innerJoinStats.withRowCountAndEnforceValid(leftStats.getRowCount()
return crossJoinStats.withRowCountAndEnforceValid(leftStats.getRowCount()
+ rightStats.getRowCount() + innerJoinStats.getRowCount());
} else if (joinType == JoinType.CROSS_JOIN) {
return new StatisticsBuilder()