[Fix](Nereids) fix column statistic derive in outer join estimation (#25586)

Problem:
During join estimation, the NDV statistic of the upper join's output slots could be derived incorrectly.
Example:
We have two tables:
tableA (a1[ndv = 10.0]) tableB(b1[ndv = 0.0], b2[ndv = 10.0])
tableA LEFT JOIN tableB ON A.a1 = B.b1, where B.b1 has an NDV of zero.
The problem is that, after join estimation, the NDV of B.b2 changed to 1.0.
Reason:
When estimating an outer join, we treated it as if it behaved like an inner join, so its output slot statistics were derived the same way as for an inner join.
Solved:
When estimating an outer join, the output slot statistics are now updated separately.
This commit is contained in:
LiBinfeng
2023-10-24 22:25:20 +08:00
committed by GitHub
parent 88dd480c2e
commit 440345169a
2 changed files with 62 additions and 3 deletions

View File

@ -91,4 +91,58 @@ public class JoinEstimateTest {
Assertions.assertNotNull(outAStats);
Assertions.assertEquals(5, outBStats.ndv);
}
/**
 * Estimates a LEFT OUTER JOIN on {@code a = b} and checks the ndv reported for each output slot.
 *
 * <p>Inputs: the left side has 100 rows with column {@code a} (ndv 10); the right side has
 * 80 rows with columns {@code b} (ndv 0) and {@code c} (ndv 20). The test expects the
 * estimated output to report ndv 10 for {@code a}, 0 for {@code b} and 20 for {@code c}.
 */
@Test
public void testOuterJoinStats() {
    SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
    SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
    SlotReference c = new SlotReference("c", IntegerType.INSTANCE);
    EqualTo eq = new EqualTo(a, b);

    // Left input: 100 rows, join key "a" with ndv 10.
    Statistics leftStats = new StatisticsBuilder().setRowCount(100).build();
    leftStats.addColumnStats(a,
            new ColumnStatisticBuilder()
                    .setCount(100)
                    .setNdv(10)
                    .build()
    );

    // Right input: 80 rows, join key "b" with ndv 0 and a non-key column "c" with ndv 20.
    Statistics rightStats = new StatisticsBuilder().setRowCount(80).build();
    rightStats.addColumnStats(b,
            new ColumnStatisticBuilder()
                    .setCount(80)
                    .setNdv(0)
                    .build()
    ).addColumnStats(c,
            new ColumnStatisticBuilder()
                    .setCount(80)
                    .setNdv(20)
                    .build()
    );

    // Each child is a GroupPlan whose logical properties expose the child's output slots.
    IdGenerator<GroupId> idGenerator = GroupId.createGenerator();
    GroupPlan left = new GroupPlan(new Group(idGenerator.getNextId(), new LogicalProperties(
            new Supplier<List<Slot>>() {
                @Override
                public List<Slot> get() {
                    return Lists.newArrayList(a);
                }
            })));
    GroupPlan right = new GroupPlan(new Group(idGenerator.getNextId(), new LogicalProperties(
            new Supplier<List<Slot>>() {
                @Override
                public List<Slot> get() {
                    return Lists.newArrayList(b, c);
                }
            })));
    LogicalJoin join = new LogicalJoin(JoinType.LEFT_OUTER_JOIN, Lists.newArrayList(eq),
            left, right);

    Statistics outputStats = JoinEstimation.estimate(leftStats, rightStats, join);

    ColumnStatistic outAStats = outputStats.findColumnStatistics(a);
    Assertions.assertNotNull(outAStats);
    Assertions.assertEquals(10, outAStats.ndv);

    // Null-check the statistic that is actually dereferenced next, so a missing column
    // statistic fails as an assertion instead of a NullPointerException.
    ColumnStatistic outBStats = outputStats.findColumnStatistics(b);
    Assertions.assertNotNull(outBStats);
    Assertions.assertEquals(0, outBStats.ndv);

    ColumnStatistic outCStats = outputStats.findColumnStatistics(c);
    Assertions.assertNotNull(outCStats);
    Assertions.assertEquals(20.0, outCStats.ndv);
}
}