[opt](nereids) optimize filter estimation for pattern "col=col" #18716
TPC-H Q10 and Q5 benefit from this optimization. For a given hash join condition A = B, sometimes both A and B have already been reduced by filters. In this PR, both reductions are taken into account in join estimation.
This commit is contained in:
@ -389,7 +389,22 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
|
||||
rightBuilder.setNdv(rightIntersectLeft.getDistinctValues());
|
||||
rightBuilder.setMinValue(rightIntersectLeft.getLow());
|
||||
rightBuilder.setMaxValue(rightIntersectLeft.getDistinctValues());
|
||||
double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
|
||||
double sel;
|
||||
double reduceRatio = 0.25;
|
||||
double bothSideReducedRatio = 0.9;
|
||||
if (leftStats.ndv < leftStats.originalNdv * bothSideReducedRatio
|
||||
&& rightStats.ndv < rightStats.originalNdv * bothSideReducedRatio) {
|
||||
double sel1;
|
||||
if (leftStats.ndv > rightStats.ndv) {
|
||||
sel1 = 1 / StatsMathUtil.nonZeroDivisor(leftStats.ndv);
|
||||
} else {
|
||||
sel1 = 1 / StatsMathUtil.nonZeroDivisor(rightStats.ndv);
|
||||
}
|
||||
double sel2 = Math.min(rightStats.ndv / rightStats.originalNdv, leftStats.ndv / leftStats.originalNdv);
|
||||
sel = sel1 * Math.pow(sel2, reduceRatio);
|
||||
} else {
|
||||
sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
|
||||
}
|
||||
Statistics updatedStatistics = context.statistics.withSel(sel);
|
||||
updatedStatistics.addColumnStats(leftExpr, leftBuilder.build());
|
||||
updatedStatistics.addColumnStats(rightExpr, rightBuilder.build());
|
||||
|
||||
@ -5,22 +5,20 @@ PhysicalTopN
|
||||
----PhysicalTopN
|
||||
------PhysicalProject
|
||||
--------hashAgg[LOCAL]
|
||||
----------PhysicalDistribute
|
||||
------------PhysicalProject
|
||||
--------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey)
|
||||
----------PhysicalProject
|
||||
------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey)
|
||||
--------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey)
|
||||
----------------PhysicalProject
|
||||
------------------filter((lineitem.l_returnflag = 'R'))
|
||||
--------------------PhysicalOlapScan[lineitem]
|
||||
------------------PhysicalOlapScan[customer]
|
||||
----------------PhysicalDistribute
|
||||
------------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey)
|
||||
--------------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey)
|
||||
----------------------PhysicalProject
|
||||
------------------------PhysicalOlapScan[customer]
|
||||
----------------------PhysicalDistribute
|
||||
------------------------PhysicalProject
|
||||
--------------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01))
|
||||
----------------------------PhysicalOlapScan[orders]
|
||||
--------------------PhysicalDistribute
|
||||
----------------------PhysicalProject
|
||||
------------------------PhysicalOlapScan[nation]
|
||||
------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey)
|
||||
--------------------PhysicalProject
|
||||
----------------------filter((lineitem.l_returnflag = 'R'))
|
||||
------------------------PhysicalOlapScan[lineitem]
|
||||
--------------------PhysicalProject
|
||||
----------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01))
|
||||
------------------------PhysicalOlapScan[orders]
|
||||
--------------PhysicalDistribute
|
||||
----------------PhysicalProject
|
||||
------------------PhysicalOlapScan[nation]
|
||||
|
||||
|
||||
@ -9,27 +9,27 @@ PhysicalQuickSort
|
||||
------------PhysicalProject
|
||||
--------------hashJoin[INNER_JOIN](customer.c_nationkey = supplier.s_nationkey)(customer.c_custkey = orders.o_custkey)
|
||||
----------------PhysicalProject
|
||||
------------------hashJoin[INNER_JOIN](lineitem.l_suppkey = supplier.s_suppkey)
|
||||
--------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey)
|
||||
----------------------PhysicalProject
|
||||
------------------------PhysicalOlapScan[lineitem]
|
||||
----------------------PhysicalProject
|
||||
------------------------filter((orders.o_orderdate < 1995-01-01)(orders.o_orderdate >= 1994-01-01))
|
||||
--------------------------PhysicalOlapScan[orders]
|
||||
--------------------PhysicalDistribute
|
||||
----------------------PhysicalProject
|
||||
------------------------hashJoin[INNER_JOIN](supplier.s_nationkey = nation.n_nationkey)
|
||||
--------------------------PhysicalProject
|
||||
----------------------------PhysicalOlapScan[supplier]
|
||||
--------------------------PhysicalDistribute
|
||||
----------------------------hashJoin[INNER_JOIN](nation.n_regionkey = region.r_regionkey)
|
||||
------------------------------PhysicalProject
|
||||
--------------------------------PhysicalOlapScan[nation]
|
||||
------------------------------PhysicalDistribute
|
||||
--------------------------------PhysicalProject
|
||||
----------------------------------filter((region.r_name = 'ASIA'))
|
||||
------------------------------------PhysicalOlapScan[region]
|
||||
------------------PhysicalOlapScan[customer]
|
||||
----------------PhysicalDistribute
|
||||
------------------PhysicalProject
|
||||
--------------------PhysicalOlapScan[customer]
|
||||
--------------------hashJoin[INNER_JOIN](lineitem.l_suppkey = supplier.s_suppkey)
|
||||
----------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey)
|
||||
------------------------PhysicalProject
|
||||
--------------------------PhysicalOlapScan[lineitem]
|
||||
------------------------PhysicalProject
|
||||
--------------------------filter((orders.o_orderdate < 1995-01-01)(orders.o_orderdate >= 1994-01-01))
|
||||
----------------------------PhysicalOlapScan[orders]
|
||||
----------------------PhysicalDistribute
|
||||
------------------------PhysicalProject
|
||||
--------------------------hashJoin[INNER_JOIN](supplier.s_nationkey = nation.n_nationkey)
|
||||
----------------------------PhysicalProject
|
||||
------------------------------PhysicalOlapScan[supplier]
|
||||
----------------------------PhysicalDistribute
|
||||
------------------------------hashJoin[INNER_JOIN](nation.n_regionkey = region.r_regionkey)
|
||||
--------------------------------PhysicalProject
|
||||
----------------------------------PhysicalOlapScan[nation]
|
||||
--------------------------------PhysicalDistribute
|
||||
----------------------------------PhysicalProject
|
||||
------------------------------------filter((region.r_name = 'ASIA'))
|
||||
--------------------------------------PhysicalOlapScan[region]
|
||||
|
||||
|
||||
@ -5,22 +5,20 @@ PhysicalTopN
|
||||
----PhysicalTopN
|
||||
------PhysicalProject
|
||||
--------hashAgg[LOCAL]
|
||||
----------PhysicalDistribute
|
||||
------------PhysicalProject
|
||||
--------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey)
|
||||
----------PhysicalProject
|
||||
------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey)
|
||||
--------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey)
|
||||
----------------PhysicalProject
|
||||
------------------filter((lineitem.l_returnflag = 'R'))
|
||||
--------------------PhysicalOlapScan[lineitem]
|
||||
------------------PhysicalOlapScan[customer]
|
||||
----------------PhysicalDistribute
|
||||
------------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey)
|
||||
--------------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey)
|
||||
----------------------PhysicalProject
|
||||
------------------------PhysicalOlapScan[customer]
|
||||
----------------------PhysicalDistribute
|
||||
------------------------PhysicalProject
|
||||
--------------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01))
|
||||
----------------------------PhysicalOlapScan[orders]
|
||||
--------------------PhysicalDistribute
|
||||
----------------------PhysicalProject
|
||||
------------------------PhysicalOlapScan[nation]
|
||||
------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey)
|
||||
--------------------PhysicalProject
|
||||
----------------------filter((lineitem.l_returnflag = 'R'))
|
||||
------------------------PhysicalOlapScan[lineitem]
|
||||
--------------------PhysicalProject
|
||||
----------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01))
|
||||
------------------------PhysicalOlapScan[orders]
|
||||
--------------PhysicalDistribute
|
||||
----------------PhysicalProject
|
||||
------------------PhysicalOlapScan[nation]
|
||||
|
||||
|
||||
@ -8,6 +8,8 @@ PhysicalQuickSort
|
||||
----------hashAgg[LOCAL]
|
||||
------------PhysicalProject
|
||||
--------------hashJoin[INNER_JOIN](customer.c_nationkey = supplier.s_nationkey)(customer.c_custkey = orders.o_custkey)
|
||||
----------------PhysicalProject
|
||||
------------------PhysicalOlapScan[customer]
|
||||
----------------PhysicalDistribute
|
||||
------------------PhysicalProject
|
||||
--------------------hashJoin[INNER_JOIN](lineitem.l_suppkey = supplier.s_suppkey)
|
||||
@ -30,6 +32,4 @@ PhysicalQuickSort
|
||||
----------------------------------PhysicalProject
|
||||
------------------------------------filter((region.r_name = 'ASIA'))
|
||||
--------------------------------------PhysicalOlapScan[region]
|
||||
----------------PhysicalProject
|
||||
------------------PhysicalOlapScan[customer]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user