diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index 54fbbb08a9..6deca33e2e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -389,7 +389,22 @@ public class FilterEstimation extends ExpressionVisitor rightStats.ndv) { + sel1 = 1 / StatsMathUtil.nonZeroDivisor(leftStats.ndv); + } else { + sel1 = 1 / StatsMathUtil.nonZeroDivisor(rightStats.ndv); + } + double sel2 = Math.min(rightStats.ndv / rightStats.originalNdv, leftStats.ndv / leftStats.originalNdv); + sel = sel1 * Math.pow(sel2, reduceRatio); + } else { + sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv)); + } Statistics updatedStatistics = context.statistics.withSel(sel); updatedStatistics.addColumnStats(leftExpr, leftBuilder.build()); updatedStatistics.addColumnStats(rightExpr, rightBuilder.build()); diff --git a/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q10.out b/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q10.out index b70a354e27..316a267223 100644 --- a/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q10.out +++ b/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q10.out @@ -5,22 +5,20 @@ PhysicalTopN ----PhysicalTopN ------PhysicalProject --------hashAgg[LOCAL] -----------PhysicalDistribute -------------PhysicalProject ---------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey) +----------PhysicalProject +------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey) +--------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey) ----------------PhysicalProject -------------------filter((lineitem.l_returnflag = 'R')) ---------------------PhysicalOlapScan[lineitem] +------------------PhysicalOlapScan[customer] ----------------PhysicalDistribute -------------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey) ---------------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey) -----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] -----------------------PhysicalDistribute -------------------------PhysicalProject ---------------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01)) -----------------------------PhysicalOlapScan[orders] ---------------------PhysicalDistribute -----------------------PhysicalProject -------------------------PhysicalOlapScan[nation] +------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey) +--------------------PhysicalProject +----------------------filter((lineitem.l_returnflag = 'R')) +------------------------PhysicalOlapScan[lineitem] +--------------------PhysicalProject +----------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01)) +------------------------PhysicalOlapScan[orders] +--------------PhysicalDistribute +----------------PhysicalProject +------------------PhysicalOlapScan[nation] diff --git a/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q5.out b/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q5.out index e481a2b82e..7554f864fe 100644 --- a/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q5.out +++ b/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q5.out @@ -9,27 +9,27 @@ PhysicalQuickSort ------------PhysicalProject --------------hashJoin[INNER_JOIN](customer.c_nationkey = supplier.s_nationkey)(customer.c_custkey = orders.o_custkey) ----------------PhysicalProject -------------------hashJoin[INNER_JOIN](lineitem.l_suppkey = supplier.s_suppkey) ---------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey) -----------------------PhysicalProject -------------------------PhysicalOlapScan[lineitem] -----------------------PhysicalProject -------------------------filter((orders.o_orderdate < 1995-01-01)(orders.o_orderdate >= 1994-01-01)) ---------------------------PhysicalOlapScan[orders] ---------------------PhysicalDistribute -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN](supplier.s_nationkey = nation.n_nationkey) ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[supplier] ---------------------------PhysicalDistribute -----------------------------hashJoin[INNER_JOIN](nation.n_regionkey = region.r_regionkey) -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[nation] -------------------------------PhysicalDistribute ---------------------------------PhysicalProject -----------------------------------filter((region.r_name = 'ASIA')) -------------------------------------PhysicalOlapScan[region] +------------------PhysicalOlapScan[customer] ----------------PhysicalDistribute ------------------PhysicalProject ---------------------PhysicalOlapScan[customer] +--------------------hashJoin[INNER_JOIN](lineitem.l_suppkey = supplier.s_suppkey) +----------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey) +------------------------PhysicalProject +--------------------------PhysicalOlapScan[lineitem] +------------------------PhysicalProject +--------------------------filter((orders.o_orderdate < 1995-01-01)(orders.o_orderdate >= 1994-01-01)) +----------------------------PhysicalOlapScan[orders] +----------------------PhysicalDistribute +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN](supplier.s_nationkey = nation.n_nationkey) +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[supplier] +----------------------------PhysicalDistribute +------------------------------hashJoin[INNER_JOIN](nation.n_regionkey = region.r_regionkey) +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[nation] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter((region.r_name = 'ASIA')) +--------------------------------------PhysicalOlapScan[region] diff --git a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q10.out b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q10.out index b70a354e27..316a267223 100644 --- a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q10.out +++ b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q10.out @@ -5,22 +5,20 @@ PhysicalTopN ----PhysicalTopN ------PhysicalProject --------hashAgg[LOCAL] -----------PhysicalDistribute -------------PhysicalProject ---------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey) +----------PhysicalProject +------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey) +--------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey) ----------------PhysicalProject -------------------filter((lineitem.l_returnflag = 'R')) ---------------------PhysicalOlapScan[lineitem] +------------------PhysicalOlapScan[customer] ----------------PhysicalDistribute -------------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey) ---------------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey) -----------------------PhysicalProject -------------------------PhysicalOlapScan[customer] -----------------------PhysicalDistribute -------------------------PhysicalProject ---------------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01)) -----------------------------PhysicalOlapScan[orders] ---------------------PhysicalDistribute -----------------------PhysicalProject -------------------------PhysicalOlapScan[nation] +------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey) +--------------------PhysicalProject +----------------------filter((lineitem.l_returnflag = 'R')) +------------------------PhysicalOlapScan[lineitem] +--------------------PhysicalProject +----------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01)) +------------------------PhysicalOlapScan[orders] +--------------PhysicalDistribute +----------------PhysicalProject +------------------PhysicalOlapScan[nation] diff --git a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q5.out b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q5.out index e7bd14a143..7554f864fe 100644 --- a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q5.out +++ b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q5.out @@ -8,6 +8,8 @@ PhysicalQuickSort ----------hashAgg[LOCAL] ------------PhysicalProject --------------hashJoin[INNER_JOIN](customer.c_nationkey = supplier.s_nationkey)(customer.c_custkey = orders.o_custkey) +----------------PhysicalProject +------------------PhysicalOlapScan[customer] ----------------PhysicalDistribute ------------------PhysicalProject --------------------hashJoin[INNER_JOIN](lineitem.l_suppkey = supplier.s_suppkey) @@ -30,6 +32,4 @@ PhysicalQuickSort ----------------------------------PhysicalProject ------------------------------------filter((region.r_name = 'ASIA')) --------------------------------------PhysicalOlapScan[region] -----------------PhysicalProject -------------------PhysicalOlapScan[customer]