diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index acf072fb82..3ea7caaf08 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -390,24 +390,7 @@ public class FilterEstimation extends ExpressionVisitor rightStats.ndv) { - sel1 = 1 / StatsMathUtil.nonZeroDivisor(leftStats.ndv); - } else { - sel1 = 1 / StatsMathUtil.nonZeroDivisor(rightStats.ndv); - } - double sel2 = Math.min(rightStats.ndv / rightStats.getOriginalNdv(), - leftStats.ndv / leftStats.getOriginalNdv()); - sel = sel1 * Math.pow(sel2, reduceRatio); - } else { - sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv)); - } + double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv)); Statistics updatedStatistics = context.statistics.withSel(sel); updatedStatistics.addColumnStats(leftExpr, leftBuilder.build()); updatedStatistics.addColumnStats(rightExpr, rightBuilder.build()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java index 228b6936a7..afef2285e1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java @@ -72,16 +72,7 @@ public class JoinEstimation { return false; } - private static Statistics estimateInnerJoin(Statistics leftStats, Statistics rightStats, Join join) { - if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats, join)) { - double rowCount = Math.max(leftStats.getRowCount(), rightStats.getRowCount()); - rowCount = Math.max(1, rowCount); - return new StatisticsBuilder() - .setRowCount(rowCount) - .putColumnStatistics(leftStats.columnStatistics()) - .putColumnStatistics(rightStats.columnStatistics()) - .build(); - } + private static Statistics estimateHashJoin(Statistics leftStats, Statistics rightStats, Join join) { /* * When we estimate filter A=B, * if any side of equation, A or B, is almost unique, the confidence level of estimation is high. @@ -95,31 +86,31 @@ public class JoinEstimation { List trustableConditions = join.getHashJoinConjuncts().stream() .map(expression -> (EqualTo) expression) .filter( - expression -> { - // since ndv is not accurate, if ndv/rowcount < almostUniqueThreshold, - // this column is regarded as unique. - double almostUniqueThreshold = 0.9; - EqualTo equal = normalizeHashJoinCondition(expression, leftStats, rightStats); - ColumnStatistic eqLeftColStats = ExpressionEstimation.estimate(equal.left(), leftStats); - ColumnStatistic eqRightColStats = ExpressionEstimation.estimate(equal.right(), rightStats); - double rightStatsRowCount = StatsMathUtil.nonZeroDivisor(rightStats.getRowCount()); - double leftStatsRowCount = StatsMathUtil.nonZeroDivisor(leftStats.getRowCount()); - boolean trustable = eqRightColStats.ndv / rightStatsRowCount > almostUniqueThreshold - || eqLeftColStats.ndv / leftStatsRowCount > almostUniqueThreshold; - if (!trustable) { - double rNdv = StatsMathUtil.nonZeroDivisor(eqRightColStats.ndv); - double lNdv = StatsMathUtil.nonZeroDivisor(eqLeftColStats.ndv); - if (leftBigger) { - unTrustEqualRatio.add((rightStatsRowCount / rNdv) - * Math.min(eqLeftColStats.ndv, eqRightColStats.ndv) / lNdv); - } else { - unTrustEqualRatio.add((leftStatsRowCount / lNdv) - * Math.min(eqLeftColStats.ndv, eqRightColStats.ndv) / rNdv); + expression -> { + // since ndv is not accurate, if ndv/rowcount < almostUniqueThreshold, + // this column is regarded as unique. + double almostUniqueThreshold = 0.9; + EqualTo equal = normalizeHashJoinCondition(expression, leftStats, rightStats); + ColumnStatistic eqLeftColStats = ExpressionEstimation.estimate(equal.left(), leftStats); + ColumnStatistic eqRightColStats = ExpressionEstimation.estimate(equal.right(), rightStats); + double rightStatsRowCount = StatsMathUtil.nonZeroDivisor(rightStats.getRowCount()); + double leftStatsRowCount = StatsMathUtil.nonZeroDivisor(leftStats.getRowCount()); + boolean trustable = eqRightColStats.ndv / rightStatsRowCount > almostUniqueThreshold + || eqLeftColStats.ndv / leftStatsRowCount > almostUniqueThreshold; + if (!trustable) { + double rNdv = StatsMathUtil.nonZeroDivisor(eqRightColStats.ndv); + double lNdv = StatsMathUtil.nonZeroDivisor(eqLeftColStats.ndv); + if (leftBigger) { + unTrustEqualRatio.add((rightStatsRowCount / rNdv) + * Math.min(eqLeftColStats.ndv, eqRightColStats.ndv) / lNdv); + } else { + unTrustEqualRatio.add((leftStatsRowCount / lNdv) + * Math.min(eqLeftColStats.ndv, eqRightColStats.ndv) / rNdv); + } + unTrustableCondition.add(equal); } - unTrustableCondition.add(equal); + return trustable; } - return trustable; - } ).collect(Collectors.toList()); Statistics innerJoinStats; @@ -159,6 +150,34 @@ public class JoinEstimation { } innerJoinStats = crossJoinStats.updateRowCountOnly(outputRowCount); } + return innerJoinStats; + } + + private static Statistics estimateNestLoopJoin(Statistics leftStats, Statistics rightStats, Join join) { + return new StatisticsBuilder() + .setRowCount(Math.max(1, leftStats.getRowCount() * rightStats.getRowCount())) + .putColumnStatistics(leftStats.columnStatistics()) + .putColumnStatistics(rightStats.columnStatistics()) + .build(); + } + + private static Statistics estimateInnerJoin(Statistics leftStats, Statistics rightStats, Join join) { + if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats, join)) { + double rowCount = Math.max(leftStats.getRowCount(), rightStats.getRowCount()); + rowCount = Math.max(1, rowCount); + return new StatisticsBuilder() + .setRowCount(rowCount) + .putColumnStatistics(leftStats.columnStatistics()) + .putColumnStatistics(rightStats.columnStatistics()) + .build(); + } + + Statistics innerJoinStats; + if (join.getHashJoinConjuncts().isEmpty()) { + innerJoinStats = estimateNestLoopJoin(leftStats, rightStats, join); + } else { + innerJoinStats = estimateHashJoin(leftStats, rightStats, join); + } if (!join.getOtherJoinConjuncts().isEmpty()) { FilterEstimation filterEstimation = new FilterEstimation(); diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query11.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query11.out index 42fdc0d063..dee0257716 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query11.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query11.out @@ -44,24 +44,24 @@ CteAnchor[cteId= ( CTEId#4=] ) ----PhysicalDistribute ------PhysicalTopN --------PhysicalProject -----------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id)(CASE WHEN (year_total > 0.00) THEN (cast(year_total as DECIMALV3(38, 8)) / year_total) ELSE 0.000000 END > CASE WHEN (year_total > 0.00) THEN (cast(year_total as DECIMALV3(38, 8)) / year_total) ELSE 0.000000 END) +----------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)(CASE WHEN (year_total > 0.00) THEN (cast(year_total as DECIMALV3(38, 8)) / year_total) ELSE 0.000000 END > CASE WHEN (year_total > 0.00) THEN (cast(year_total as DECIMALV3(38, 8)) / year_total) ELSE 0.000000 END) ------------PhysicalDistribute --------------PhysicalProject -----------------filter((t_w_secyear.dyear = 2002)(t_w_secyear.sale_type = 'w')) +----------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.dyear = 2002)) ------------------CteConsumer[cteId= ( CTEId#4=] ) ------------PhysicalProject ---------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id) -----------------PhysicalDistribute -------------------PhysicalProject ---------------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.dyear = 2002)) -----------------------CteConsumer[cteId= ( CTEId#4=] ) -----------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id) +--------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id) +----------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id) +------------------PhysicalDistribute +--------------------PhysicalProject +----------------------filter((t_w_secyear.dyear = 2002)(t_w_secyear.sale_type = 'w')) +------------------------CteConsumer[cteId= ( CTEId#4=] ) ------------------PhysicalDistribute --------------------PhysicalProject ----------------------filter((t_s_firstyear.dyear = 2001)(t_s_firstyear.sale_type = 's')(t_s_firstyear.year_total > 0.00)) ------------------------CteConsumer[cteId= ( CTEId#4=] ) -------------------PhysicalDistribute ---------------------PhysicalProject -----------------------filter((t_w_firstyear.year_total > 0.00)(t_w_firstyear.sale_type = 'w')(t_w_firstyear.dyear = 2001)) -------------------------CteConsumer[cteId= ( CTEId#4=] ) +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter((t_w_firstyear.year_total > 0.00)(t_w_firstyear.sale_type = 'w')(t_w_firstyear.dyear = 2001)) +----------------------CteConsumer[cteId= ( CTEId#4=] ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query18.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query18.out index e2953ab328..6ddc252783 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query18.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query18.out @@ -9,40 +9,41 @@ PhysicalTopN ------------hashAgg[LOCAL] --------------PhysicalRepeat ----------------PhysicalProject -------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = item.i_item_sk) ---------------------PhysicalProject -----------------------PhysicalOlapScan[item] +------------------hashJoin[INNER_JOIN](customer.c_current_cdemo_sk = cd2.cd_demo_sk) --------------------PhysicalDistribute ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN](customer.c_current_cdemo_sk = cd2.cd_demo_sk) +------------------------PhysicalOlapScan[customer_demographics] +--------------------PhysicalDistribute +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = item.i_item_sk) +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[item] --------------------------PhysicalDistribute ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[customer_demographics] ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk) ---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_cdemo_sk = cd1.cd_demo_sk) -------------------------------------PhysicalProject ---------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_customer_sk = customer.c_customer_sk) -----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[catalog_sales] -----------------------------------------PhysicalDistribute -------------------------------------------hashJoin[INNER_JOIN](customer.c_current_addr_sk = customer_address.ca_address_sk) ---------------------------------------------PhysicalDistribute -----------------------------------------------PhysicalProject -------------------------------------------------filter(c_birth_month IN (1, 2, 4, 7, 8, 10)) ---------------------------------------------------PhysicalOlapScan[customer] ---------------------------------------------PhysicalDistribute -----------------------------------------------PhysicalProject -------------------------------------------------filter(ca_state IN ('WA', 'GA', 'NC', 'ME', 'WY', 'OK', 'IN')) ---------------------------------------------------PhysicalOlapScan[customer_address] -------------------------------------PhysicalDistribute ---------------------------------------PhysicalProject -----------------------------------------filter((cast(cd_gender as VARCHAR(*)) = 'F')(cast(cd_education_status as VARCHAR(*)) = 'Advanced Degree')) -------------------------------------------PhysicalOlapScan[customer_demographics] +------------------------------hashJoin[INNER_JOIN](customer.c_current_addr_sk = customer_address.ca_address_sk) --------------------------------PhysicalDistribute ----------------------------------PhysicalProject -------------------------------------filter((date_dim.d_year = 1998)) ---------------------------------------PhysicalOlapScan[date_dim] +------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_customer_sk = customer.c_customer_sk) +--------------------------------------PhysicalDistribute +----------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk) +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_cdemo_sk = cd1.cd_demo_sk) +----------------------------------------------PhysicalProject +------------------------------------------------PhysicalOlapScan[catalog_sales] +----------------------------------------------PhysicalDistribute +------------------------------------------------PhysicalProject +--------------------------------------------------filter((cast(cd_gender as VARCHAR(*)) = 'F')(cast(cd_education_status as VARCHAR(*)) = 'Advanced Degree')) +----------------------------------------------------PhysicalOlapScan[customer_demographics] +------------------------------------------PhysicalDistribute +--------------------------------------------PhysicalProject +----------------------------------------------filter((date_dim.d_year = 1998)) +------------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------PhysicalDistribute +----------------------------------------PhysicalProject +------------------------------------------filter(c_birth_month IN (1, 2, 4, 7, 8, 10)) +--------------------------------------------PhysicalOlapScan[customer] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter(ca_state IN ('WA', 'GA', 'NC', 'ME', 'WY', 'OK', 'IN')) +--------------------------------------PhysicalOlapScan[customer_address] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out index c4b812b0b0..b1ba26b82e 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out @@ -65,35 +65,35 @@ CteAnchor[cteId= ( CTEId#6=] ) ------PhysicalTopN --------PhysicalProject ----------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id)(CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END > CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END) -------------PhysicalProject ---------------filter((t_w_secyear.sale_type = 'w')(t_w_secyear.dyear = 2000)) -----------------CteConsumer[cteId= ( CTEId#6=] ) ------------PhysicalDistribute --------------PhysicalProject -----------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id) +----------------filter((t_w_secyear.sale_type = 'w')(t_w_secyear.dyear = 2000)) +------------------CteConsumer[cteId= ( CTEId#6=] ) +------------PhysicalProject +--------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id) +----------------PhysicalDistribute ------------------PhysicalProject --------------------filter((t_w_firstyear.dyear = 1999)(t_w_firstyear.sale_type = 'w')(t_w_firstyear.year_total > 0.000000)) ----------------------CteConsumer[cteId= ( CTEId#6=] ) -------------------PhysicalDistribute +----------------PhysicalProject +------------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)(CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END > CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END) +--------------------PhysicalDistribute +----------------------PhysicalProject +------------------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.dyear = 2000)) +--------------------------CteConsumer[cteId= ( CTEId#6=] ) --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)(CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END > CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END) +----------------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_c_secyear.customer_id) ------------------------PhysicalDistribute --------------------------PhysicalProject -----------------------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.dyear = 2000)) +----------------------------filter((t_c_secyear.sale_type = 'c')(t_c_secyear.dyear = 2000)) ------------------------------CteConsumer[cteId= ( CTEId#6=] ) -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_c_secyear.customer_id) -----------------------------PhysicalDistribute -------------------------------PhysicalProject ---------------------------------filter((t_c_secyear.sale_type = 'c')(t_c_secyear.dyear = 2000)) -----------------------------------CteConsumer[cteId= ( CTEId#6=] ) -----------------------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_c_firstyear.customer_id) -------------------------------PhysicalDistribute ---------------------------------PhysicalProject -----------------------------------filter((t_s_firstyear.year_total > 0.000000)(t_s_firstyear.dyear = 1999)(t_s_firstyear.sale_type = 's')) -------------------------------------CteConsumer[cteId= ( CTEId#6=] ) -------------------------------PhysicalDistribute ---------------------------------PhysicalProject -----------------------------------filter((t_c_firstyear.year_total > 0.000000)(t_c_firstyear.dyear = 1999)(t_c_firstyear.sale_type = 'c')) -------------------------------------CteConsumer[cteId= ( CTEId#6=] ) +------------------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_c_firstyear.customer_id) +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------filter((t_s_firstyear.year_total > 0.000000)(t_s_firstyear.dyear = 1999)(t_s_firstyear.sale_type = 's')) +--------------------------------CteConsumer[cteId= ( CTEId#6=] ) +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------filter((t_c_firstyear.year_total > 0.000000)(t_c_firstyear.dyear = 1999)(t_c_firstyear.sale_type = 'c')) +--------------------------------CteConsumer[cteId= ( CTEId#6=] ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query74.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query74.out index c1f925d45b..07a0ec06da 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query74.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query74.out @@ -45,15 +45,7 @@ CteAnchor[cteId= ( CTEId#4=] ) ------PhysicalTopN --------PhysicalProject ----------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id)(CASE WHEN (year_total > 0.0) THEN (year_total / year_total) ELSE NULL END > CASE WHEN (year_total > 0.0) THEN (year_total / year_total) ELSE NULL END) -------------PhysicalDistribute ---------------PhysicalProject -----------------filter((t_w_firstyear.year = 1999)(t_w_firstyear.year_total > 0.0)(t_w_firstyear.sale_type = 'w')) -------------------CteConsumer[cteId= ( CTEId#4=] ) ------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id) ---------------PhysicalDistribute -----------------PhysicalProject -------------------filter((t_w_secyear.year = 2000)(t_w_secyear.sale_type = 'w')) ---------------------CteConsumer[cteId= ( CTEId#4=] ) --------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id) ----------------PhysicalDistribute ------------------PhysicalProject @@ -63,4 +55,12 @@ CteAnchor[cteId= ( CTEId#4=] ) ------------------PhysicalProject --------------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.year = 2000)) ----------------------CteConsumer[cteId= ( CTEId#4=] ) +--------------PhysicalDistribute +----------------PhysicalProject +------------------filter((t_w_secyear.year = 2000)(t_w_secyear.sale_type = 'w')) +--------------------CteConsumer[cteId= ( CTEId#4=] ) +------------PhysicalDistribute +--------------PhysicalProject +----------------filter((t_w_firstyear.year = 1999)(t_w_firstyear.year_total > 0.0)(t_w_firstyear.sale_type = 'w')) +------------------CteConsumer[cteId= ( CTEId#4=] ) diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q10.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q10.out index c1d1c3548a..404026aa64 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q10.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q10.out @@ -4,22 +4,25 @@ PhysicalTopN --PhysicalDistribute ----PhysicalTopN ------PhysicalProject ---------hashAgg[LOCAL] -----------PhysicalProject -------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey) ---------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey) -----------------PhysicalProject -------------------PhysicalOlapScan[customer] -----------------PhysicalDistribute +--------hashAgg[GLOBAL] +----------PhysicalDistribute +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey) ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey) +--------------------filter((lineitem.l_returnflag = 'R')) +----------------------PhysicalOlapScan[lineitem] +------------------PhysicalDistribute +--------------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey) ----------------------PhysicalProject -------------------------filter((lineitem.l_returnflag = 'R')) ---------------------------PhysicalOlapScan[lineitem] -----------------------PhysicalProject -------------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01)) ---------------------------PhysicalOlapScan[orders] ---------------PhysicalDistribute -----------------PhysicalProject -------------------PhysicalOlapScan[nation] +------------------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey) +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01)) +--------------------------------PhysicalOlapScan[orders] +----------------------PhysicalDistribute +------------------------PhysicalProject +--------------------------PhysicalOlapScan[nation] diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q5.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q5.out index f8211c8fa8..6d45aeda6b 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q5.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q5.out @@ -8,8 +8,6 @@ PhysicalQuickSort ----------hashAgg[LOCAL] ------------PhysicalProject --------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey)(customer.c_nationkey = supplier.s_nationkey) -----------------PhysicalProject -------------------PhysicalOlapScan[customer] ----------------PhysicalDistribute ------------------PhysicalProject --------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey) @@ -32,4 +30,7 @@ PhysicalQuickSort ----------------------PhysicalProject ------------------------filter((orders.o_orderdate < 1995-01-01)(orders.o_orderdate >= 1994-01-01)) --------------------------PhysicalOlapScan[orders] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------PhysicalOlapScan[customer] diff --git a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q10.out b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q10.out index c1d1c3548a..404026aa64 100644 --- a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q10.out +++ b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q10.out @@ -4,22 +4,25 @@ PhysicalTopN --PhysicalDistribute ----PhysicalTopN ------PhysicalProject ---------hashAgg[LOCAL] -----------PhysicalProject -------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey) ---------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey) -----------------PhysicalProject -------------------PhysicalOlapScan[customer] -----------------PhysicalDistribute +--------hashAgg[GLOBAL] +----------PhysicalDistribute +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey) ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey) +--------------------filter((lineitem.l_returnflag = 'R')) +----------------------PhysicalOlapScan[lineitem] +------------------PhysicalDistribute +--------------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey) ----------------------PhysicalProject -------------------------filter((lineitem.l_returnflag = 'R')) ---------------------------PhysicalOlapScan[lineitem] -----------------------PhysicalProject -------------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01)) ---------------------------PhysicalOlapScan[orders] ---------------PhysicalDistribute -----------------PhysicalProject -------------------PhysicalOlapScan[nation] +------------------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey) +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01)) +--------------------------------PhysicalOlapScan[orders] +----------------------PhysicalDistribute +------------------------PhysicalProject +--------------------------PhysicalOlapScan[nation] diff --git a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q5.out b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q5.out index f8211c8fa8..6d45aeda6b 100644 --- a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q5.out +++ b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q5.out @@ -8,8 +8,6 @@ PhysicalQuickSort ----------hashAgg[LOCAL] ------------PhysicalProject --------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey)(customer.c_nationkey = supplier.s_nationkey) -----------------PhysicalProject -------------------PhysicalOlapScan[customer] ----------------PhysicalDistribute ------------------PhysicalProject --------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey) @@ -32,4 +30,7 @@ PhysicalQuickSort ----------------------PhysicalProject ------------------------filter((orders.o_orderdate < 1995-01-01)(orders.o_orderdate >= 1994-01-01)) --------------------------PhysicalOlapScan[orders] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------PhysicalOlapScan[customer]