From a6d201380225a31ae95621502bfa5256b2bffbf5 Mon Sep 17 00:00:00 2001 From: minghong Date: Wed, 8 Nov 2023 17:15:53 +0800 Subject: [PATCH] [opt](nereids) use 2 phase agg above union all (#26245) forbid one phase agg for pattern: agg-unionAll one phase agg plan: agg-union-hashDistribute-children two phase agg plan: agg(global) - hashDistribute-agg(local)-union-randomDistribute the key point is the cost of randomDistribute is much lower than the hashDistribute, and hence two-phase agg wins. --- .../doris/nereids/cost/CostModelV1.java | 6 +- .../ChildrenPropertiesRegulator.java | 10 ++ .../plans/physical/PhysicalSetOperation.java | 4 + .../doris/statistics/ColumnStatistic.java | 4 +- .../shape/query1.out | 13 +- .../shape/query49.out | 132 +++++++++--------- .../shape/query75.out | 90 ++++++------ .../shape/query1.out | 13 +- .../shape/query49.out | 132 +++++++++--------- .../shape/query75.out | 90 ++++++------ .../aggregate/agg_union_random.groovy | 54 +++++++ 11 files changed, 314 insertions(+), 234 deletions(-) create mode 100644 regression-test/suites/nereids_p0/aggregate/agg_union_random.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java index 33b3c80171..72baa7deaf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java @@ -56,6 +56,7 @@ class CostModelV1 extends PlanVisitor { // the penalty factor is no more than BROADCAST_JOIN_SKEW_PENALTY_LIMIT static final double BROADCAST_JOIN_SKEW_RATIO = 30.0; static final double BROADCAST_JOIN_SKEW_PENALTY_LIMIT = 2.0; + static final double RANDOM_SHUFFLE_TO_HASH_SHUFFLE_FACTOR = 0.1; private final int beNumber; public CostModelV1(ConnectContext connectContext) { @@ -217,10 +218,11 @@ class CostModelV1 extends PlanVisitor { } // any + // cost of randome shuffle is lower than hash shuffle. return CostV1.of(context.getSessionVariable(), - intputRowCount, 0, - 0); + 0, + intputRowCount * childStatistics.dataSizeFactor() * RANDOM_SHUFFLE_TO_HASH_SHUFFLE_FACTOR / beNumber); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java index f0a2331105..3dfa2615af 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java @@ -42,6 +42,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalNestedLoopJoin; import org.apache.doris.nereids.trees.plans.physical.PhysicalPartitionTopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.trees.plans.physical.PhysicalSetOperation; +import org.apache.doris.nereids.trees.plans.physical.PhysicalUnion; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; import org.apache.doris.nereids.util.JoinUtils; import org.apache.doris.qe.ConnectContext; @@ -117,6 +118,15 @@ public class ChildrenPropertiesRegulator extends PlanVisitor { && children.get(0).getPlan() instanceof PhysicalDistribute) { return false; } + + // agg(group by x)-union all(A, B) + // no matter x.ndv is high or not, it is not worthwhile to shuffle A and B by x + // and hence we forbid one phase agg + if (agg.getAggMode() == AggMode.INPUT_TO_RESULT + && children.get(0).getPlan() instanceof PhysicalUnion + && !((PhysicalUnion) children.get(0).getPlan()).isDistinct()) { + return false; + } // forbid multi distinct opt that bad than multi-stage version when multi-stage can be executed in one fragment if (agg.getAggMode() == AggMode.INPUT_TO_BUFFER || agg.getAggMode() == AggMode.INPUT_TO_RESULT) { List multiDistinctions = agg.getOutputExpressions().stream() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSetOperation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSetOperation.java index 83f15e3f66..ec9537e7cb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSetOperation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalSetOperation.java @@ -194,4 +194,8 @@ public abstract class PhysicalSetOperation extends AbstractPhysicalPlan implemen .map(NamedExpression::toSlot) .collect(ImmutableList.toImmutableList()); } + + public boolean isDistinct() { + return qualifier == Qualifier.DISTINCT; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index 82e0efdac1..1ec22cbc47 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -40,7 +40,7 @@ import java.util.Set; public class ColumnStatistic { public static final double STATS_ERROR = 0.1D; - + public static final double ALMOST_UNIQUE_FACTOR = 0.9; public static final StatsType NDV = StatsType.NDV; public static final StatsType AVG_SIZE = StatsType.AVG_SIZE; public static final StatsType MAX_SIZE = StatsType.MAX_SIZE; @@ -211,7 +211,7 @@ public class ColumnStatistic { } public static boolean isAlmostUnique(double ndv, double rowCount) { - return rowCount * 0.9 < ndv && ndv < rowCount * 1.1; + return rowCount * ALMOST_UNIQUE_FACTOR < ndv; } public ColumnStatistic updateByLimit(long limit, double rowCount) { diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query1.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query1.out index 48b9402777..50d0c4bce6 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query1.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query1.out @@ -33,10 +33,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------PhysicalProject --------------------------filter((store.s_state = 'TN')) ----------------------------PhysicalOlapScan[store] -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute -----------------------hashAgg[LOCAL] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------PhysicalDistribute +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute +------------------------hashAgg[LOCAL] +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query49.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query49.out index 28dfccd68e..a8ab13bb0f 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query49.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query49.out @@ -4,84 +4,86 @@ PhysicalResultSink --PhysicalTopN ----PhysicalDistribute ------PhysicalTopN ---------hashAgg[LOCAL] -----------PhysicalUnion -------------PhysicalDistribute ---------------PhysicalProject -----------------filter(((return_rank <= 10) OR (currency_rank <= 10))) -------------------PhysicalWindow ---------------------PhysicalQuickSort +--------hashAgg[GLOBAL] +----------PhysicalDistribute +------------hashAgg[LOCAL] +--------------PhysicalUnion +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) ----------------------PhysicalWindow ------------------------PhysicalQuickSort ---------------------------PhysicalDistribute +--------------------------PhysicalWindow ----------------------------PhysicalQuickSort -------------------------------PhysicalProject ---------------------------------hashAgg[GLOBAL] -----------------------------------PhysicalDistribute -------------------------------------hashAgg[LOCAL] ---------------------------------------PhysicalProject -----------------------------------------hashJoin[INNER_JOIN] hashCondition=((ws.ws_item_sk = wr.wr_item_sk) and (ws.ws_order_number = wr.wr_order_number))otherCondition=() +------------------------------PhysicalDistribute +--------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute +----------------------------------------hashAgg[LOCAL] ------------------------------------------PhysicalProject ---------------------------------------------filter((wr.wr_return_amt > 10000.00)) -----------------------------------------------PhysicalOlapScan[web_returns] -------------------------------------------hashJoin[INNER_JOIN] hashCondition=((ws.ws_sold_date_sk = date_dim.d_date_sk))otherCondition=() ---------------------------------------------PhysicalProject -----------------------------------------------filter((ws.ws_net_paid > 0.00) and (ws.ws_net_profit > 1.00) and (ws.ws_quantity > 0)) -------------------------------------------------PhysicalOlapScan[web_sales] ---------------------------------------------PhysicalDistribute +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((ws.ws_item_sk = wr.wr_item_sk) and (ws.ws_order_number = wr.wr_order_number))otherCondition=() ----------------------------------------------PhysicalProject -------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1998)) ---------------------------------------------------PhysicalOlapScan[date_dim] -------------PhysicalDistribute ---------------PhysicalProject -----------------filter(((return_rank <= 10) OR (currency_rank <= 10))) -------------------PhysicalWindow ---------------------PhysicalQuickSort +------------------------------------------------filter((wr.wr_return_amt > 10000.00)) +--------------------------------------------------PhysicalOlapScan[web_returns] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((ws.ws_sold_date_sk = date_dim.d_date_sk))otherCondition=() +------------------------------------------------PhysicalProject +--------------------------------------------------filter((ws.ws_net_paid > 0.00) and (ws.ws_net_profit > 1.00) and (ws.ws_quantity > 0)) +----------------------------------------------------PhysicalOlapScan[web_sales] +------------------------------------------------PhysicalDistribute +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1998)) +------------------------------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) ----------------------PhysicalWindow ------------------------PhysicalQuickSort ---------------------------PhysicalDistribute +--------------------------PhysicalWindow ----------------------------PhysicalQuickSort -------------------------------PhysicalProject ---------------------------------hashAgg[GLOBAL] -----------------------------------PhysicalDistribute -------------------------------------hashAgg[LOCAL] ---------------------------------------PhysicalProject -----------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs.cs_item_sk = cr.cr_item_sk) and (cs.cs_order_number = cr.cr_order_number))otherCondition=() +------------------------------PhysicalDistribute +--------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute +----------------------------------------hashAgg[LOCAL] ------------------------------------------PhysicalProject ---------------------------------------------filter((cr.cr_return_amount > 10000.00)) -----------------------------------------------PhysicalOlapScan[catalog_returns] -------------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs.cs_sold_date_sk = date_dim.d_date_sk))otherCondition=() ---------------------------------------------PhysicalProject -----------------------------------------------filter((cs.cs_net_paid > 0.00) and (cs.cs_net_profit > 1.00) and (cs.cs_quantity > 0)) -------------------------------------------------PhysicalOlapScan[catalog_sales] ---------------------------------------------PhysicalDistribute +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs.cs_item_sk = cr.cr_item_sk) and (cs.cs_order_number = cr.cr_order_number))otherCondition=() ----------------------------------------------PhysicalProject -------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1998)) ---------------------------------------------------PhysicalOlapScan[date_dim] -------------PhysicalDistribute ---------------PhysicalProject -----------------filter(((return_rank <= 10) OR (currency_rank <= 10))) -------------------PhysicalWindow ---------------------PhysicalQuickSort +------------------------------------------------filter((cr.cr_return_amount > 10000.00)) +--------------------------------------------------PhysicalOlapScan[catalog_returns] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs.cs_sold_date_sk = date_dim.d_date_sk))otherCondition=() +------------------------------------------------PhysicalProject +--------------------------------------------------filter((cs.cs_net_paid > 0.00) and (cs.cs_net_profit > 1.00) and (cs.cs_quantity > 0)) +----------------------------------------------------PhysicalOlapScan[catalog_sales] +------------------------------------------------PhysicalDistribute +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1998)) +------------------------------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) ----------------------PhysicalWindow ------------------------PhysicalQuickSort ---------------------------PhysicalDistribute +--------------------------PhysicalWindow ----------------------------PhysicalQuickSort -------------------------------PhysicalProject ---------------------------------hashAgg[GLOBAL] -----------------------------------PhysicalDistribute -------------------------------------hashAgg[LOCAL] ---------------------------------------PhysicalProject -----------------------------------------hashJoin[INNER_JOIN] hashCondition=((sts.ss_item_sk = sr.sr_item_sk) and (sts.ss_ticket_number = sr.sr_ticket_number))otherCondition=() +------------------------------PhysicalDistribute +--------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute +----------------------------------------hashAgg[LOCAL] ------------------------------------------PhysicalProject ---------------------------------------------filter((sr.sr_return_amt > 10000.00)) -----------------------------------------------PhysicalOlapScan[store_returns] -------------------------------------------hashJoin[INNER_JOIN] hashCondition=((sts.ss_sold_date_sk = date_dim.d_date_sk))otherCondition=() ---------------------------------------------PhysicalProject -----------------------------------------------filter((sts.ss_net_paid > 0.00) and (sts.ss_net_profit > 1.00) and (sts.ss_quantity > 0)) -------------------------------------------------PhysicalOlapScan[store_sales] ---------------------------------------------PhysicalDistribute +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((sts.ss_item_sk = sr.sr_item_sk) and (sts.ss_ticket_number = sr.sr_ticket_number))otherCondition=() ----------------------------------------------PhysicalProject -------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1998)) ---------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------------------filter((sr.sr_return_amt > 10000.00)) +--------------------------------------------------PhysicalOlapScan[store_returns] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((sts.ss_sold_date_sk = date_dim.d_date_sk))otherCondition=() +------------------------------------------------PhysicalProject +--------------------------------------------------filter((sts.ss_net_paid > 0.00) and (sts.ss_net_profit > 1.00) and (sts.ss_quantity > 0)) +----------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------------------PhysicalDistribute +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1998)) +------------------------------------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query75.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query75.out index 43c13b85a6..fb9d10e30f 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query75.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query75.out @@ -5,65 +5,67 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----hashAgg[GLOBAL] ------PhysicalDistribute --------hashAgg[LOCAL] -----------hashAgg[LOCAL] -------------PhysicalUnion ---------------PhysicalDistribute -----------------PhysicalProject -------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number))otherCondition=() +----------hashAgg[GLOBAL] +------------PhysicalDistribute +--------------hashAgg[LOCAL] +----------------PhysicalUnion +------------------PhysicalDistribute --------------------PhysicalProject -----------------------PhysicalOlapScan[catalog_returns] ---------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk))otherCondition=() +----------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number))otherCondition=() ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk))otherCondition=() +--------------------------PhysicalOlapScan[catalog_returns] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk))otherCondition=() ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[catalog_sales] +------------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk))otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Sports')) +--------------------------------------PhysicalOlapScan[item] ----------------------------PhysicalDistribute ------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Sports')) -----------------------------------PhysicalOlapScan[item] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------filter(d_year IN (2001, 2002)) -------------------------------PhysicalOlapScan[date_dim] ---------------PhysicalDistribute -----------------PhysicalProject -------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number))otherCondition=() +--------------------------------filter(d_year IN (2001, 2002)) +----------------------------------PhysicalOlapScan[date_dim] +------------------PhysicalDistribute --------------------PhysicalProject -----------------------PhysicalOlapScan[store_returns] ---------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))otherCondition=() +----------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number))otherCondition=() ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = store_sales.ss_item_sk))otherCondition=() +--------------------------PhysicalOlapScan[store_returns] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))otherCondition=() ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store_sales] +------------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = store_sales.ss_item_sk))otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Sports')) +--------------------------------------PhysicalOlapScan[item] ----------------------------PhysicalDistribute ------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Sports')) -----------------------------------PhysicalOlapScan[item] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------filter(d_year IN (2001, 2002)) -------------------------------PhysicalOlapScan[date_dim] ---------------PhysicalDistribute -----------------PhysicalProject -------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number))otherCondition=() +--------------------------------filter(d_year IN (2001, 2002)) +----------------------------------PhysicalOlapScan[date_dim] +------------------PhysicalDistribute --------------------PhysicalProject -----------------------PhysicalOlapScan[web_returns] ---------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk))otherCondition=() +----------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number))otherCondition=() ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = web_sales.ws_item_sk))otherCondition=() +--------------------------PhysicalOlapScan[web_returns] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk))otherCondition=() ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_sales] +------------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = web_sales.ws_item_sk))otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_sales] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Sports')) +--------------------------------------PhysicalOlapScan[item] ----------------------------PhysicalDistribute ------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Sports')) -----------------------------------PhysicalOlapScan[item] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------filter(d_year IN (2001, 2002)) -------------------------------PhysicalOlapScan[date_dim] +--------------------------------filter(d_year IN (2001, 2002)) +----------------------------------PhysicalOlapScan[date_dim] --PhysicalResultSink ----PhysicalTopN ------PhysicalDistribute diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query1.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query1.out index 67d53678b4..11eb0d9c9b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query1.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query1.out @@ -33,10 +33,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------PhysicalProject --------------------------filter((store.s_state = 'SD')) ----------------------------PhysicalOlapScan[store] -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute -----------------------hashAgg[LOCAL] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------PhysicalDistribute +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute +------------------------hashAgg[LOCAL] +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out index 673bc53892..92cf94f079 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out @@ -4,84 +4,86 @@ PhysicalResultSink --PhysicalTopN ----PhysicalDistribute ------PhysicalTopN ---------hashAgg[LOCAL] -----------PhysicalUnion -------------PhysicalDistribute ---------------PhysicalProject -----------------filter(((return_rank <= 10) OR (currency_rank <= 10))) -------------------PhysicalWindow ---------------------PhysicalQuickSort +--------hashAgg[GLOBAL] +----------PhysicalDistribute +------------hashAgg[LOCAL] +--------------PhysicalUnion +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) ----------------------PhysicalWindow ------------------------PhysicalQuickSort ---------------------------PhysicalDistribute +--------------------------PhysicalWindow ----------------------------PhysicalQuickSort -------------------------------PhysicalProject ---------------------------------hashAgg[GLOBAL] -----------------------------------PhysicalDistribute -------------------------------------hashAgg[LOCAL] ---------------------------------------PhysicalProject -----------------------------------------hashJoin[INNER_JOIN] hashCondition=((ws.ws_item_sk = wr.wr_item_sk) and (ws.ws_order_number = wr.wr_order_number))otherCondition=() +------------------------------PhysicalDistribute +--------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute +----------------------------------------hashAgg[LOCAL] ------------------------------------------PhysicalProject ---------------------------------------------filter((wr.wr_return_amt > 10000.00)) -----------------------------------------------PhysicalOlapScan[web_returns] -------------------------------------------hashJoin[INNER_JOIN] hashCondition=((ws.ws_sold_date_sk = date_dim.d_date_sk))otherCondition=() ---------------------------------------------PhysicalProject -----------------------------------------------filter((ws.ws_net_paid > 0.00) and (ws.ws_net_profit > 1.00) and (ws.ws_quantity > 0)) -------------------------------------------------PhysicalOlapScan[web_sales] ---------------------------------------------PhysicalDistribute +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((ws.ws_item_sk = wr.wr_item_sk) and (ws.ws_order_number = wr.wr_order_number))otherCondition=() ----------------------------------------------PhysicalProject -------------------------------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 1999)) ---------------------------------------------------PhysicalOlapScan[date_dim] -------------PhysicalDistribute ---------------PhysicalProject -----------------filter(((return_rank <= 10) OR (currency_rank <= 10))) -------------------PhysicalWindow ---------------------PhysicalQuickSort +------------------------------------------------filter((wr.wr_return_amt > 10000.00)) +--------------------------------------------------PhysicalOlapScan[web_returns] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((ws.ws_sold_date_sk = date_dim.d_date_sk))otherCondition=() +------------------------------------------------PhysicalProject +--------------------------------------------------filter((ws.ws_net_paid > 0.00) and (ws.ws_net_profit > 1.00) and (ws.ws_quantity > 0)) +----------------------------------------------------PhysicalOlapScan[web_sales] +------------------------------------------------PhysicalDistribute +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 1999)) +------------------------------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) ----------------------PhysicalWindow ------------------------PhysicalQuickSort ---------------------------PhysicalDistribute +--------------------------PhysicalWindow ----------------------------PhysicalQuickSort -------------------------------PhysicalProject ---------------------------------hashAgg[GLOBAL] -----------------------------------PhysicalDistribute -------------------------------------hashAgg[LOCAL] ---------------------------------------PhysicalProject -----------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs.cs_item_sk = cr.cr_item_sk) and (cs.cs_order_number = cr.cr_order_number))otherCondition=() +------------------------------PhysicalDistribute +--------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute +----------------------------------------hashAgg[LOCAL] ------------------------------------------PhysicalProject ---------------------------------------------filter((cr.cr_return_amount > 10000.00)) -----------------------------------------------PhysicalOlapScan[catalog_returns] -------------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs.cs_sold_date_sk = date_dim.d_date_sk))otherCondition=() ---------------------------------------------PhysicalProject -----------------------------------------------filter((cs.cs_net_paid > 0.00) and (cs.cs_net_profit > 1.00) and (cs.cs_quantity > 0)) -------------------------------------------------PhysicalOlapScan[catalog_sales] ---------------------------------------------PhysicalDistribute +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs.cs_item_sk = cr.cr_item_sk) and (cs.cs_order_number = cr.cr_order_number))otherCondition=() ----------------------------------------------PhysicalProject -------------------------------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 1999)) ---------------------------------------------------PhysicalOlapScan[date_dim] -------------PhysicalDistribute ---------------PhysicalProject -----------------filter(((return_rank <= 10) OR (currency_rank <= 10))) -------------------PhysicalWindow ---------------------PhysicalQuickSort +------------------------------------------------filter((cr.cr_return_amount > 10000.00)) +--------------------------------------------------PhysicalOlapScan[catalog_returns] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs.cs_sold_date_sk = date_dim.d_date_sk))otherCondition=() +------------------------------------------------PhysicalProject +--------------------------------------------------filter((cs.cs_net_paid > 0.00) and (cs.cs_net_profit > 1.00) and (cs.cs_quantity > 0)) +----------------------------------------------------PhysicalOlapScan[catalog_sales] +------------------------------------------------PhysicalDistribute +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 1999)) +------------------------------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) ----------------------PhysicalWindow ------------------------PhysicalQuickSort ---------------------------PhysicalDistribute +--------------------------PhysicalWindow ----------------------------PhysicalQuickSort -------------------------------PhysicalProject ---------------------------------hashAgg[GLOBAL] -----------------------------------PhysicalDistribute -------------------------------------hashAgg[LOCAL] ---------------------------------------PhysicalProject -----------------------------------------hashJoin[INNER_JOIN] hashCondition=((sts.ss_item_sk = sr.sr_item_sk) and (sts.ss_ticket_number = sr.sr_ticket_number))otherCondition=() +------------------------------PhysicalDistribute +--------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute +----------------------------------------hashAgg[LOCAL] ------------------------------------------PhysicalProject ---------------------------------------------filter((sr.sr_return_amt > 10000.00)) -----------------------------------------------PhysicalOlapScan[store_returns] -------------------------------------------hashJoin[INNER_JOIN] hashCondition=((sts.ss_sold_date_sk = date_dim.d_date_sk))otherCondition=() ---------------------------------------------PhysicalProject -----------------------------------------------filter((sts.ss_net_paid > 0.00) and (sts.ss_net_profit > 1.00) and (sts.ss_quantity > 0)) -------------------------------------------------PhysicalOlapScan[store_sales] ---------------------------------------------PhysicalDistribute +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((sts.ss_item_sk = sr.sr_item_sk) and (sts.ss_ticket_number = sr.sr_ticket_number))otherCondition=() ----------------------------------------------PhysicalProject -------------------------------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 1999)) ---------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------------------filter((sr.sr_return_amt > 10000.00)) +--------------------------------------------------PhysicalOlapScan[store_returns] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((sts.ss_sold_date_sk = date_dim.d_date_sk))otherCondition=() +------------------------------------------------PhysicalProject +--------------------------------------------------filter((sts.ss_net_paid > 0.00) and (sts.ss_net_profit > 1.00) and (sts.ss_quantity > 0)) +----------------------------------------------------PhysicalOlapScan[store_sales] +------------------------------------------------PhysicalDistribute +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 1999)) +------------------------------------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out index 11412b8ea1..07745a5f3d 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out @@ -5,65 +5,67 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----hashAgg[GLOBAL] ------PhysicalDistribute --------hashAgg[LOCAL] -----------hashAgg[LOCAL] -------------PhysicalUnion ---------------PhysicalDistribute -----------------PhysicalProject -------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number))otherCondition=() +----------hashAgg[GLOBAL] +------------PhysicalDistribute +--------------hashAgg[LOCAL] +----------------PhysicalUnion +------------------PhysicalDistribute --------------------PhysicalProject -----------------------PhysicalOlapScan[catalog_returns] ---------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk))otherCondition=() +----------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number))otherCondition=() ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk))otherCondition=() +--------------------------PhysicalOlapScan[catalog_returns] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk))otherCondition=() ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[catalog_sales] +------------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk))otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Home')) +--------------------------------------PhysicalOlapScan[item] ----------------------------PhysicalDistribute ------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Home')) -----------------------------------PhysicalOlapScan[item] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------filter(d_year IN (1998, 1999)) -------------------------------PhysicalOlapScan[date_dim] ---------------PhysicalDistribute -----------------PhysicalProject -------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number))otherCondition=() +--------------------------------filter(d_year IN (1998, 1999)) +----------------------------------PhysicalOlapScan[date_dim] +------------------PhysicalDistribute --------------------PhysicalProject -----------------------PhysicalOlapScan[store_returns] ---------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))otherCondition=() +----------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number))otherCondition=() ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = store_sales.ss_item_sk))otherCondition=() +--------------------------PhysicalOlapScan[store_returns] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))otherCondition=() ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store_sales] +------------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = store_sales.ss_item_sk))otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Home')) +--------------------------------------PhysicalOlapScan[item] ----------------------------PhysicalDistribute ------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Home')) -----------------------------------PhysicalOlapScan[item] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------filter(d_year IN (1998, 1999)) -------------------------------PhysicalOlapScan[date_dim] ---------------PhysicalDistribute -----------------PhysicalProject -------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number))otherCondition=() +--------------------------------filter(d_year IN (1998, 1999)) +----------------------------------PhysicalOlapScan[date_dim] +------------------PhysicalDistribute --------------------PhysicalProject -----------------------PhysicalOlapScan[web_returns] ---------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk))otherCondition=() +----------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number))otherCondition=() ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = web_sales.ws_item_sk))otherCondition=() +--------------------------PhysicalOlapScan[web_returns] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk))otherCondition=() ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_sales] +------------------------------hashJoin[INNER_JOIN] hashCondition=((item.i_item_sk = web_sales.ws_item_sk))otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_sales] +--------------------------------PhysicalDistribute +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Home')) +--------------------------------------PhysicalOlapScan[item] ----------------------------PhysicalDistribute ------------------------------PhysicalProject ---------------------------------filter((item.i_category = 'Home')) -----------------------------------PhysicalOlapScan[item] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------filter(d_year IN (1998, 1999)) -------------------------------PhysicalOlapScan[date_dim] +--------------------------------filter(d_year IN (1998, 1999)) +----------------------------------PhysicalOlapScan[date_dim] --PhysicalResultSink ----PhysicalTopN ------PhysicalDistribute diff --git a/regression-test/suites/nereids_p0/aggregate/agg_union_random.groovy b/regression-test/suites/nereids_p0/aggregate/agg_union_random.groovy new file mode 100644 index 0000000000..f233c80dac --- /dev/null +++ b/regression-test/suites/nereids_p0/aggregate/agg_union_random.groovy @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("agg_union_random") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql "DROP TABLE IF EXISTS test_random;" + sql """ + create table test_random + ( + a varchar(100) null, + b decimalv3(18,10) null + ) ENGINE=OLAP + DUPLICATE KEY(`a`) + DISTRIBUTED BY HASH(`a`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + explain{ + sql "select a from (select * from test_random union all (select * from test_random))t group by a" + /** + STREAM DATA SINK + EXCHANGE ID: 258 + RANDOM + + 252:VOlapScanNode + TABLE: default_cluster:regression_test_nereids_p0_aggregate.test_random(test_random), PREAGGREGATION: ON + partitions=0/1, tablets=0/0, tabletList= + cardinality=1, avgRowSize=0.0, numNodes=1 + pushAggOp=NONE + **/ + contains "RANDOM" + } + + sql "DROP TABLE IF EXISTS test_random;" +}