diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java index d43171375b..3173b654d4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java @@ -20,6 +20,7 @@ package org.apache.doris.nereids.stats; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.expressions.Cast; import org.apache.doris.nereids.trees.expressions.EqualPredicate; +import org.apache.doris.nereids.trees.expressions.EqualTo; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.JoinType; @@ -44,6 +45,7 @@ import java.util.stream.Collectors; */ public class JoinEstimation { private static double DEFAULT_ANTI_JOIN_SELECTIVITY_COEFFICIENT = 0.3; + private static double UNKNOWN_COL_STATS_FILTER_SEL_LOWER_BOUND = 0.5; private static EqualPredicate normalizeHashJoinCondition(EqualPredicate equal, Statistics leftStats, Statistics rightStats) { @@ -169,6 +171,27 @@ public class JoinEstimation { .build(); } + private static double computeSelectivityForBuildSideWhenColStatsUnknown(Statistics buildStats, Join join) { + double sel = 1.0; + for (Expression cond : join.getHashJoinConjuncts()) { + if (cond instanceof EqualTo) { + EqualTo equal = (EqualTo) cond; + if (equal.left() instanceof Slot && equal.right() instanceof Slot) { + ColumnStatistic buildColStats = buildStats.findColumnStatistics(equal.left()); + if (buildColStats == null) { + buildColStats = buildStats.findColumnStatistics(equal.right()); + } + if (buildColStats != null) { + double buildSel = Math.min(buildStats.getRowCount() / buildColStats.count, 1.0); + buildSel = Math.max(buildSel, UNKNOWN_COL_STATS_FILTER_SEL_LOWER_BOUND); + sel = Math.min(sel, buildSel); + } + } + } + } + return sel; + } + private static Statistics estimateInnerJoin(Statistics leftStats, Statistics rightStats, Join join) { if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats, join)) { double rowCount = Math.max(leftStats.getRowCount(), rightStats.getRowCount()); @@ -245,14 +268,15 @@ public class JoinEstimation { private static Statistics estimateSemiOrAnti(Statistics leftStats, Statistics rightStats, Join join) { if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats, join)) { + double sel = computeSelectivityForBuildSideWhenColStatsUnknown(rightStats, join); if (join.getJoinType().isLeftSemiOrAntiJoin()) { - return new StatisticsBuilder().setRowCount(leftStats.getRowCount()) + return new StatisticsBuilder().setRowCount(leftStats.getRowCount() * sel) .putColumnStatistics(leftStats.columnStatistics()) .putColumnStatistics(rightStats.columnStatistics()) .build(); } else { //right semi or anti - return new StatisticsBuilder().setRowCount(rightStats.getRowCount()) + return new StatisticsBuilder().setRowCount(rightStats.getRowCount() * sel) .putColumnStatistics(leftStats.columnStatistics()) .putColumnStatistics(rightStats.columnStatistics()) .build(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 741fe60e20..5cd650bbb5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -668,7 +668,20 @@ public class StatsCalculator extends DefaultPlanVisitor { columnStatisticMap.put(slotReference, ColumnStatistic.UNKNOWN); } } - return new Statistics(rowCount, columnStatisticMap); + Statistics stats = new Statistics(rowCount, columnStatisticMap); + stats = normalizeCatalogRelationColumnStatsRowCount(stats); + return stats; + } + + private Statistics normalizeCatalogRelationColumnStatsRowCount(Statistics stats) { + for (Expression slot : stats.columnStatistics().keySet()) { + ColumnStatistic colStats = stats.findColumnStatistics(slot); + Preconditions.checkArgument(colStats != null, + "can not find col stats for %s in table", slot.toSql()); + stats.addColumnStats(slot, + new ColumnStatisticBuilder(colStats).setCount(stats.getRowCount()).build()); + } + return stats; } private Statistics computeTopN(TopN topN) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java index c6bdd7b6b2..6e9cfbb643 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java @@ -228,9 +228,9 @@ public abstract class AbstractPhysicalJoin< @Override public String toString() { List args = Lists.newArrayList("type", joinType, + "stats", statistics, "hashCondition", hashJoinConjuncts, - "otherCondition", otherJoinConjuncts, - "stats", statistics); + "otherCondition", otherJoinConjuncts); if (markJoinSlotReference.isPresent()) { args.add("isMarkJoin"); args.add("true"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java index 20c4ebf795..a4a2d7254b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java @@ -121,8 +121,9 @@ public class PhysicalOlapScan extends PhysicalCatalogRelation implements OlapSca @Override public String toString() { return Utils.toSqlString("PhysicalOlapScan[" + id.asInt() + "]" + getGroupIdWithPrefix(), - "qualified", Utils.qualifiedName(qualifier, table.getName()), - "stats", statistics, "fr", getMutableState(AbstractPlan.FRAGMENT_ID) + "table", table.getName(), + "stats", statistics, + "fr", getMutableState(AbstractPlan.FRAGMENT_ID) ); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index d1af7756f8..72d0c68f0f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -298,7 +298,8 @@ public class ColumnStatistic { @Override public String toString() { - return isUnKnown ? "unknown" : String.format("ndv=%.4f, min=%f(%s), max=%f(%s), count=%.4f, avgSizeByte=%f", + return isUnKnown ? "unknown(" + count + ")" + : String.format("ndv=%.4f, min=%f(%s), max=%f(%s), count=%.4f, avgSizeByte=%f", ndv, minValue, minExpr, maxValue, maxExpr, count, avgSizeByte); } diff --git a/regression-test/data/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.out b/regression-test/data/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.out index 49cf927de5..4865cbdc7b 100644 --- a/regression-test/data/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.out +++ b/regression-test/data/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.out @@ -1,130 +1,81 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !groupby_positive_case -- PhysicalResultSink ---PhysicalDistribute -----PhysicalProject -------hashAgg[LOCAL] ---------hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.a = T2.a)) otherCondition=() build RFs:RF0 a->[a] -----------PhysicalProject -------------filter((T1.__DORIS_DELETE_SIGN__ = 0)) ---------------PhysicalOlapScan[T1] apply RFs: RF0 -----------PhysicalDistribute -------------PhysicalProject ---------------filter((T2.__DORIS_DELETE_SIGN__ = 0)) -----------------PhysicalOlapScan[T2] +--hashAgg[LOCAL] +----hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.a = T2.a)) otherCondition=() build RFs:RF0 a->[a] +------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[T1] apply RFs: RF0 +------filter((T2.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[T2] -- !groupby_negative_case -- PhysicalResultSink ---PhysicalDistribute -----PhysicalProject -------hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.D = expr_cast(a as BIGINT))) otherCondition=() ---------PhysicalDistribute -----------PhysicalProject -------------hashAgg[LOCAL] ---------------PhysicalProject -----------------filter((T1.__DORIS_DELETE_SIGN__ = 0)) -------------------PhysicalOlapScan[T1] ---------PhysicalDistribute -----------PhysicalProject -------------filter((T2.__DORIS_DELETE_SIGN__ = 0)) ---------------PhysicalOlapScan[T2] +--hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.D = expr_cast(a as BIGINT))) otherCondition=() +----hashAgg[LOCAL] +------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[T1] +----filter((T2.__DORIS_DELETE_SIGN__ = 0)) +------PhysicalOlapScan[T2] -- !grouping_positive_case -- PhysicalResultSink ---PhysicalDistribute -----PhysicalProject +--hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.a = T2.a)) otherCondition=() build RFs:RF0 a->[a] +----hashAgg[GLOBAL] ------hashAgg[LOCAL] ---------hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.a = T2.a)) otherCondition=() build RFs:RF0 a->[a] -----------PhysicalDistribute -------------PhysicalRepeat ---------------PhysicalProject -----------------filter((T1.__DORIS_DELETE_SIGN__ = 0)) -------------------PhysicalOlapScan[T1] apply RFs: RF0 -----------PhysicalDistribute -------------PhysicalProject ---------------filter((T2.__DORIS_DELETE_SIGN__ = 0)) -----------------PhysicalOlapScan[T2] +--------PhysicalRepeat +----------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +------------PhysicalOlapScan[T1] apply RFs: RF0 +----filter((T2.__DORIS_DELETE_SIGN__ = 0)) +------PhysicalOlapScan[T2] -- !grouping_negative_case -- PhysicalResultSink ---PhysicalDistribute -----PhysicalProject -------hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.D = expr_cast(a as BIGINT))) otherCondition=() ---------PhysicalDistribute -----------PhysicalProject -------------hashAgg[GLOBAL] ---------------PhysicalDistribute -----------------hashAgg[LOCAL] -------------------PhysicalRepeat ---------------------PhysicalProject -----------------------filter((T1.__DORIS_DELETE_SIGN__ = 0)) -------------------------PhysicalOlapScan[T1] ---------PhysicalDistribute -----------PhysicalProject -------------filter((T2.__DORIS_DELETE_SIGN__ = 0)) ---------------PhysicalOlapScan[T2] +--hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.D = expr_cast(a as BIGINT))) otherCondition=() +----hashAgg[GLOBAL] +------hashAgg[LOCAL] +--------PhysicalRepeat +----------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +------------PhysicalOlapScan[T1] +----filter((T2.__DORIS_DELETE_SIGN__ = 0)) +------PhysicalOlapScan[T2] -- !groupby_positive_case2 -- PhysicalResultSink ---PhysicalDistribute -----hashAgg[LOCAL] -------hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.a = T2.a)) otherCondition=() build RFs:RF0 a->[a] ---------PhysicalProject -----------filter((T1.__DORIS_DELETE_SIGN__ = 0)) -------------PhysicalOlapScan[T1] apply RFs: RF0 ---------PhysicalDistribute -----------PhysicalProject -------------filter((T2.__DORIS_DELETE_SIGN__ = 0)) ---------------PhysicalOlapScan[T2] +--hashAgg[LOCAL] +----hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.a = T2.a)) otherCondition=() build RFs:RF0 a->[a] +------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[T1] apply RFs: RF0 +------filter((T2.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[T2] -- !groupby_negative_case2 -- PhysicalResultSink ---PhysicalDistribute -----PhysicalProject -------hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.D = expr_cast(a as BIGINT))) otherCondition=() ---------PhysicalDistribute -----------PhysicalProject -------------hashAgg[LOCAL] ---------------PhysicalProject -----------------filter((T1.__DORIS_DELETE_SIGN__ = 0)) -------------------PhysicalOlapScan[T1] ---------PhysicalDistribute -----------PhysicalProject -------------filter((T2.__DORIS_DELETE_SIGN__ = 0)) ---------------PhysicalOlapScan[T2] +--hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.D = expr_cast(a as BIGINT))) otherCondition=() +----hashAgg[LOCAL] +------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[T1] +----filter((T2.__DORIS_DELETE_SIGN__ = 0)) +------PhysicalOlapScan[T2] -- !grouping_positive_case2 -- PhysicalResultSink ---PhysicalDistribute -----PhysicalProject +--hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.a = T2.a)) otherCondition=() build RFs:RF0 a->[a] +----hashAgg[GLOBAL] ------hashAgg[LOCAL] ---------hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.a = T2.a)) otherCondition=() build RFs:RF0 a->[a] -----------PhysicalDistribute -------------PhysicalRepeat ---------------PhysicalProject -----------------filter((T1.__DORIS_DELETE_SIGN__ = 0)) -------------------PhysicalOlapScan[T1] apply RFs: RF0 -----------PhysicalDistribute -------------PhysicalProject ---------------filter((T2.__DORIS_DELETE_SIGN__ = 0)) -----------------PhysicalOlapScan[T2] +--------PhysicalRepeat +----------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +------------PhysicalOlapScan[T1] apply RFs: RF0 +----filter((T2.__DORIS_DELETE_SIGN__ = 0)) +------PhysicalOlapScan[T2] -- !grouping_negative_case2 -- PhysicalResultSink ---PhysicalDistribute -----PhysicalProject -------hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.D = expr_cast(a as BIGINT))) otherCondition=() ---------PhysicalDistribute -----------PhysicalProject -------------hashAgg[GLOBAL] ---------------PhysicalDistribute -----------------hashAgg[LOCAL] -------------------PhysicalRepeat ---------------------PhysicalProject -----------------------filter((T1.__DORIS_DELETE_SIGN__ = 0)) -------------------------PhysicalOlapScan[T1] ---------PhysicalDistribute -----------PhysicalProject -------------filter((T2.__DORIS_DELETE_SIGN__ = 0)) ---------------PhysicalOlapScan[T2] +--hashJoin[LEFT_SEMI_JOIN] hashCondition=((T3.D = expr_cast(a as BIGINT))) otherCondition=() +----hashAgg[GLOBAL] +------hashAgg[LOCAL] +--------PhysicalRepeat +----------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +------------PhysicalOlapScan[T1] +----filter((T2.__DORIS_DELETE_SIGN__ = 0)) +------PhysicalOlapScan[T2] diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q20-rewrite.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q20-rewrite.out index 7c1b5920e4..525bc7be55 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q20-rewrite.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q20-rewrite.out @@ -5,30 +5,31 @@ PhysicalResultSink ----PhysicalDistribute ------PhysicalQuickSort[LOCAL_SORT] --------PhysicalProject -----------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((supplier.s_suppkey = t3.ps_suppkey)) otherCondition=() build RFs:RF4 s_suppkey->[ps_suppkey] -------------PhysicalDistribute ---------------PhysicalProject -----------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((partsupp.ps_partkey = part.p_partkey)) otherCondition=() build RFs:RF3 p_partkey->[ps_partkey] -------------------hashJoin[INNER_JOIN] hashCondition=((t2.l_partkey = t1.ps_partkey) and (t2.l_suppkey = t1.ps_suppkey)) otherCondition=((cast(ps_availqty as DECIMALV3(38, 3)) > t2.l_q)) build RFs:RF1 l_partkey->[ps_partkey];RF2 l_suppkey->[ps_suppkey] ---------------------PhysicalProject -----------------------PhysicalOlapScan[partsupp] apply RFs: RF1 RF2 RF3 RF4 ---------------------PhysicalDistribute +----------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF4 n_nationkey->[s_nationkey] +------------PhysicalProject +--------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((supplier.s_suppkey = t3.ps_suppkey)) otherCondition=() build RFs:RF3 s_suppkey->[ps_suppkey] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN] hashCondition=((t2.l_partkey = t1.ps_partkey) and (t2.l_suppkey = t1.ps_suppkey)) otherCondition=((cast(ps_availqty as DECIMALV3(38, 3)) > t2.l_q)) build RFs:RF1 ps_partkey->[l_partkey];RF2 ps_suppkey->[l_suppkey] ----------------------PhysicalProject ------------------------hashAgg[GLOBAL] --------------------------PhysicalDistribute ----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject --------------------------------filter((lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate >= '1994-01-01')) -----------------------------------PhysicalOlapScan[lineitem] -------------------PhysicalProject ---------------------filter((p_name like 'forest%')) -----------------------PhysicalOlapScan[part] -------------PhysicalDistribute ---------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF0 n_nationkey->[s_nationkey] -----------------PhysicalProject -------------------PhysicalOlapScan[supplier] apply RFs: RF0 +----------------------------------PhysicalOlapScan[lineitem] apply RFs: RF1 RF2 +----------------------PhysicalDistribute +------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((partsupp.ps_partkey = part.p_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 RF3 +--------------------------PhysicalProject +----------------------------filter((p_name like 'forest%')) +------------------------------PhysicalOlapScan[part] ----------------PhysicalDistribute ------------------PhysicalProject ---------------------filter((nation.n_name = 'CANADA')) -----------------------PhysicalOlapScan[nation] +--------------------PhysicalOlapScan[supplier] apply RFs: RF4 +------------PhysicalDistribute +--------------PhysicalProject +----------------filter((nation.n_name = 'CANADA')) +------------------PhysicalOlapScan[nation] diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q20.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q20.out index a7838d0450..1292e0468f 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q20.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q20.out @@ -5,29 +5,30 @@ PhysicalResultSink ----PhysicalDistribute ------PhysicalQuickSort[LOCAL_SORT] --------PhysicalProject -----------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((supplier.s_suppkey = partsupp.ps_suppkey)) otherCondition=() build RFs:RF4 s_suppkey->[ps_suppkey] -------------PhysicalDistribute ---------------PhysicalProject -----------------hashJoin[INNER_JOIN] hashCondition=((lineitem.l_partkey = partsupp.ps_partkey) and (lineitem.l_suppkey = partsupp.ps_suppkey)) otherCondition=((cast(ps_availqty as DECIMALV3(38, 3)) > (0.5 * sum(l_quantity)))) build RFs:RF2 l_partkey->[ps_partkey];RF3 l_suppkey->[ps_suppkey] -------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((partsupp.ps_partkey = part.p_partkey)) otherCondition=() build RFs:RF1 p_partkey->[ps_partkey] ---------------------PhysicalProject -----------------------PhysicalOlapScan[partsupp] apply RFs: RF1 RF2 RF3 RF4 ---------------------PhysicalProject -----------------------filter((p_name like 'forest%')) -------------------------PhysicalOlapScan[part] -------------------PhysicalDistribute ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------filter((lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate >= '1994-01-01')) -------------------------------PhysicalOlapScan[lineitem] -------------PhysicalDistribute ---------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF0 n_nationkey->[s_nationkey] -----------------PhysicalProject -------------------PhysicalOlapScan[supplier] apply RFs: RF0 +----------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF4 n_nationkey->[s_nationkey] +------------PhysicalProject +--------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((supplier.s_suppkey = partsupp.ps_suppkey)) otherCondition=() build RFs:RF3 s_suppkey->[ps_suppkey] ----------------PhysicalDistribute ------------------PhysicalProject ---------------------filter((nation.n_name = 'CANADA')) -----------------------PhysicalOlapScan[nation] +--------------------hashJoin[INNER_JOIN] hashCondition=((lineitem.l_partkey = partsupp.ps_partkey) and (lineitem.l_suppkey = partsupp.ps_suppkey)) otherCondition=((cast(ps_availqty as DECIMALV3(38, 3)) > (0.5 * sum(l_quantity)))) build RFs:RF1 ps_partkey->[l_partkey];RF2 ps_suppkey->[l_suppkey] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------filter((lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate >= '1994-01-01')) +--------------------------------PhysicalOlapScan[lineitem] apply RFs: RF1 RF2 +----------------------PhysicalDistribute +------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((partsupp.ps_partkey = part.p_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 RF3 +--------------------------PhysicalProject +----------------------------filter((p_name like 'forest%')) +------------------------------PhysicalOlapScan[part] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------PhysicalOlapScan[supplier] apply RFs: RF4 +------------PhysicalDistribute +--------------PhysicalProject +----------------filter((nation.n_name = 'CANADA')) +------------------PhysicalOlapScan[nation] diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q21.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q21.out index b8378fe828..b248bc83d6 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q21.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q21.out @@ -8,27 +8,29 @@ PhysicalResultSink ----------PhysicalDistribute ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((l3.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) -------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((l2.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) build RFs:RF3 l_orderkey->[l_orderkey] ---------------------PhysicalProject -----------------------PhysicalOlapScan[lineitem] apply RFs: RF3 ---------------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF2 n_nationkey->[s_nationkey] -----------------------hashJoin[INNER_JOIN] hashCondition=((orders.o_orderkey = l1.l_orderkey)) otherCondition=() build RFs:RF1 o_orderkey->[l_orderkey] -------------------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_suppkey = l1.l_suppkey)) otherCondition=() build RFs:RF0 s_suppkey->[l_suppkey] ---------------------------PhysicalProject -----------------------------filter((l1.l_receiptdate > l1.l_commitdate)) -------------------------------PhysicalOlapScan[lineitem] apply RFs: RF0 RF1 ---------------------------PhysicalDistribute +----------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF3 n_nationkey->[s_nationkey] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_suppkey = l1.l_suppkey)) otherCondition=() build RFs:RF2 s_suppkey->[l_suppkey] +----------------------PhysicalDistribute +------------------------hashJoin[INNER_JOIN] hashCondition=((orders.o_orderkey = l1.l_orderkey)) otherCondition=() build RFs:RF1 o_orderkey->[l_orderkey] +--------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((l2.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) build RFs:RF0 l_orderkey->[l_orderkey] ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[supplier] apply RFs: RF2 -------------------------PhysicalProject ---------------------------filter((orders.o_orderstatus = 'F')) -----------------------------PhysicalOlapScan[orders] +------------------------------PhysicalOlapScan[lineitem] apply RFs: RF0 +----------------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((l3.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) +------------------------------PhysicalProject +--------------------------------filter((l1.l_receiptdate > l1.l_commitdate)) +----------------------------------PhysicalOlapScan[lineitem] apply RFs: RF1 RF2 +------------------------------PhysicalProject +--------------------------------filter((l3.l_receiptdate > l3.l_commitdate)) +----------------------------------PhysicalOlapScan[lineitem] +--------------------------PhysicalProject +----------------------------filter((orders.o_orderstatus = 'F')) +------------------------------PhysicalOlapScan[orders] ----------------------PhysicalDistribute ------------------------PhysicalProject ---------------------------filter((nation.n_name = 'SAUDI ARABIA')) -----------------------------PhysicalOlapScan[nation] -------------------PhysicalProject ---------------------filter((l3.l_receiptdate > l3.l_commitdate)) -----------------------PhysicalOlapScan[lineitem] +--------------------------PhysicalOlapScan[supplier] apply RFs: RF3 +------------------PhysicalDistribute +--------------------PhysicalProject +----------------------filter((nation.n_name = 'SAUDI ARABIA')) +------------------------PhysicalOlapScan[nation] diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q20-rewrite.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q20-rewrite.out index 7c1b5920e4..525bc7be55 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q20-rewrite.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q20-rewrite.out @@ -5,30 +5,31 @@ PhysicalResultSink ----PhysicalDistribute ------PhysicalQuickSort[LOCAL_SORT] --------PhysicalProject -----------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((supplier.s_suppkey = t3.ps_suppkey)) otherCondition=() build RFs:RF4 s_suppkey->[ps_suppkey] -------------PhysicalDistribute ---------------PhysicalProject -----------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((partsupp.ps_partkey = part.p_partkey)) otherCondition=() build RFs:RF3 p_partkey->[ps_partkey] -------------------hashJoin[INNER_JOIN] hashCondition=((t2.l_partkey = t1.ps_partkey) and (t2.l_suppkey = t1.ps_suppkey)) otherCondition=((cast(ps_availqty as DECIMALV3(38, 3)) > t2.l_q)) build RFs:RF1 l_partkey->[ps_partkey];RF2 l_suppkey->[ps_suppkey] ---------------------PhysicalProject -----------------------PhysicalOlapScan[partsupp] apply RFs: RF1 RF2 RF3 RF4 ---------------------PhysicalDistribute +----------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF4 n_nationkey->[s_nationkey] +------------PhysicalProject +--------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((supplier.s_suppkey = t3.ps_suppkey)) otherCondition=() build RFs:RF3 s_suppkey->[ps_suppkey] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN] hashCondition=((t2.l_partkey = t1.ps_partkey) and (t2.l_suppkey = t1.ps_suppkey)) otherCondition=((cast(ps_availqty as DECIMALV3(38, 3)) > t2.l_q)) build RFs:RF1 ps_partkey->[l_partkey];RF2 ps_suppkey->[l_suppkey] ----------------------PhysicalProject ------------------------hashAgg[GLOBAL] --------------------------PhysicalDistribute ----------------------------hashAgg[LOCAL] ------------------------------PhysicalProject --------------------------------filter((lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate >= '1994-01-01')) -----------------------------------PhysicalOlapScan[lineitem] -------------------PhysicalProject ---------------------filter((p_name like 'forest%')) -----------------------PhysicalOlapScan[part] -------------PhysicalDistribute ---------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF0 n_nationkey->[s_nationkey] -----------------PhysicalProject -------------------PhysicalOlapScan[supplier] apply RFs: RF0 +----------------------------------PhysicalOlapScan[lineitem] apply RFs: RF1 RF2 +----------------------PhysicalDistribute +------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((partsupp.ps_partkey = part.p_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 RF3 +--------------------------PhysicalProject +----------------------------filter((p_name like 'forest%')) +------------------------------PhysicalOlapScan[part] ----------------PhysicalDistribute ------------------PhysicalProject ---------------------filter((nation.n_name = 'CANADA')) -----------------------PhysicalOlapScan[nation] +--------------------PhysicalOlapScan[supplier] apply RFs: RF4 +------------PhysicalDistribute +--------------PhysicalProject +----------------filter((nation.n_name = 'CANADA')) +------------------PhysicalOlapScan[nation] diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q20.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q20.out index a7838d0450..1292e0468f 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q20.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q20.out @@ -5,29 +5,30 @@ PhysicalResultSink ----PhysicalDistribute ------PhysicalQuickSort[LOCAL_SORT] --------PhysicalProject -----------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((supplier.s_suppkey = partsupp.ps_suppkey)) otherCondition=() build RFs:RF4 s_suppkey->[ps_suppkey] -------------PhysicalDistribute ---------------PhysicalProject -----------------hashJoin[INNER_JOIN] hashCondition=((lineitem.l_partkey = partsupp.ps_partkey) and (lineitem.l_suppkey = partsupp.ps_suppkey)) otherCondition=((cast(ps_availqty as DECIMALV3(38, 3)) > (0.5 * sum(l_quantity)))) build RFs:RF2 l_partkey->[ps_partkey];RF3 l_suppkey->[ps_suppkey] -------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((partsupp.ps_partkey = part.p_partkey)) otherCondition=() build RFs:RF1 p_partkey->[ps_partkey] ---------------------PhysicalProject -----------------------PhysicalOlapScan[partsupp] apply RFs: RF1 RF2 RF3 RF4 ---------------------PhysicalProject -----------------------filter((p_name like 'forest%')) -------------------------PhysicalOlapScan[part] -------------------PhysicalDistribute ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------filter((lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate >= '1994-01-01')) -------------------------------PhysicalOlapScan[lineitem] -------------PhysicalDistribute ---------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF0 n_nationkey->[s_nationkey] -----------------PhysicalProject -------------------PhysicalOlapScan[supplier] apply RFs: RF0 +----------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF4 n_nationkey->[s_nationkey] +------------PhysicalProject +--------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((supplier.s_suppkey = partsupp.ps_suppkey)) otherCondition=() build RFs:RF3 s_suppkey->[ps_suppkey] ----------------PhysicalDistribute ------------------PhysicalProject ---------------------filter((nation.n_name = 'CANADA')) -----------------------PhysicalOlapScan[nation] +--------------------hashJoin[INNER_JOIN] hashCondition=((lineitem.l_partkey = partsupp.ps_partkey) and (lineitem.l_suppkey = partsupp.ps_suppkey)) otherCondition=((cast(ps_availqty as DECIMALV3(38, 3)) > (0.5 * sum(l_quantity)))) build RFs:RF1 ps_partkey->[l_partkey];RF2 ps_suppkey->[l_suppkey] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------filter((lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate >= '1994-01-01')) +--------------------------------PhysicalOlapScan[lineitem] apply RFs: RF1 RF2 +----------------------PhysicalDistribute +------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((partsupp.ps_partkey = part.p_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 RF3 +--------------------------PhysicalProject +----------------------------filter((p_name like 'forest%')) +------------------------------PhysicalOlapScan[part] +----------------PhysicalDistribute +------------------PhysicalProject +--------------------PhysicalOlapScan[supplier] apply RFs: RF4 +------------PhysicalDistribute +--------------PhysicalProject +----------------filter((nation.n_name = 'CANADA')) +------------------PhysicalOlapScan[nation] diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q21.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q21.out index b8378fe828..b248bc83d6 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q21.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q21.out @@ -8,27 +8,29 @@ PhysicalResultSink ----------PhysicalDistribute ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((l3.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) -------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((l2.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) build RFs:RF3 l_orderkey->[l_orderkey] ---------------------PhysicalProject -----------------------PhysicalOlapScan[lineitem] apply RFs: RF3 ---------------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF2 n_nationkey->[s_nationkey] -----------------------hashJoin[INNER_JOIN] hashCondition=((orders.o_orderkey = l1.l_orderkey)) otherCondition=() build RFs:RF1 o_orderkey->[l_orderkey] -------------------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_suppkey = l1.l_suppkey)) otherCondition=() build RFs:RF0 s_suppkey->[l_suppkey] ---------------------------PhysicalProject -----------------------------filter((l1.l_receiptdate > l1.l_commitdate)) -------------------------------PhysicalOlapScan[lineitem] apply RFs: RF0 RF1 ---------------------------PhysicalDistribute +----------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF3 n_nationkey->[s_nationkey] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN] hashCondition=((supplier.s_suppkey = l1.l_suppkey)) otherCondition=() build RFs:RF2 s_suppkey->[l_suppkey] +----------------------PhysicalDistribute +------------------------hashJoin[INNER_JOIN] hashCondition=((orders.o_orderkey = l1.l_orderkey)) otherCondition=() build RFs:RF1 o_orderkey->[l_orderkey] +--------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((l2.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) build RFs:RF0 l_orderkey->[l_orderkey] ----------------------------PhysicalProject -------------------------------PhysicalOlapScan[supplier] apply RFs: RF2 -------------------------PhysicalProject ---------------------------filter((orders.o_orderstatus = 'F')) -----------------------------PhysicalOlapScan[orders] +------------------------------PhysicalOlapScan[lineitem] apply RFs: RF0 +----------------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((l3.l_orderkey = l1.l_orderkey)) otherCondition=(( not (l_suppkey = l_suppkey))) +------------------------------PhysicalProject +--------------------------------filter((l1.l_receiptdate > l1.l_commitdate)) +----------------------------------PhysicalOlapScan[lineitem] apply RFs: RF1 RF2 +------------------------------PhysicalProject +--------------------------------filter((l3.l_receiptdate > l3.l_commitdate)) +----------------------------------PhysicalOlapScan[lineitem] +--------------------------PhysicalProject +----------------------------filter((orders.o_orderstatus = 'F')) +------------------------------PhysicalOlapScan[orders] ----------------------PhysicalDistribute ------------------------PhysicalProject ---------------------------filter((nation.n_name = 'SAUDI ARABIA')) -----------------------------PhysicalOlapScan[nation] -------------------PhysicalProject ---------------------filter((l3.l_receiptdate > l3.l_commitdate)) -----------------------PhysicalOlapScan[lineitem] +--------------------------PhysicalOlapScan[supplier] apply RFs: RF3 +------------------PhysicalDistribute +--------------------PhysicalProject +----------------------filter((nation.n_name = 'SAUDI ARABIA')) +------------------------PhysicalOlapScan[nation] diff --git a/regression-test/suites/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.groovy b/regression-test/suites/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.groovy index ea557f0de7..c3370019cb 100644 --- a/regression-test/suites/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.groovy +++ b/regression-test/suites/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.groovy @@ -22,6 +22,7 @@ suite("transposeSemiJoinAgg") { sql "SET enable_nereids_planner=true" sql "SET enable_fallback_to_original_planner=false" sql "set partition_pruning_expand_threshold=10;" + sql "set ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" sql "drop table if exists T1;" sql """ CREATE TABLE T1 (