From 1be38e798dab8dcaeadafe721b0ea128a2ff4de1 Mon Sep 17 00:00:00 2001 From: minghong Date: Mon, 1 Apr 2024 22:46:28 +0800 Subject: [PATCH] push topn-filter to both sides of inner join (#33112) --- .../nereids/processor/post/TopNScanOpt.java | 50 +++++++++++++------ .../nereids_tpch_p0/tpch/topn-filter.groovy | 31 ++++++++++-- 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java index 63754afe4d..a8129639f5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java @@ -103,26 +103,44 @@ public class TopNScanOpt extends PlanPostProcessor { } private OlapScan findScanNodeBySlotReference(Plan root, SlotReference slot, boolean nullsFirst) { - // topn-filter cannot be pushed through right/full outer join if the first orderKey is nulls first - if (nullsFirst && root instanceof Join) { - Join join = (Join) root; - if (join.getJoinType().isRightOuterJoin() || join.getJoinType().isFullOuterJoin()) { + if (root instanceof PhysicalWindow) { + return null; + } + + if (root instanceof OlapScan) { + if (root.getOutputSet().contains(slot)) { + return (OlapScan) root; + } else { return null; } } + OlapScan target = null; - if (root instanceof OlapScan && root.getOutputSet().contains(slot)) { - return (OlapScan) root; - } else { - if (! root.children().isEmpty()) { - // for join and intersect, push topn-filter to their left child. - // TODO for union, topn-filter can be pushed down to all of its children. - Plan child = root.child(0); - if (!(child instanceof PhysicalWindow) && child.getOutputSet().contains(slot)) { - target = findScanNodeBySlotReference(child, slot, nullsFirst); - if (target != null) { - return target; - } + if (root instanceof Join) { + Join join = (Join) root; + if (nullsFirst && join.getJoinType().isOuterJoin()) { + // in fact, topn-filter can be pushed down to the left child of leftOuterJoin + // and to the right child of rightOuterJoin. + // but we have rule to push topn down to the left/right side. and topn-filter + // will be generated according to the inferred topn node. + return null; + } + // try to push to both left and right child + if (root.child(0).getOutputSet().contains(slot)) { + target = findScanNodeBySlotReference(root.child(0), slot, nullsFirst); + } else { + target = findScanNodeBySlotReference(root.child(1), slot, nullsFirst); + } + return target; + } + + if (!root.children().isEmpty()) { + // TODO for set operator, topn-filter can be pushed down to all of its children. + Plan child = root.child(0); + if (child.getOutputSet().contains(slot)) { + target = findScanNodeBySlotReference(child, slot, nullsFirst); + if (target != null) { + return target; } } } diff --git a/regression-test/suites/nereids_tpch_p0/tpch/topn-filter.groovy b/regression-test/suites/nereids_tpch_p0/tpch/topn-filter.groovy index 3a29274a2f..abcd33c774 100644 --- a/regression-test/suites/nereids_tpch_p0/tpch/topn-filter.groovy +++ b/regression-test/suites/nereids_tpch_p0/tpch/topn-filter.groovy @@ -79,15 +79,15 @@ suite("topn-filter") { qt_check_result2 "${multi_topn_desc}" - // do not use topn-filter + // push down topn-filter to both join children explain { sql """ select o_orderkey, c_custkey from orders - join[broadcast] customer on o_custkey = c_custkey + join customer on o_custkey = c_custkey order by c_custkey limit 2; """ - notContains "TOPN OPT:" + contains "TOPN OPT:" } // push topn filter down through AGG @@ -123,10 +123,33 @@ suite("topn-filter") { } explain { - sql "select * from nation right outer join region on r_regionkey = n_regionkey order by n_regionkey nulls last limit 1; " + sql "select * from nation full outer join region on r_regionkey = n_regionkey order by n_regionkey nulls first limit 1; " + notContains "TOPN OPT:" + } + + // this topn-filter is not generated from the top TOPN. + // topn + // +--left outer join + // +-- nation + // +-- region + // after topn push down: + // topn1 + // +--left outer join + // +-- topn2 + // +-- nation + // +-- region + // this topn-filter is generated by topn2, not topn1 + explain { + sql "select * from nation left outer join region on r_regionkey = n_regionkey order by n_regionkey nulls first limit 1; " contains "TOPN OPT:" } + // TODO: support latter, push topn to right outer join + // explain { + // sql "select * from nation right outer join region on r_regionkey = n_regionkey order by n_regionkey nulls last limit 1; " + // contains "TOPN OPT:" + // } + explain { sql "select * from nation right outer join region on r_regionkey = n_regionkey order by n_regionkey nulls last, n_name nulls first limit 1; " contains "TOPN OPT:"