From 29d4e8ee900bf95479d0e375b02c041d0f78f288 Mon Sep 17 00:00:00 2001 From: LiBinfeng <46676950+LiBinfeng-01@users.noreply.github.com> Date: Mon, 16 Oct 2023 11:59:45 +0800 Subject: [PATCH] [Fix](Nereids) fix test leading change disable join reorder parameter (#23657) Problem: when running the pipeline, test_leading fails randomly. Reason: a physical distribute was generated and chosen as the best plan because we cannot get any statistics for an empty table, so we got unexpected results because the order in the memo cannot be predicted. Solution: add statistics for the columns used in test_leading, and try it repeatedly in the pipeline. --- .../nereids/rules/rewrite/LeadingJoin.java | 8 +- .../org/apache/doris/qe/SessionVariable.java | 8 + .../data/nereids_p0/hint/test_leading.out | 151 +++++++++--------- .../pipeline/p0/conf/regression-conf.groovy | 2 +- .../nereids_p0/hint/test_leading.groovy | 12 ++ 5 files changed, 103 insertions(+), 78 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/LeadingJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/LeadingJoin.java index 0f334384ca..63fda8c906 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/LeadingJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/LeadingJoin.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.common.DdlException; import org.apache.doris.nereids.hint.Hint; import org.apache.doris.nereids.hint.LeadingHint; import org.apache.doris.nereids.jobs.JobContext; @@ -41,7 +42,12 @@ public class LeadingJoin extends DefaultPlanRewriter implements (LeadingHint) leadingHint, ((LeadingHint) leadingHint) .getLeadingTableBitmap(jobContext.getCascadesContext().getTables()))); if (leadingHint.isSuccess()) { - jobContext.getCascadesContext().getConnectContext().getSessionVariable().setDisableJoinReorder(true); + try { + 
jobContext.getCascadesContext().getConnectContext().getSessionVariable() + .disableNereidsJoinReorderOnce(); + } catch (DdlException e) { + throw new RuntimeException(e); + } } else { return plan; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index da7e5a10d9..1ad2b2b0df 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -2629,6 +2629,14 @@ public class SessionVariable implements Serializable, Writable { VariableMgr.setVar(this, new SetVar(SessionVariable.ENABLE_NEREIDS_PLANNER, new StringLiteral("false"))); } + public void disableNereidsJoinReorderOnce() throws DdlException { + if (!enableNereidsPlanner) { + return; + } + setIsSingleSetVar(true); + VariableMgr.setVar(this, new SetVar(SessionVariable.DISABLE_JOIN_REORDER, new StringLiteral("false"))); + } + // return number of variables by given variable annotation public int getVariableNumByVariableAnnotation(VariableAnnotation type) { int num = 0; diff --git a/regression-test/data/nereids_p0/hint/test_leading.out b/regression-test/data/nereids_p0/hint/test_leading.out index 5eaf8a864c..ea760d8b2c 100644 --- a/regression-test/data/nereids_p0/hint/test_leading.out +++ b/regression-test/data/nereids_p0/hint/test_leading.out @@ -3,7 +3,7 @@ PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t1.c1 = t2.c2) +------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() --------PhysicalOlapScan[t2] --------PhysicalDistribute ----------PhysicalOlapScan[t1] @@ -16,7 +16,7 @@ SyntaxError: PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t1.c1 = t2.c2) +------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() --------PhysicalOlapScan[t1] --------PhysicalDistribute ----------PhysicalOlapScan[t2] @@ -29,8 +29,8 @@ 
SyntaxError: PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t2.c2 = t3.c3) ---------hashJoin[INNER_JOIN](t1.c1 = t2.c2) +------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3))otherCondition=() +--------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() ----------PhysicalOlapScan[t1] ----------PhysicalDistribute ------------PhysicalOlapScan[t2] @@ -45,13 +45,13 @@ SyntaxError: PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t1.c1 = t2.c2) ---------PhysicalOlapScan[t1] +------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() +--------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3))otherCondition=() +----------PhysicalOlapScan[t2] +----------PhysicalDistribute +------------PhysicalOlapScan[t3] --------PhysicalDistribute -----------hashJoin[INNER_JOIN](t2.c2 = t3.c3) -------------PhysicalOlapScan[t2] -------------PhysicalDistribute ---------------PhysicalOlapScan[t3] +----------PhysicalOlapScan[t1] Used: leading(t1 { t2 t3 }) UnUsed: @@ -61,15 +61,14 @@ SyntaxError: PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t3.c3 = t4.c4) ---------PhysicalDistribute -----------hashJoin[INNER_JOIN](t1.c1 = t2.c2) -------------PhysicalOlapScan[t1] +------hashJoin[INNER_JOIN] hashCondition=((t3.c3 = t4.c4))otherCondition=() +--------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() +----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3))otherCondition=() +------------PhysicalOlapScan[t2] ------------PhysicalDistribute ---------------hashJoin[INNER_JOIN](t2.c2 = t3.c3) -----------------PhysicalOlapScan[t2] -----------------PhysicalDistribute -------------------PhysicalOlapScan[t3] +--------------PhysicalOlapScan[t3] +----------PhysicalDistribute +------------PhysicalOlapScan[t1] --------PhysicalDistribute ----------PhysicalOlapScan[t4] @@ -81,13 +80,13 @@ SyntaxError: PhysicalResultSink 
--PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t2.c2 = t3.c3) ---------hashJoin[INNER_JOIN](t1.c1 = t2.c2) +------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3))otherCondition=() +--------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() ----------PhysicalOlapScan[t1] ----------PhysicalDistribute ------------PhysicalOlapScan[t2] --------PhysicalDistribute -----------hashJoin[INNER_JOIN](t3.c3 = t4.c4) +----------hashJoin[INNER_JOIN] hashCondition=((t3.c3 = t4.c4))otherCondition=() ------------PhysicalOlapScan[t3] ------------PhysicalDistribute --------------PhysicalOlapScan[t4] @@ -100,13 +99,13 @@ SyntaxError: PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t2.c2 = t3.c3) ---------hashJoin[INNER_JOIN](t1.c1 = t2.c2) +------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3))otherCondition=() +--------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() ----------PhysicalOlapScan[t1] ----------PhysicalDistribute ------------PhysicalOlapScan[t2] --------PhysicalDistribute -----------hashJoin[INNER_JOIN](t3.c3 = t4.c4) +----------hashJoin[INNER_JOIN] hashCondition=((t3.c3 = t4.c4))otherCondition=() ------------PhysicalOlapScan[t3] ------------PhysicalDistribute --------------PhysicalOlapScan[t4] @@ -119,8 +118,8 @@ SyntaxError: PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t1.c1 = t3.c3) ---------hashJoin[LEFT_OUTER_JOIN](t1.c1 = t2.c2) +------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t3.c3))otherCondition=() +--------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() ----------PhysicalOlapScan[t1] ----------PhysicalDistribute ------------PhysicalOlapScan[t2] @@ -131,8 +130,8 @@ PhysicalResultSink PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[LEFT_OUTER_JOIN](t1.c1 = t2.c2) ---------hashJoin[INNER_JOIN](t1.c1 = t3.c3) +------hashJoin[LEFT_OUTER_JOIN] 
hashCondition=((t1.c1 = t2.c2))otherCondition=() +--------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t3.c3))otherCondition=() ----------PhysicalOlapScan[t1] ----------PhysicalDistribute ------------PhysicalOlapScan[t3] @@ -147,8 +146,8 @@ SyntaxError: PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[LEFT_OUTER_JOIN](t1.c1 = t3.c3) ---------hashJoin[LEFT_OUTER_JOIN](t1.c1 = t2.c2) +------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t3.c3))otherCondition=() +--------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() ----------PhysicalOlapScan[t1] ----------PhysicalDistribute ------------PhysicalOlapScan[t2] @@ -159,8 +158,8 @@ PhysicalResultSink PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[LEFT_OUTER_JOIN](t1.c1 = t2.c2) ---------hashJoin[LEFT_OUTER_JOIN](t1.c1 = t3.c3) +------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() +--------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t3.c3))otherCondition=() ----------PhysicalOlapScan[t1] ----------PhysicalDistribute ------------PhysicalOlapScan[t3] @@ -175,14 +174,13 @@ SyntaxError: PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[LEFT_OUTER_JOIN](t2.c2 = t3.c3) +------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() +--------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t2.c2 = t3.c3))otherCondition=() +----------PhysicalOlapScan[t2] +----------PhysicalDistribute +------------PhysicalOlapScan[t3] --------PhysicalDistribute -----------hashJoin[LEFT_OUTER_JOIN](t1.c1 = t2.c2) -------------PhysicalOlapScan[t1] -------------PhysicalDistribute ---------------PhysicalOlapScan[t2] ---------PhysicalDistribute -----------PhysicalOlapScan[t3] +----------PhysicalOlapScan[t1] Used: UnUsed: leading(t1 { t2 t3 }) @@ -192,13 +190,13 @@ SyntaxError: PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t1.c1 = t2.c2) 
---------PhysicalOlapScan[t1] +------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() +--------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3))otherCondition=() +----------PhysicalOlapScan[t2] +----------PhysicalDistribute +------------PhysicalOlapScan[t3] --------PhysicalDistribute -----------hashJoin[INNER_JOIN](t2.c2 = t3.c3) -------------PhysicalOlapScan[t2] -------------PhysicalDistribute ---------------PhysicalOlapScan[t3] +----------PhysicalOlapScan[t1] Used: leading(t1 { t2 t3 }) UnUsed: @@ -208,25 +206,25 @@ SyntaxError: PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[LEFT_OUTER_JOIN](t1.c1 = tmp.c2) ---------PhysicalOlapScan[t1] +------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t1.c1 = tmp.c2))otherCondition=() +--------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3))otherCondition=() +----------PhysicalOlapScan[t2] +----------PhysicalDistribute +------------PhysicalOlapScan[t3] --------PhysicalDistribute -----------hashJoin[INNER_JOIN](t2.c2 = t3.c3) -------------PhysicalOlapScan[t2] -------------PhysicalDistribute ---------------PhysicalOlapScan[t3] +----------PhysicalOlapScan[t1] -- !select15 -- PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[LEFT_OUTER_JOIN](t1.c1 = tmp.c2) ---------PhysicalOlapScan[t1] +------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t1.c1 = tmp.c2))otherCondition=() +--------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3))otherCondition=() +----------PhysicalOlapScan[t2] +----------PhysicalDistribute +------------PhysicalOlapScan[t3] --------PhysicalDistribute -----------hashJoin[INNER_JOIN](t2.c2 = t3.c3) -------------PhysicalOlapScan[t2] -------------PhysicalDistribute ---------------PhysicalOlapScan[t3] +----------PhysicalOlapScan[t1] Used: UnUsed: leading(t1 t2 t3) @@ -236,7 +234,7 @@ SyntaxError: PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[LEFT_SEMI_JOIN](t1.c1 = t2.c2) +------hashJoin[LEFT_SEMI_JOIN] 
hashCondition=((t1.c1 = t2.c2))otherCondition=() --------PhysicalOlapScan[t1] --------PhysicalDistribute ----------PhysicalProject @@ -246,11 +244,11 @@ PhysicalResultSink PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[RIGHT_SEMI_JOIN](t1.c1 = t2.c2) ---------PhysicalProject -----------PhysicalOlapScan[t2] +------hashJoin[LEFT_SEMI_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() +--------PhysicalOlapScan[t1] --------PhysicalDistribute -----------PhysicalOlapScan[t1] +----------PhysicalProject +------------PhysicalOlapScan[t2] Used: leading(t2 t1) UnUsed: @@ -274,10 +272,10 @@ PhysicalResultSink --PhysicalDistribute ----PhysicalProject ------NestedLoopJoin[CROSS_JOIN] ---------PhysicalProject -----------PhysicalOlapScan[t2] +--------PhysicalOlapScan[t1] --------PhysicalDistribute -----------PhysicalOlapScan[t1] +----------PhysicalProject +------------PhysicalOlapScan[t2] Used: leading(t2 t1) UnUsed: @@ -288,7 +286,7 @@ PhysicalResultSink --PhysicalDistribute ----PhysicalProject ------NestedLoopJoin[CROSS_JOIN] ---------hashJoin[INNER_JOIN](cte.c1 = cte.c2) +--------hashJoin[INNER_JOIN] hashCondition=((cte.c1 = cte.c2))otherCondition=() ----------PhysicalProject ------------PhysicalOlapScan[t1] ----------PhysicalDistribute @@ -319,7 +317,7 @@ SyntaxError: PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t1.c1 = t2.c2) +------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() --------PhysicalOlapScan[t1] --------PhysicalDistribute ----------PhysicalOlapScan[t2] @@ -332,7 +330,7 @@ SyntaxError: leading(t66 t1) Msg:can not find table: t66 PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t1.c1 = t2.c2) +------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() --------PhysicalOlapScan[t1] --------PhysicalDistribute ----------PhysicalOlapScan[t2] @@ -346,7 +344,7 @@ PhysicalResultSink --PhysicalDistribute 
----PhysicalProject ------NestedLoopJoin[CROSS_JOIN] ---------hashJoin[INNER_JOIN](cte.c1 = cte.c2) +--------hashJoin[INNER_JOIN] hashCondition=((cte.c1 = cte.c2))otherCondition=() ----------PhysicalProject ------------PhysicalOlapScan[t1] ----------PhysicalDistribute @@ -363,8 +361,8 @@ SyntaxError: leading(t2 cte t1) Msg:Leading alias can only be table name alias PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t2.c2 = t3.c3) ---------hashJoin[INNER_JOIN](t1.c1 = t2.c2) +------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3))otherCondition=() +--------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() ----------PhysicalOlapScan[t1] ----------PhysicalDistribute ------------PhysicalOlapScan[t2] @@ -379,8 +377,8 @@ SyntaxError: leading(t1 t2) Msg:tables should be same as join tables PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t2.c2 = t3.c3) ---------hashJoin[INNER_JOIN](t1.c1 = t2.c2) +------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3))otherCondition=() +--------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2))otherCondition=() ----------PhysicalOlapScan[t1] ----------PhysicalDistribute ------------PhysicalOlapScan[t2] @@ -395,7 +393,7 @@ SyntaxError: leading(t1 t1 t2 t3) Msg:duplicated table PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t1.c1 = t_2.c2) +------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t_2.c2))otherCondition=() --------PhysicalOlapScan[t1] --------PhysicalDistribute ----------PhysicalOlapScan[t2] @@ -408,7 +406,7 @@ SyntaxError: PhysicalResultSink --PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t1.c1 = t_2.c2) +------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t_2.c2))otherCondition=() --------PhysicalOlapScan[t1] --------PhysicalDistribute ----------PhysicalOlapScan[t2] @@ -421,10 +419,11 @@ SyntaxError: leading(t1 t2) Msg:can not find table: t2 PhysicalResultSink 
--PhysicalDistribute ----PhysicalProject -------hashJoin[INNER_JOIN](t1.c1 = t_1.c1) +------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t_1.c1))otherCondition=() --------PhysicalOlapScan[t1] --------PhysicalOlapScan[t1] Used: leading(t1 t_1) UnUsed: SyntaxError: + diff --git a/regression-test/pipeline/p0/conf/regression-conf.groovy b/regression-test/pipeline/p0/conf/regression-conf.groovy index 83956604da..4e48c53cd7 100644 --- a/regression-test/pipeline/p0/conf/regression-conf.groovy +++ b/regression-test/pipeline/p0/conf/regression-conf.groovy @@ -55,7 +55,7 @@ testDirectories = "" excludeGroups = "" // this suites will not be executed -excludeSuites = "test_sql_block_rule,test_ddl,test_leading,test_stream_load_move_memtable,test_profile,test_broker_load,test_spark_load,test_refresh_mtmv,test_bitmap_filter,nereids_delete_mow_partial_update" +excludeSuites = "test_sql_block_rule,test_ddl,test_stream_load_move_memtable,test_profile,test_broker_load,test_spark_load,test_refresh_mtmv,test_bitmap_filter,nereids_delete_mow_partial_update" // this directories will not be executed excludeDirectories = "workload_manager_p1" diff --git a/regression-test/suites/nereids_p0/hint/test_leading.groovy b/regression-test/suites/nereids_p0/hint/test_leading.groovy index fa782ed066..2464c2dd8f 100644 --- a/regression-test/suites/nereids_p0/hint/test_leading.groovy +++ b/regression-test/suites/nereids_p0/hint/test_leading.groovy @@ -38,6 +38,18 @@ suite("test_leading") { sql """create table t3 (c3 int, c33 int) distributed by hash(c3) buckets 3 properties('replication_num' = '1');""" sql """create table t4 (c4 int, c44 int) distributed by hash(c4) buckets 3 properties('replication_num' = '1');""" + sql ''' + alter table t1 modify column c1 set stats ('ndv'='1', 'avg_size'='1', 'max_size'='1', 'num_nulls'='0', 'min_value'='1', 'max_value'='1', 'row_count'='10000') + ''' + sql ''' + alter table t2 modify column c2 set stats ('ndv'='1', 'avg_size'='1', 'max_size'='1', 'num_nulls'='0', 
'min_value'='1', 'max_value'='1', 'row_count'='10000') + ''' + sql ''' + alter table t3 modify column c3 set stats ('ndv'='1', 'avg_size'='1', 'max_size'='1', 'num_nulls'='0', 'min_value'='1', 'max_value'='1', 'row_count'='10000') + ''' + sql ''' + alter table t4 modify column c4 set stats ('ndv'='1', 'avg_size'='1', 'max_size'='1', 'num_nulls'='0', 'min_value'='1', 'max_value'='1', 'row_count'='10000') + ''' //// test inner join with all edge and vertax is complete and equal predicates qt_select1 """explain shape plan select /*+ leading(t2 t1) */ * from t1 join t2 on c1 = c2;""" qt_select2 """explain shape plan select /*+ leading(t1 t2) */ * from t1 join t2 on c1 = c2;"""