From d7e02a95147101ed0be7f99368d4d6d6392ecf85 Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Thu, 1 Sep 2022 09:46:01 +0800 Subject: [PATCH] [fix](join)join reorder by mistake (#12113) --- .../org/apache/doris/analysis/FromClause.java | 43 ++++++++- .../org/apache/doris/analysis/SelectStmt.java | 25 +++-- .../test_join_should_not_reorder.out | 3 + .../test_join_should_not_reorder.groovy | 94 +++++++++++++++++++ 4 files changed, 150 insertions(+), 15 deletions(-) create mode 100644 regression-test/data/correctness_p0/test_join_should_not_reorder.out create mode 100644 regression-test/suites/correctness_p0/test_join_should_not_reorder.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FromClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FromClause.java index a985d092d9..e7470d1855 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FromClause.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FromClause.java @@ -44,12 +44,24 @@ public class FromClause implements ParseNode, Iterable { private boolean analyzed = false; private boolean needToSql = false; + // the tables positions may be changed by 'join reorder' optimization + // after reset, the original order information is lost + // in the next re-analyze phase, the mis-ordered tables may lead to 'unable to find column xxx' error + // now we use originalTableRefOrders to keep track of table order information + // so that in reset method, we can recover the original table orders. + private final ArrayList originalTableRefOrders = new ArrayList(); + public FromClause(List tableRefs) { tablerefs = Lists.newArrayList(tableRefs); // Set left table refs to ensure correct toSql() before analysis. for (int i = 1; i < tablerefs.size(); ++i) { tablerefs.get(i).setLeftTblRef(tablerefs.get(i - 1)); } + // save the tableRef's order, will use in reset method later + originalTableRefOrders.clear(); + for (int i = 0; i < tablerefs.size(); ++i) { + originalTableRefOrders.add(tablerefs.get(i)); + } } public FromClause() { @@ -129,6 +141,12 @@ public class FromClause implements ParseNode, Iterable { changeTblRefToNullable(analyzer); analyzed = true; + + // save the tableRef's order, will use in reset method later + originalTableRefOrders.clear(); + for (int i = 0; i < tablerefs.size(); ++i) { + originalTableRefOrders.add(tablerefs.get(i)); + } } // set null-side inlinve view column @@ -154,13 +172,21 @@ public class FromClause implements ParseNode, Iterable { for (TableRef tblRef : tablerefs) { clone.add(tblRef.clone()); } - return new FromClause(clone); + FromClause result = new FromClause(clone); + for (int i = 0; i < clone.size(); ++i) { + result.originalTableRefOrders.add(clone.get(i)); + } + return result; } public void reset() { for (int i = 0; i < size(); ++i) { get(i).reset(); } + // recover original table orders + for (int i = 0; i < size(); ++i) { + tablerefs.set(i, originalTableRefOrders.get(i)); + } this.analyzed = false; } @@ -206,17 +232,32 @@ public class FromClause implements ParseNode, Iterable { public void set(int i, TableRef tableRef) { tablerefs.set(i, tableRef); + originalTableRefOrders.set(i, tableRef); } public void add(TableRef t) { tablerefs.add(t); + // join reorder will call add method after call clear method. + // we want to keep tableRefPositions unchanged in that case + // in other cases, tablerefs.size() would larger than tableRefPositions.size() + // then we can update tableRefPositions. same logic in addAll method. + if (tablerefs.size() > originalTableRefOrders.size()) { + originalTableRefOrders.add(t); + } } public void addAll(List t) { tablerefs.addAll(t); + if (tablerefs.size() > originalTableRefOrders.size()) { + for (int i = originalTableRefOrders.size(); i < tablerefs.size(); ++i) { + originalTableRefOrders.add(tablerefs.get(i)); + } + } } public void clear() { + // this method on be called in reorder table + // we want to keep tableRefPositions, only clear tablerefs tablerefs.clear(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java index 8e4ae4eaa2..f6230865fd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java @@ -733,12 +733,12 @@ public class SelectStmt extends QueryStmt { protected void reorderTable(Analyzer analyzer) throws AnalysisException { List> candidates = Lists.newArrayList(); - List originOrderBackUp = Lists.newArrayList(fromClause.getTableRefs()); + ArrayList originOrderBackUp = Lists.newArrayList(fromClause.getTableRefs()); // New pair of table ref and row count for (TableRef tblRef : fromClause) { if (tblRef.getJoinOp() != JoinOperator.INNER_JOIN || tblRef.hasJoinHints()) { // Unsupported reorder outer join - return; + break; } long rowCount = 0; if (tblRef.getTable().getType() == TableType.OLAP) { @@ -747,6 +747,11 @@ public class SelectStmt extends QueryStmt { } candidates.add(Pair.of(tblRef, rowCount)); } + int reorderTableCount = candidates.size(); + if (reorderTableCount < originOrderBackUp.size()) { + fromClause.clear(); + fromClause.addAll(originOrderBackUp.subList(0, reorderTableCount)); + } // give InlineView row count long last = 0; for (int i = candidates.size() - 1; i >= 0; --i) { @@ -765,6 +770,9 @@ public class SelectStmt extends QueryStmt { // as long as one scheme success, we return this scheme immediately. // in this scheme, candidate.first will be consider to be the big table in star schema. // this scheme might not be fit for snowflake schema. + if (reorderTableCount < originOrderBackUp.size()) { + fromClause.addAll(originOrderBackUp.subList(reorderTableCount, originOrderBackUp.size())); + } return; } } @@ -819,18 +827,7 @@ public class SelectStmt extends QueryStmt { List candidateEqJoinPredicates = analyzer.getEqJoinConjunctsExcludeAuxPredicates(tid); for (Expr candidateEqJoinPredicate : candidateEqJoinPredicates) { List candidateTupleList = Lists.newArrayList(); - List candidateEqJoinPredicateList = Lists.newArrayList(candidateEqJoinPredicate); - // If a large table or view has joinClause is ranked first, - // and the joinClause is not judged here, - // the column in joinClause may not be found during reanalyzing. - // for example: - // select * from t1 inner join t2 on t1.a = t2.b inner join t3 on t3.c = t2.b; - // If t3 is a large table, it will be placed first after the reorderTable, - // and the problem that t2.b does not exist will occur in reanalyzing - if (candidateTableRef.getOnClause() != null) { - candidateEqJoinPredicateList.add(candidateTableRef.getOnClause()); - } - Expr.getIds(candidateEqJoinPredicateList, candidateTupleList, null); + Expr.getIds(Lists.newArrayList(candidateEqJoinPredicate), candidateTupleList, null); int count = candidateTupleList.size(); for (TupleId tupleId : candidateTupleList) { if (validTupleId.contains(tupleId) || tid.equals(tupleId)) { diff --git a/regression-test/data/correctness_p0/test_join_should_not_reorder.out b/regression-test/data/correctness_p0/test_join_should_not_reorder.out new file mode 100644 index 0000000000..f958424e65 --- /dev/null +++ b/regression-test/data/correctness_p0/test_join_should_not_reorder.out @@ -0,0 +1,3 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- + diff --git a/regression-test/suites/correctness_p0/test_join_should_not_reorder.groovy b/regression-test/suites/correctness_p0/test_join_should_not_reorder.groovy new file mode 100644 index 0000000000..c5be9241d8 --- /dev/null +++ b/regression-test/suites/correctness_p0/test_join_should_not_reorder.groovy @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_join_should_not_reorder") { + sql """ + drop table if exists reorder_a; + """ + + sql """ + drop table if exists reorder_b; + """ + + sql """ + drop table if exists reorder_c; + """ + + sql """ + CREATE TABLE `reorder_a` ( + `id` varchar(10), + `name` varchar(10), + `dt` date + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql """ + CREATE TABLE `reorder_b` ( + `id` varchar(10), + `name` varchar(10), + `dt1` date, + `dt2` date + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql """ + CREATE TABLE `reorder_c` ( + `id` varchar(10), + `name` varchar(10), + `dt` date + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + qt_select """ + select * + from reorder_a + join reorder_b + on reorder_a.dt between reorder_b.dt1 and reorder_b.dt2 + join reorder_c + on reorder_c.id = reorder_a.id and reorder_c.id = reorder_b.id; + """ + + sql """ + drop table if exists reorder_a; + """ + + sql """ + drop table if exists reorder_b; + """ + + sql """ + drop table if exists reorder_c; + """ +}