diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index 334983b0c2..e85babed40 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -213,7 +213,7 @@ public class NereidsPlanner extends Planner { // print memo before choose plan. // if chooseNthPlan failed, we could get memo to debug - if (ConnectContext.get().getSessionVariable().isDumpNereidsMemo()) { + if (ConnectContext.get().getSessionVariable().dumpNereidsMemo) { String memo = cascadesContext.getMemo().toString(); LOG.info(memo); } @@ -223,7 +223,7 @@ public class NereidsPlanner extends Planner { physicalPlan = postProcess(physicalPlan); - if (ConnectContext.get().getSessionVariable().isDumpNereidsMemo()) { + if (ConnectContext.get().getSessionVariable().dumpNereidsMemo) { String tree = physicalPlan.treeString(); LOG.info(tree); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/SubgraphEnumerator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/SubgraphEnumerator.java index 8230353069..f3b9c8c74c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/SubgraphEnumerator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/SubgraphEnumerator.java @@ -17,16 +17,21 @@ package org.apache.doris.nereids.jobs.joinorder.hypergraph; import org.apache.doris.nereids.jobs.joinorder.hypergraph.bitmap.LongBitmap; import org.apache.doris.nereids.jobs.joinorder.hypergraph.bitmap.LongBitmapSubsetIterator; import org.apache.doris.nereids.jobs.joinorder.hypergraph.receiver.AbstractReceiver; +import org.apache.doris.qe.ConnectContext; import com.google.common.base.Preconditions; +import org.apache.logging.log4j.LogManager; +import
org.apache.logging.log4j.Logger; import java.util.ArrayList; import java.util.BitSet; +import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.stream.Collectors; /** * This class enumerate all subgraph of HyperGraph. CSG means connected subgraph @@ -34,14 +39,20 @@ import java.util.List; * More details are in Paper: Dynamic Programming Strikes Back and Build Query Optimizer. */ public class SubgraphEnumerator { - //The receiver receives the csg and cmp and record them, named DPTable in paper + public static final Logger LOG = LogManager.getLogger(SubgraphEnumerator.class); + + // The receiver receives the csg and cmp and record them, named DPTable in paper AbstractReceiver receiver; - //The enumerated hyperGraph + // The enumerated hyperGraph HyperGraph hyperGraph; EdgeCalculator edgeCalculator; NeighborhoodCalculator neighborhoodCalculator; // These caches are used to avoid repetitive computation + // trace enumerate + private final boolean enableTrace = ConnectContext.get().getSessionVariable().enableDpHypTrace; + private final StringBuilder traceBuilder = new StringBuilder(); + public SubgraphEnumerator(AbstractReceiver receiver, HyperGraph hyperGraph) { this.receiver = receiver; this.hyperGraph = hyperGraph; @@ -53,6 +64,9 @@ public class SubgraphEnumerator { * @return whether the hyperGraph is enumerated successfully */ public boolean enumerate() { + if (enableTrace) { + traceBuilder.append("Query Graph Graphviz: ").append(hyperGraph.toDottyHyperGraph()).append("\n"); + } receiver.reset(); List nodes = hyperGraph.getNodes(); // Init all nodes in Receiver @@ -73,12 +87,18 @@ public class SubgraphEnumerator { // We skip the last element because it can't generate valid csg-cmp pair long forbiddenNodes = LongBitmap.newBitmapBetween(0, size - 1); for (int i = size - 2; i >= 0; i--) { + if (enableTrace) { + traceBuilder.append("Starting main iteration at node[").append(i).append("]\n"); + } long csg = LongBitmap.newBitmap(i);
forbiddenNodes = LongBitmap.unset(forbiddenNodes, i); if (!emitCsg(csg) || !enumerateCsgRec(csg, LongBitmap.clone(forbiddenNodes))) { return false; } } + if (enableTrace) { + LOG.info(traceBuilder.toString()); + } return true; } @@ -87,6 +107,11 @@ public class SubgraphEnumerator { private boolean enumerateCsgRec(long csg, long forbiddenNodes) { long neighborhood = neighborhoodCalculator.calcNeighborhood(csg, forbiddenNodes, edgeCalculator); LongBitmapSubsetIterator subsetIterator = LongBitmap.getSubsetIterator(neighborhood); + if (enableTrace) { + traceBuilder.append("Expanding connected subgraph, subgraph=[").append(LongBitmap.toString(csg)) + .append("], neighborhood=[").append(LongBitmap.toString(neighborhood)).append("], forbidden=[") + .append(LongBitmap.toString(forbiddenNodes)).append("]\n"); + } for (long subset : subsetIterator) { long newCsg = LongBitmap.newBitmapUnion(csg, subset); edgeCalculator.unionEdges(csg, subset); @@ -110,6 +135,11 @@ public class SubgraphEnumerator { private boolean enumerateCmpRec(long csg, long cmp, long forbiddenNodes) { long neighborhood = neighborhoodCalculator.calcNeighborhood(cmp, forbiddenNodes, edgeCalculator); LongBitmapSubsetIterator subsetIterator = new LongBitmapSubsetIterator(neighborhood); + if (enableTrace) { + traceBuilder.append("Expanding complement subgraph, subgraph=[").append(LongBitmap.toString(cmp)) + .append("], neighborhood=[").append(LongBitmap.toString(neighborhood)).append("], forbidden=[") + .append(LongBitmap.toString(forbiddenNodes)).append("]\n"); + } for (long subset : subsetIterator) { long newCmp = LongBitmap.newBitmapUnion(cmp, subset); // We need to check whether Cmp is connected and then try to find hyper edge @@ -144,11 +174,15 @@ public class SubgraphEnumerator { forbiddenNodes = LongBitmap.or(forbiddenNodes, csg); long neighborhoods = neighborhoodCalculator.calcNeighborhood(csg, LongBitmap.clone(forbiddenNodes), edgeCalculator); + if (enableTrace && LongBitmap.getCardinality(csg) == 1) {
+ traceBuilder.append("Emitting connected subgraph, subgraph=[").append(LongBitmap.toString(csg)) + .append("], neighborhood=[").append(LongBitmap.toString(neighborhoods)).append("], forbidden=[") + .append(LongBitmap.toString(forbiddenNodes)).append("]\n"); + } for (int nodeIndex : LongBitmap.getReverseIterator(neighborhoods)) { long cmp = LongBitmap.newBitmap(nodeIndex); // whether there is an edge between csg and cmp List edges = edgeCalculator.connectCsgCmp(csg, cmp); - if (!edges.isEmpty()) { if (!receiver.emitCsgCmp(csg, cmp, edges)) { return false; @@ -174,7 +208,7 @@ public class SubgraphEnumerator { return true; } - class NeighborhoodCalculator { + static class NeighborhoodCalculator { // This function is used to calculate neighborhoods of given subgraph. // Though a direct way is to add all nodes u that satisfies: // \in E && v \in subgraph && v \intersect X = empty @@ -203,7 +237,7 @@ public class SubgraphEnumerator { } } - class EdgeCalculator { + static class EdgeCalculator { final List edges; // It cached all edges that contained by this subgraph, Note we always // use bitset store edge map because the number of edges can be very large @@ -305,37 +339,26 @@ public class SubgraphEnumerator { } public List foundEdgesContain(long subgraph) { - Preconditions.checkArgument(containSimpleEdges.containsKey(subgraph)); BitSet edgeMap = containSimpleEdges.get(subgraph); + Preconditions.checkState(edgeMap != null); edgeMap.or(containComplexEdges.get(subgraph)); - List foundEdges = new ArrayList<>(); - edgeMap.stream().forEach(index -> foundEdges.add(edges.get(index))); - return foundEdges; + return edgeMap.stream().mapToObj(edges::get).collect(Collectors.toList()); } public List foundSimpleEdgesContain(long subgraph) { - List foundEdges = new ArrayList<>(); if (!containSimpleEdges.containsKey(subgraph)) { - return foundEdges; + return Collections.emptyList(); } BitSet edgeMap = containSimpleEdges.get(subgraph); - edgeMap.stream().forEach(index ->
foundEdges.add(edges.get(index))); - return foundEdges; + return edgeMap.stream().mapToObj(edges::get).collect(Collectors.toList()); } public List foundComplexEdgesContain(long subgraph) { - List foundEdges = new ArrayList<>(); if (!containComplexEdges.containsKey(subgraph)) { - return foundEdges; + return Collections.emptyList(); } BitSet edgeMap = containComplexEdges.get(subgraph); - edgeMap.stream().forEach(index -> foundEdges.add(edges.get(index))); - return foundEdges; - } - - public int getEdgeSizeContain(long subgraph) { - Preconditions.checkArgument(containSimpleEdges.containsKey(subgraph)); - return containSimpleEdges.get(subgraph).cardinality() + containSimpleEdges.get(subgraph).cardinality(); + return edgeMap.stream().mapToObj(edges::get).collect(Collectors.toList()); } private boolean isContainEdge(long subgraph, Edge edge) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java index ad6e8a3252..892e963810 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java @@ -87,6 +87,10 @@ public class PlanReceiver implements AbstractReceiver { /** * Emit a new plan from bottom to top + *

+ * EmitCsgCmp joins the optimal plans for S1 and S2, which together form a csg-cmp-pair. + * It requires calculating the proper join predicate and costs of the resulting joins. + * Finally, it updates the DP table. * * @param left the bitmap of left child tree * @param right the bitmap of the right child tree @@ -296,8 +300,8 @@ public class PlanReceiver implements AbstractReceiver { @Override public Group getBestPlan(long bitmap) { - Preconditions.checkArgument(planTable.containsKey(bitmap)); Group root = planTable.get(bitmap); + Preconditions.checkState(root != null); // If there are some rules relied on the logical join, we need to make logical Expression // However, it cost 15% of total optimized time. makeLogicalExpression(root); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index ec5c59ebee..731635b5c8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -226,6 +226,8 @@ public class SessionVariable implements Serializable, Writable { public static final String NEREIDS_CBO_PENALTY_FACTOR = "nereids_cbo_penalty_factor"; public static final String ENABLE_NEREIDS_TRACE = "enable_nereids_trace"; + public static final String ENABLE_DPHYP_TRACE = "enable_dphyp_trace"; + public static final String ENABLE_RUNTIME_FILTER_PRUNE = "enable_runtime_filter_prune"; @@ -655,6 +657,9 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = ENABLE_NEREIDS_TRACE) private boolean enableNereidsTrace = false; + @VariableMgr.VarAttr(name = ENABLE_DPHYP_TRACE, needForward = true) + public boolean enableDpHypTrace = false; + @VariableMgr.VarAttr(name = BROADCAST_RIGHT_TABLE_SCALE_FACTOR) private double broadcastRightTableScaleFactor = 10.0;