[feat](Nereids): trace enumeration of DPHyp (#19394)

This commit is contained in:
jakevin
2023-05-10 11:57:35 +08:00
committed by GitHub
parent fae2e5fd22
commit 553068f7be
4 changed files with 58 additions and 25 deletions

View File

@ -213,7 +213,7 @@ public class NereidsPlanner extends Planner {
// print memo before choose plan.
// if chooseNthPlan failed, we could get memo to debug
if (ConnectContext.get().getSessionVariable().isDumpNereidsMemo()) {
if (ConnectContext.get().getSessionVariable().dumpNereidsMemo) {
String memo = cascadesContext.getMemo().toString();
LOG.info(memo);
}
@ -223,7 +223,7 @@ public class NereidsPlanner extends Planner {
physicalPlan = postProcess(physicalPlan);
if (ConnectContext.get().getSessionVariable().isDumpNereidsMemo()) {
if (ConnectContext.get().getSessionVariable().dumpNereidsMemo) {
String tree = physicalPlan.treeString();
LOG.info(tree);
}

View File

@ -17,16 +17,22 @@
package org.apache.doris.nereids.jobs.joinorder.hypergraph;
import org.apache.doris.nereids.NereidsPlanner;
import org.apache.doris.nereids.jobs.joinorder.hypergraph.bitmap.LongBitmap;
import org.apache.doris.nereids.jobs.joinorder.hypergraph.bitmap.LongBitmapSubsetIterator;
import org.apache.doris.nereids.jobs.joinorder.hypergraph.receiver.AbstractReceiver;
import org.apache.doris.qe.ConnectContext;
import com.google.common.base.Preconditions;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.stream.Collectors;
/**
* This class enumerate all subgraph of HyperGraph. CSG means connected subgraph
@ -34,14 +40,20 @@ import java.util.List;
* More details are in Paper: Dynamic Programming Strikes Back and Build Query Optimizer.
*/
public class SubgraphEnumerator {
//The receiver receives the csg and cmp and record them, named DPTable in paper
public static final Logger LOG = LogManager.getLogger(NereidsPlanner.class);
// The receiver receives the csg and cmp and record them, named DPTable in paper
AbstractReceiver receiver;
//The enumerated hyperGraph
// The enumerated hyperGraph
HyperGraph hyperGraph;
EdgeCalculator edgeCalculator;
NeighborhoodCalculator neighborhoodCalculator;
// These caches are used to avoid repetitive computation
// trace enumerate
private final boolean enableTrace = ConnectContext.get().getSessionVariable().enableDpHypTrace;
private final StringBuilder traceBuilder = new StringBuilder();
public SubgraphEnumerator(AbstractReceiver receiver, HyperGraph hyperGraph) {
this.receiver = receiver;
this.hyperGraph = hyperGraph;
@ -53,6 +65,9 @@ public class SubgraphEnumerator {
* @return whether the hyperGraph is enumerated successfully
*/
public boolean enumerate() {
if (enableTrace) {
traceBuilder.append("Query Graph Graphviz: ").append(hyperGraph.toDottyHyperGraph()).append("\n");
}
receiver.reset();
List<Node> nodes = hyperGraph.getNodes();
// Init all nodes in Receiver
@ -73,12 +88,18 @@ public class SubgraphEnumerator {
// We skip the last element because it can't generate valid csg-cmp pair
long forbiddenNodes = LongBitmap.newBitmapBetween(0, size - 1);
for (int i = size - 2; i >= 0; i--) {
if (enableTrace) {
traceBuilder.append("Starting main iteration at node[").append(i).append("]\n");
}
long csg = LongBitmap.newBitmap(i);
forbiddenNodes = LongBitmap.unset(forbiddenNodes, i);
if (!emitCsg(csg) || !enumerateCsgRec(csg, LongBitmap.clone(forbiddenNodes))) {
return false;
}
}
if (enableTrace) {
LOG.info(traceBuilder.toString());
}
return true;
}
@ -87,6 +108,11 @@ public class SubgraphEnumerator {
private boolean enumerateCsgRec(long csg, long forbiddenNodes) {
long neighborhood = neighborhoodCalculator.calcNeighborhood(csg, forbiddenNodes, edgeCalculator);
LongBitmapSubsetIterator subsetIterator = LongBitmap.getSubsetIterator(neighborhood);
if (enableTrace) {
traceBuilder.append("Expanding connected subgraph, subgraph=[").append(LongBitmap.toString(csg))
.append("], neighborhood=[").append(LongBitmap.toString(neighborhood)).append("], forbidden=[")
.append(LongBitmap.toString(forbiddenNodes)).append("]\n");
}
for (long subset : subsetIterator) {
long newCsg = LongBitmap.newBitmapUnion(csg, subset);
edgeCalculator.unionEdges(csg, subset);
@ -110,6 +136,11 @@ public class SubgraphEnumerator {
private boolean enumerateCmpRec(long csg, long cmp, long forbiddenNodes) {
long neighborhood = neighborhoodCalculator.calcNeighborhood(cmp, forbiddenNodes, edgeCalculator);
LongBitmapSubsetIterator subsetIterator = new LongBitmapSubsetIterator(neighborhood);
if (enableTrace) {
traceBuilder.append("Expanding complement subgraph, subgraph=[").append(LongBitmap.toString(cmp))
.append("], neighborhood=[").append(LongBitmap.toString(neighborhood)).append("], forbidden=[")
.append(LongBitmap.toString(forbiddenNodes)).append("]\n");
}
for (long subset : subsetIterator) {
long newCmp = LongBitmap.newBitmapUnion(cmp, subset);
// We need to check whether Cmp is connected and then try to find hyper edge
@ -144,11 +175,15 @@ public class SubgraphEnumerator {
forbiddenNodes = LongBitmap.or(forbiddenNodes, csg);
long neighborhoods = neighborhoodCalculator.calcNeighborhood(csg, LongBitmap.clone(forbiddenNodes),
edgeCalculator);
if (enableTrace && LongBitmap.getCardinality(csg) == 1) {
traceBuilder.append("Emitting connected subgraph, subgraph=[").append(LongBitmap.toString(csg))
.append("], neighborhood=[").append(LongBitmap.toString(neighborhoods)).append("], forbidden=[")
.append(LongBitmap.toString(forbiddenNodes)).append("]\n");
}
for (int nodeIndex : LongBitmap.getReverseIterator(neighborhoods)) {
long cmp = LongBitmap.newBitmap(nodeIndex);
// whether there is an edge between csg and cmp
List<Edge> edges = edgeCalculator.connectCsgCmp(csg, cmp);
if (!edges.isEmpty()) {
if (!receiver.emitCsgCmp(csg, cmp, edges)) {
return false;
@ -174,7 +209,7 @@ public class SubgraphEnumerator {
return true;
}
class NeighborhoodCalculator {
static class NeighborhoodCalculator {
// This function is used to calculate neighborhoods of given subgraph.
// Though a direct way is to add all nodes u that satisfies:
// <u, v> \in E && v \in subgraph && v \intersect X = empty
@ -203,7 +238,7 @@ public class SubgraphEnumerator {
}
}
class EdgeCalculator {
static class EdgeCalculator {
final List<Edge> edges;
// It cached all edges that contained by this subgraph, Note we always
// use bitset store edge map because the number of edges can be very large
@ -305,37 +340,26 @@ public class SubgraphEnumerator {
}
public List<Edge> foundEdgesContain(long subgraph) {
Preconditions.checkArgument(containSimpleEdges.containsKey(subgraph));
BitSet edgeMap = containSimpleEdges.get(subgraph);
Preconditions.checkState(edgeMap != null);
edgeMap.or(containComplexEdges.get(subgraph));
List<Edge> foundEdges = new ArrayList<>();
edgeMap.stream().forEach(index -> foundEdges.add(edges.get(index)));
return foundEdges;
return edgeMap.stream().mapToObj(edges::get).collect(Collectors.toList());
}
public List<Edge> foundSimpleEdgesContain(long subgraph) {
List<Edge> foundEdges = new ArrayList<>();
if (!containSimpleEdges.containsKey(subgraph)) {
return foundEdges;
return Collections.emptyList();
}
BitSet edgeMap = containSimpleEdges.get(subgraph);
edgeMap.stream().forEach(index -> foundEdges.add(edges.get(index)));
return foundEdges;
return edgeMap.stream().mapToObj(edges::get).collect(Collectors.toList());
}
public List<Edge> foundComplexEdgesContain(long subgraph) {
List<Edge> foundEdges = new ArrayList<>();
if (!containComplexEdges.containsKey(subgraph)) {
return foundEdges;
return Collections.emptyList();
}
BitSet edgeMap = containComplexEdges.get(subgraph);
edgeMap.stream().forEach(index -> foundEdges.add(edges.get(index)));
return foundEdges;
}
public int getEdgeSizeContain(long subgraph) {
Preconditions.checkArgument(containSimpleEdges.containsKey(subgraph));
return containSimpleEdges.get(subgraph).cardinality() + containSimpleEdges.get(subgraph).cardinality();
return edgeMap.stream().mapToObj(edges::get).collect(Collectors.toList());
}
private boolean isContainEdge(long subgraph, Edge edge) {

View File

@ -87,6 +87,10 @@ public class PlanReceiver implements AbstractReceiver {
/**
* Emit a new plan from bottom to top
* <p>
* The purpose of EmitCsgCmp is to combine the optimal plans for S1 and S2 into a csg-cmp-pair.
* It requires calculating the proper join predicate and costs of the resulting joins.
* In the end, update dpTables.
*
* @param left the bitmap of left child tree
* @param right the bitmap of the right child tree
@ -296,8 +300,8 @@ public class PlanReceiver implements AbstractReceiver {
@Override
public Group getBestPlan(long bitmap) {
Preconditions.checkArgument(planTable.containsKey(bitmap));
Group root = planTable.get(bitmap);
Preconditions.checkState(root != null);
// If there are some rules relied on the logical join, we need to make logical Expression
// However, it cost 15% of total optimized time.
makeLogicalExpression(root);

View File

@ -226,6 +226,8 @@ public class SessionVariable implements Serializable, Writable {
public static final String NEREIDS_CBO_PENALTY_FACTOR = "nereids_cbo_penalty_factor";
public static final String ENABLE_NEREIDS_TRACE = "enable_nereids_trace";
public static final String ENABLE_DPHYP_TRACE = "enable_dphyp_trace";
public static final String ENABLE_RUNTIME_FILTER_PRUNE =
"enable_runtime_filter_prune";
@ -655,6 +657,9 @@ public class SessionVariable implements Serializable, Writable {
@VariableMgr.VarAttr(name = ENABLE_NEREIDS_TRACE)
private boolean enableNereidsTrace = false;
@VariableMgr.VarAttr(name = ENABLE_DPHYP_TRACE, needForward = true)
public boolean enableDpHypTrace = false;
@VariableMgr.VarAttr(name = BROADCAST_RIGHT_TABLE_SCALE_FACTOR)
private double broadcastRightTableScaleFactor = 10.0;