[fix](nereids) dphyper join reorder may lose some join conjuncts (#19318)

This commit is contained in:
starocean999
2023-05-10 19:02:35 +08:00
committed by GitHub
parent 337732ae01
commit d0a8cd0fc5
5 changed files with 157 additions and 31 deletions

View File

@ -54,6 +54,7 @@ import org.apache.doris.planner.ScanNode;
import org.apache.doris.qe.ConnectContext;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import io.opentelemetry.api.trace.Span;
@ -270,16 +271,23 @@ public class NereidsPlanner extends Planner {
/**
 * Runs the DPHyp join-order job starting from the root group.
 *
 * <p>When the root group is an inner-join group, the join is first wrapped in a temporary
 * LogicalProject that is copied into the memo, the JoinOrderJob is run on the project's group,
 * and afterwards the first child of the project's logical expression is set as the memo root.
 */
private void dpHypOptimize() {
    Group root = getRoot();
    boolean changeRoot = false;
    if (root.isInnerJoinGroup()) {
        // If the root group is a join group, DPHyp can change the root group.
        // To keep the root group unchanged, add a project operator above the join
        // and run the reorder job on the project's group instead.
        List<NamedExpression> outputs = ImmutableList.copyOf(root.getLogicalExpression().getPlan().getOutput());
        LogicalPlan plan = new LogicalProject<>(outputs, root.getLogicalExpression().getPlan());
        CopyInResult copyInResult = cascadesContext.getMemo().copyIn(plan, null, true);
        root = copyInResult.correspondingExpression.getOwnerGroup();
        // The wrapper must be a newly generated expression; fail otherwise.
        Preconditions.checkArgument(copyInResult.generateNewExpression,
                "the top project node can't be generated for dpHypOptimize");
        changeRoot = true;
    }
    cascadesContext.pushJob(new JoinOrderJob(root, cascadesContext.getCurrentJobContext()));
    cascadesContext.getJobScheduler().executeJobPool(cascadesContext);
    if (changeRoot) {
        // Step below the temporary project: its first child becomes the memo root.
        cascadesContext.getMemo().setRoot(root.getLogicalExpression().child(0));
    }
}
/**

View File

@ -102,11 +102,7 @@ public class PlanReceiver implements AbstractReceiver {
Preconditions.checkArgument(planTable.containsKey(left));
Preconditions.checkArgument(planTable.containsKey(right));
// check if the missed edges can be correctly connected by add it to edges
// if not, the plan is invalid because of the missed edges, just return and seek for another valid plan
if (!processMissedEdges(left, right, edges)) {
return true;
}
processMissedEdges(left, right, edges);
Memo memo = jobContext.getCascadesContext().getMemo();
emitCount += 1;
@ -165,37 +161,27 @@ public class PlanReceiver implements AbstractReceiver {
return outputSlots;
}
// NOTE(review): this span interleaves two versions of processMissedEdges (the hunk header above
// reports 37 old lines against 27 new lines), so it is not compilable as written. The [old]/[new]
// tags below are inferred from those counts and from the two signatures; confirm against the commit.
// [old] lines belong to the boolean-returning version, [new] lines to the void version, and
// [shared] lines appear in both.
// ---- [old] header comment, signature and result flag of the boolean-returning version ----
// check if the missed edges can be used to connect left and right together with edges
// return true if no missed edge or the missed edge can be used to connect left and right
// the returned edges includes missed edges if there is any.
private boolean processMissedEdges(long left, long right, List<Edge> edges) {
boolean canAddMisssedEdges = true;
// find all reference nodes assume left and right sub graph is connected
// ---- [new] header comment and signature of the void version ----
// add any missed edge into edges to connect left and right
private void processMissedEdges(long left, long right, List<Edge> edges) {
// find all used edges
// ---- [shared] collect the indexes of edges recorded in usdEdges for left and right, plus those in `edges` ----
BitSet usedEdgesBitmap = new BitSet();
usedEdgesBitmap.or(usdEdges.get(left));
usedEdgesBitmap.or(usdEdges.get(right));
edges.forEach(edge -> usedEdgesBitmap.set(edge.getIndex()));
// ---- [old] reference nodes obtained from getAllReferenceNodes(usedEdgesBitmap) ----
long allReferenceNodes = getAllReferenceNodes(usedEdgesBitmap);
// check all edges
// the edge is a missed edge if the edge is not used and its reference nodes is a subset of allReferenceNodes
// ---- [new] reference nodes obtained from LongBitmap.or(left, right) ----
// find all referenced nodes
long allReferenceNodes = LongBitmap.or(left, right);
// find the edge which is not in usedEdgesBitmap and its referenced nodes is subset of allReferenceNodes
// ---- [shared] scan every edge of the hyper graph ----
for (Edge edge : hyperGraph.getEdges()) {
// ---- [old] subset test on edge.getReferenceNodes(), then a validity check folded into canAddMisssedEdges ----
if (LongBitmap.isSubset(edge.getReferenceNodes(), allReferenceNodes) && !usedEdgesBitmap.get(
edge.getIndex())) {
// check the missed edge can be used to connect left and right together with edges
// if the missed edge meet the 2 conditions, it is a valid edge
// 1. the edge's left child's referenced nodes is subset of the left
// 2. the edge's original right node is subset of right
canAddMisssedEdges = canAddMisssedEdges && LongBitmap.isSubset(edge.getLeft(),
left) && LongBitmap.isSubset(edge.getOriginalRight(), right);
// always add the missed edge to edges
// because the caller will return immediately if canAddMisssedEdges is false
// ---- [new] subset test on the bitmap built from the edge's original left and original right ----
long referenceNodes =
LongBitmap.newBitmapUnion(edge.getOriginalLeft(), edge.getOriginalRight());
if (LongBitmap.isSubset(referenceNodes, allReferenceNodes)
&& !usedEdgesBitmap.get(edge.getIndex())) {
// add the missed edge to edges
// ---- [shared] append the unused edge to the caller's list and close the if / for ----
edges.add(edge);
}
}
// ---- [old] the boolean version reports whether every missed edge passed the validity check ----
return canAddMisssedEdges;
// ---- [shared] end of method ----
}
private long getAllReferenceNodes(BitSet edgesBitmap) {

View File

@ -21,6 +21,7 @@ import org.apache.doris.common.Pair;
import org.apache.doris.nereids.cost.Cost;
import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
@ -368,8 +369,11 @@ public class Group {
/**
 * Tells whether the plan of this group's logical expression is an inner join that is
 * currently supported.
 *
 * @return true when the plan is a LogicalJoin of type INNER_JOIN whose other-join conjuncts
 *         are empty or whose first other-join conjunct is not a Literal; false otherwise
 */
public boolean isInnerJoinGroup() {
    Plan plan = getLogicalExpression().getPlan();
    if (plan instanceof LogicalJoin) {
        LogicalJoin join = (LogicalJoin) plan;
        // Right now, we only support inner join with some join conditions:
        // a join whose first other-join conjunct is a literal is not accepted.
        return join.getJoinType() == JoinType.INNER_JOIN
                && (join.getOtherJoinConjuncts().isEmpty()
                || !(join.getOtherJoinConjuncts().get(0) instanceof Literal));
    }
    return false;
}

View File

@ -524,7 +524,9 @@ public class Memo {
}
/**
 * Creates a group with the next generated group id and registers it in this memo.
 *
 * @param logicalProperties logical properties passed to the new group's constructor
 * @return the new group, already stored in {@code groups} under its group id
 */
public Group newGroup(LogicalProperties logicalProperties) {
    Group group = new Group(groupIdGenerator.getNextId(), logicalProperties);
    // Register the group before handing it out so it can be found through the group map.
    groups.put(group.getGroupId(), group);
    return group;
}
// This function is used to copy new group expression