[improvement](nereids) Simplify ScanNode projection handling by removing redundant conditions (#40801) (#41315)

pick from master #40801

This PR simplifies the handling of `ScanNode` projection logic.
Previously, the code included multiple conditional checks to determine
whether a `projectionTuple` should be generated. These conditions have
been removed, and now `projectionTuple` is always generated for
`ScanNode`, ensuring a consistent projection setup. Additionally,
redundant handling of `SlotId` and `SlotRef` has been eliminated, making
the code cleaner and easier to maintain. The behavior for `OlapScanNode`
remains unchanged.
This commit is contained in:
zy-kkk
2024-09-26 10:35:01 +08:00
committed by GitHub
parent a11fd62043
commit 4deda2fce7
3 changed files with 23 additions and 38 deletions

View File

@ -1951,21 +1951,10 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla
// slotIdsByOrder is used to ensure the ScanNode's output order is same with current Project
// if we change the output order in translate project, the upper node will receive wrong order
// tuple, since they get the order from project.getOutput() not scan.getOutput().
List<SlotId> slotIdsByOrder = Lists.newArrayList();
if (requiredByProjectSlotIdSet.size() != requiredSlotIdSet.size()
|| new HashSet<>(projectionExprs).size() != projectionExprs.size()
|| projectionExprs.stream().anyMatch(expr -> !(expr instanceof SlotRef))) {
projectionTuple = generateTupleDesc(slots,
((ScanNode) inputPlanNode).getTupleDesc().getTable(), context);
inputPlanNode.setProjectList(projectionExprs);
inputPlanNode.setOutputTupleDesc(projectionTuple);
} else {
for (int i = 0; i < slots.size(); ++i) {
context.addExprIdSlotRefPair(slots.get(i).getExprId(),
(SlotRef) projectionExprs.get(i));
slotIdsByOrder.add(((SlotRef) projectionExprs.get(i)).getSlotId());
}
}
projectionTuple = generateTupleDesc(slots,
((ScanNode) inputPlanNode).getTupleDesc().getTable(), context);
inputPlanNode.setProjectList(projectionExprs);
inputPlanNode.setOutputTupleDesc(projectionTuple);
// TODO: this is a temporary scheme to support two phase read when has project.
// we need to refactor all topn opt into rbo stage.
@ -1975,20 +1964,16 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla
SlotDescriptor lastSlot = olapScanSlots.get(olapScanSlots.size() - 1);
if (lastSlot.getColumn() != null
&& lastSlot.getColumn().getName().equals(Column.ROWID_COL)) {
if (projectionTuple != null) {
injectRowIdColumnSlot(projectionTuple);
SlotRef slotRef = new SlotRef(lastSlot);
inputPlanNode.getProjectList().add(slotRef);
requiredByProjectSlotIdSet.add(lastSlot.getId());
} else {
slotIdsByOrder.add(lastSlot.getId());
}
injectRowIdColumnSlot(projectionTuple);
SlotRef slotRef = new SlotRef(lastSlot);
inputPlanNode.getProjectList().add(slotRef);
requiredByProjectSlotIdSet.add(lastSlot.getId());
requiredSlotIdSet.add(lastSlot.getId());
}
((OlapScanNode) inputPlanNode).updateRequiredSlots(context, requiredByProjectSlotIdSet);
}
updateScanSlotsMaterialization((ScanNode) inputPlanNode, requiredSlotIdSet,
requiredByProjectSlotIdSet, slotIdsByOrder, context);
requiredByProjectSlotIdSet, context);
} else {
TupleDescriptor tupleDescriptor = generateTupleDesc(slots, null, context);
inputPlanNode.setProjectList(projectionExprs);
@ -2434,22 +2419,10 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla
private void updateScanSlotsMaterialization(ScanNode scanNode,
Set<SlotId> requiredSlotIdSet, Set<SlotId> requiredByProjectSlotIdSet,
List<SlotId> slotIdsByOrder, PlanTranslatorContext context) {
PlanTranslatorContext context) {
// TODO: use smallest slot if do not need any slot in upper node
SlotDescriptor smallest = scanNode.getTupleDesc().getSlots().get(0);
if (CollectionUtils.isNotEmpty(slotIdsByOrder)) {
// if we eliminate project above scan, we should ensure the slot order of scan's output is same with
// the projection's output. So, we need to reorder the output slot in scan's tuple.
Map<SlotId, SlotDescriptor> idToSlotDescMap = scanNode.getTupleDesc().getSlots().stream()
.filter(s -> requiredSlotIdSet.contains(s.getId()))
.collect(Collectors.toMap(SlotDescriptor::getId, s -> s));
scanNode.getTupleDesc().getSlots().clear();
for (SlotId slotId : slotIdsByOrder) {
scanNode.getTupleDesc().getSlots().add(idToSlotDescMap.get(slotId));
}
} else {
scanNode.getTupleDesc().getSlots().removeIf(s -> !requiredSlotIdSet.contains(s.getId()));
}
scanNode.getTupleDesc().getSlots().removeIf(s -> !requiredSlotIdSet.contains(s.getId()));
if (scanNode.getTupleDesc().getSlots().isEmpty()) {
scanNode.getTupleDesc().getSlots().add(smallest);
}