diff -Naur a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp 2024-04-15 20:53:13.643344012 +0800 +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp 2024-04-15 21:13:51.463632273 +0800 @@ -1097,9 +1097,6 @@ // Fold the return instruction into the LDM. DeleteRet = true; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; - // We 'restore' LR into PC so it is not live out of the return block: - // Clear Restored bit. - Info.setRestored(false); } else LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; } @@ -2155,6 +2152,35 @@ AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); } +void ARMFrameLowering::updateLRRestored(MachineFunction &MF) { + MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + // Check if all terminators do not implicitly use LR. Then we can 'restore' LR + // into PC so it is not live out of the return block: Clear the Restored bit + // in that case. + for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { + if (Info.getReg() != ARM::LR) + continue; + if (all_of(MF, [](const MachineBasicBlock &MBB) { + return all_of(MBB.terminators(), [](const MachineInstr &Term) { + return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET || + Term.getOpcode() == ARM::t2LDMIA_RET || + Term.getOpcode() == ARM::tPOP_RET; + }); + })) { + Info.setRestored(false); + break; + } + } +} + +void ARMFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS); + updateLRRestored(MF); +} + void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const { TargetFrameLowering::getCalleeSaves(MF, SavedRegs); diff -Naur a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h --- a/llvm/lib/Target/ARM/ARMFrameLowering.h 2024-04-15 20:53:13.643344012 +0800 +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h 2024-04-15 21:13:51.463632273 +0800 @@ -58,6 +58,13 @@ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; + /// Update the IsRestored flag on LR if it is spilled, based on the return + /// instructions. + static void updateLRRestored(MachineFunction &MF); + + void processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS = nullptr) const override; + void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff -Naur a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp 2024-04-15 20:53:13.643344012 +0800 +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp 2024-04-15 21:13:51.483632535 +0800 @@ -2037,19 +2037,6 @@ MO.setReg(ARM::PC); PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI); MBB.erase(MBBI); - // We now restore LR into PC so it is not live-out of the return block - // anymore: Clear the CSI Restored bit. - MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo(); - // CSI should be fixed after PrologEpilog Insertion - assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid"); - for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { - if (Info.getReg() == ARM::LR) { - Info.setRestored(false); - break; - } - } - return true; - } } return false; } @@ -2095,16 +2082,24 @@ isThumb2 = AFI->isThumb2Function(); isThumb1 = AFI->isThumbFunction() && !isThumb2; - bool Modified = false; + bool Modified = false, ModifiedLDMReturn = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { MachineBasicBlock &MBB = *MFI; Modified |= LoadStoreMultipleOpti(MBB); if (STI->hasV5TOps()) - Modified |= MergeReturnIntoLDM(MBB); + ModifiedLDMReturn |= MergeReturnIntoLDM(MBB); if (isThumb1) Modified |= CombineMovBx(MBB); } + Modified |= ModifiedLDMReturn; + + // If we merged a BX instruction into an LDM, we need to re-calculate whether + // LR is restored. This check needs to consider the whole function, not just + // the instruction(s) we changed, because there may be other BX returns which + // still need LR to be restored. + if (ModifiedLDMReturn) + ARMFrameLowering::updateLRRestored(Fn); Allocator.DestroyAll(); return Modified;