author     Jay Foad <jay.foad@amd.com>                      2019-12-11 10:29:23 +0000
committer  Jay Foad <jay.foad@amd.com>                      2019-12-18 09:46:11 +0000
commit     97ca7c2cc9083ebde681b0e11f7a8ccae1966d64 (patch)
tree       b619d6f8413aa57e27a897b7d0650836f1d0133c /llvm/lib
parent     862a60241687a2f718d2c4f554afd9d520da8952 (diff)
download   bcm5719-llvm-97ca7c2cc9083ebde681b0e11f7a8ccae1966d64.tar.gz
           bcm5719-llvm-97ca7c2cc9083ebde681b0e11f7a8ccae1966d64.zip
[AArch64] Enable clustering memory accesses to fixed stack objects
Summary:
r347747 added support for clustering mem ops with FI base operands,
including support for fixed stack objects in shouldClusterFI, but
apparently this was never tested.
This patch fixes shouldClusterFI to work with scaled as well as
unscaled load/store instructions, and fixes the ordering of memory ops
in MemOpInfo::operator< to ensure that memory addresses always
increase, regardless of which direction the stack grows.
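
For illustration, here is a minimal, self-contained C++ sketch of the
adjacency check described above. It is not the LLVM code itself:
StackAccess and shouldClusterAdjacent are made-up names standing in for
the object-offset, immediate-offset and access-size values that the real
shouldClusterFI reads from MachineFrameInfo and the opcodes. Offsets are
compared in plain increasing order and each access is converted to
element units of its own access size, so the check behaves the same
whether the stack grows up or down:

    #include <cassert>
    #include <cstdint>

    // Hypothetical stand-in for the values the real code derives from a
    // frame index and an opcode.
    struct StackAccess {
      int64_t ObjectOffset; // byte offset of the fixed stack object
      int64_t Offset;       // instruction's immediate, in element units
      int Scale;            // access size in bytes (what getMemScale returns)
    };

    // Sketch of the clustering test: convert both object offsets to element
    // units and cluster only if the second access is the element immediately
    // after the first.
    static bool shouldClusterAdjacent(const StackAccess &A, const StackAccess &B) {
      assert(A.ObjectOffset <= B.ObjectOffset && "object offsets must be ordered");
      if (A.ObjectOffset % A.Scale != 0 || B.ObjectOffset % B.Scale != 0)
        return false; // object offset is not a multiple of the access size
      int64_t Elem1 = A.ObjectOffset / A.Scale + A.Offset;
      int64_t Elem2 = B.ObjectOffset / B.Scale + B.Offset;
      return Elem1 + 1 == Elem2;
    }

    int main() {
      // Two 8-byte loads from fixed stack slots at byte offsets -16 and -8:
      // sorted by increasing offset they are adjacent, so they cluster.
      assert(shouldClusterAdjacent({-16, 0, 8}, {-8, 0, 8}));

      // An object offset that is not a multiple of the access size cannot be
      // expressed as a scaled offset, so clustering is rejected.
      assert(!shouldClusterAdjacent({-12, 0, 8}, {-8, 0, 8}));
      return 0;
    }

In the patch itself the scale comes from the new
AArch64InstrInfo::getMemScale, which replaces the two previous
per-file helpers.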
Subscribers: MatzeB, kristof.beyls, hiraditya, javed.absar, arphaman, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71334
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/MachineScheduler.cpp                    2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp           107
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.h               6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp    95
4 files changed, 91 insertions, 119 deletions
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 92dbc48ae2a..a10096606b6 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1498,7 +1498,7 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
                               : BaseOp->getIndex() < RHS.BaseOp->getIndex();
 
       if (Offset != RHS.Offset)
-        return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
+        return Offset < RHS.Offset;
 
       return SU->NodeNum < RHS.SU->NodeNum;
     }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 18c098bcaca..45a2d77c111 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2230,54 +2230,82 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   return true;
 }
 
-static unsigned getOffsetStride(unsigned Opc) {
+// Scaling factor for unscaled load or store.
+int AArch64InstrInfo::getMemScale(unsigned Opc) {
   switch (Opc) {
   default:
-    return 0;
-  case AArch64::LDURQi:
-  case AArch64::STURQi:
-    return 16;
-  case AArch64::LDURXi:
-  case AArch64::LDURDi:
-  case AArch64::STURXi:
-  case AArch64::STURDi:
-    return 8;
-  case AArch64::LDURWi:
+    llvm_unreachable("Opcode has unknown scale!");
+  case AArch64::LDRBBui:
+  case AArch64::LDURBBi:
+  case AArch64::LDRSBWui:
+  case AArch64::LDURSBWi:
+  case AArch64::STRBBui:
+  case AArch64::STURBBi:
+    return 1;
+  case AArch64::LDRHHui:
+  case AArch64::LDURHHi:
+  case AArch64::LDRSHWui:
+  case AArch64::LDURSHWi:
+  case AArch64::STRHHui:
+  case AArch64::STURHHi:
+    return 2;
+  case AArch64::LDRSui:
   case AArch64::LDURSi:
+  case AArch64::LDRSWui:
   case AArch64::LDURSWi:
-  case AArch64::STURWi:
+  case AArch64::LDRWui:
+  case AArch64::LDURWi:
+  case AArch64::STRSui:
   case AArch64::STURSi:
+  case AArch64::STRWui:
+  case AArch64::STURWi:
+  case AArch64::LDPSi:
+  case AArch64::LDPSWi:
+  case AArch64::LDPWi:
+  case AArch64::STPSi:
+  case AArch64::STPWi:
     return 4;
+  case AArch64::LDRDui:
+  case AArch64::LDURDi:
+  case AArch64::LDRXui:
+  case AArch64::LDURXi:
+  case AArch64::STRDui:
+  case AArch64::STURDi:
+  case AArch64::STRXui:
+  case AArch64::STURXi:
+  case AArch64::LDPDi:
+  case AArch64::LDPXi:
+  case AArch64::STPDi:
+  case AArch64::STPXi:
+    return 8;
+  case AArch64::LDRQui:
+  case AArch64::LDURQi:
+  case AArch64::STRQui:
+  case AArch64::STURQi:
+  case AArch64::LDPQi:
+  case AArch64::STPQi:
+  case AArch64::STGOffset:
+  case AArch64::STZGOffset:
+  case AArch64::ST2GOffset:
+  case AArch64::STZ2GOffset:
+  case AArch64::STGPi:
+    return 16;
   }
 }
 
 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
 // scaled.
 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
-  unsigned OffsetStride = getOffsetStride(Opc);
-  if (OffsetStride == 0)
-    return false;
+  int Scale = AArch64InstrInfo::getMemScale(Opc);
+
   // If the byte-offset isn't a multiple of the stride, we can't scale this
   // offset.
-  if (Offset % OffsetStride != 0)
+  if (Offset % Scale != 0)
     return false;
 
   // Convert the byte-offset used by unscaled into an "element" offset used
   // by the scaled pair load/store instructions.
-  Offset /= OffsetStride;
-  return true;
-}
-
-// Unscale the scaled offsets. Returns false if the scaled offset can't be
-// unscaled.
-static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
-  unsigned OffsetStride = getOffsetStride(Opc);
-  if (OffsetStride == 0)
-    return false;
-
-  // Convert the "element" offset used by scaled pair load/store instructions
-  // into the byte-offset used by unscaled.
-  Offset *= OffsetStride;
+  Offset /= Scale;
   return true;
 }
 
@@ -2308,15 +2336,17 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
   int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
   int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
   assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
-  // Get the byte-offset from the object offset.
-  if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
+  // Convert to scaled object offsets.
+  int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
+  if (ObjectOffset1 % Scale1 != 0)
     return false;
+  ObjectOffset1 /= Scale1;
+  int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
+  if (ObjectOffset2 % Scale2 != 0)
+    return false;
+  ObjectOffset2 /= Scale2;
   ObjectOffset1 += Offset1;
   ObjectOffset2 += Offset2;
-  // Get the "element" index in the object.
-  if (!scaleOffset(Opcode1, ObjectOffset1) ||
-      !scaleOffset(Opcode2, ObjectOffset2))
-    return false;
   return ObjectOffset1 + 1 == ObjectOffset2;
 }
 
@@ -2376,7 +2406,7 @@ bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
   // The caller should already have ordered First/SecondLdSt by offset.
   // Note: except for non-equal frame index bases
   if (BaseOp1.isFI()) {
-    assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
+    assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
            "Caller should have ordered offsets.");
 
     const MachineFrameInfo &MFI =
@@ -2385,8 +2415,7 @@ bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
                            BaseOp2.getIndex(), Offset2, SecondOpc);
   }
 
-  assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
-         "Caller should have ordered offsets.");
+  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
 
   return Offset1 + 1 == Offset2;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index c3d27836f39..66e517e5490 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -89,6 +89,12 @@ public:
   /// if there is a corresponding unscaled variant available.
   static Optional<unsigned> getUnscaledLdSt(unsigned Opc);
 
+  /// Scaling factor for (scaled or unscaled) load or store.
+  static int getMemScale(unsigned Opc);
+  static int getMemScale(const MachineInstr &MI) {
+    return getMemScale(MI.getOpcode());
+  }
+
   /// Returns the index for the immediate for a given instruction.
   static unsigned getLoadStoreImmIdx(unsigned Opc);
 
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 296115c2006..d24e6d63f15 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -230,69 +230,6 @@ static bool isTagStore(const MachineInstr &MI) {
   }
 }
 
-// Scaling factor for unscaled load or store.
-static int getMemScale(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    llvm_unreachable("Opcode has unknown scale!");
-  case AArch64::LDRBBui:
-  case AArch64::LDURBBi:
-  case AArch64::LDRSBWui:
-  case AArch64::LDURSBWi:
-  case AArch64::STRBBui:
-  case AArch64::STURBBi:
-    return 1;
-  case AArch64::LDRHHui:
-  case AArch64::LDURHHi:
-  case AArch64::LDRSHWui:
-  case AArch64::LDURSHWi:
-  case AArch64::STRHHui:
-  case AArch64::STURHHi:
-    return 2;
-  case AArch64::LDRSui:
-  case AArch64::LDURSi:
-  case AArch64::LDRSWui:
-  case AArch64::LDURSWi:
-  case AArch64::LDRWui:
-  case AArch64::LDURWi:
-  case AArch64::STRSui:
-  case AArch64::STURSi:
-  case AArch64::STRWui:
-  case AArch64::STURWi:
-  case AArch64::LDPSi:
-  case AArch64::LDPSWi:
-  case AArch64::LDPWi:
-  case AArch64::STPSi:
-  case AArch64::STPWi:
-    return 4;
-  case AArch64::LDRDui:
-  case AArch64::LDURDi:
-  case AArch64::LDRXui:
-  case AArch64::LDURXi:
-  case AArch64::STRDui:
-  case AArch64::STURDi:
-  case AArch64::STRXui:
-  case AArch64::STURXi:
-  case AArch64::LDPDi:
-  case AArch64::LDPXi:
-  case AArch64::STPDi:
-  case AArch64::STPXi:
-    return 8;
-  case AArch64::LDRQui:
-  case AArch64::LDURQi:
-  case AArch64::STRQui:
-  case AArch64::STURQi:
-  case AArch64::LDPQi:
-  case AArch64::STPQi:
-  case AArch64::STGOffset:
-  case AArch64::STZGOffset:
-  case AArch64::ST2GOffset:
-  case AArch64::STZ2GOffset:
-  case AArch64::STGPi:
-    return 16;
-  }
-}
-
 static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                          bool *IsValidLdStrOpc = nullptr) {
   if (IsValidLdStrOpc)
@@ -603,7 +540,7 @@ static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
   // ST*G and all paired ldst have the same scale in pre/post-indexed variants
   // as in the "unsigned offset" variant.
   // All other pre/post indexed ldst instructions are unscaled.
-  Scale = (IsTagStore || IsPaired) ? getMemScale(MI) : 1;
+  Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
 
   if (IsPaired) {
     MinOffset = -64;
@@ -635,8 +572,8 @@ static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                   MachineInstr &StoreInst,
                                   const AArch64InstrInfo *TII) {
   assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
-  int LoadSize = getMemScale(LoadInst);
-  int StoreSize = getMemScale(StoreInst);
+  int LoadSize = TII->getMemScale(LoadInst);
+  int StoreSize = TII->getMemScale(StoreInst);
   int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst)
                              ? getLdStOffsetOp(StoreInst).getImm()
                              : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
@@ -746,7 +683,7 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
   unsigned Opc = I->getOpcode();
   bool IsScaled = !TII->isUnscaledLdSt(Opc);
-  int OffsetStride = IsScaled ? 1 : getMemScale(*I);
+  int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);
 
   bool MergeForward = Flags.getMergeForward();
   // Insert our new paired instruction after whichever of the paired
@@ -853,7 +790,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
   unsigned Opc =
       SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
   bool IsUnscaled = TII->isUnscaledLdSt(Opc);
-  int OffsetStride = IsUnscaled ? getMemScale(*I) : 1;
+  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
 
   bool MergeForward = Flags.getMergeForward();
 
@@ -938,11 +875,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
     // We're trying to pair instructions that differ in how they are scaled. If
     // I is scaled then scale the offset of Paired accordingly. Otherwise, do
     // the opposite (i.e., make Paired's offset unscaled).
-    int MemSize = getMemScale(*Paired);
+    int MemSize = TII->getMemScale(*Paired);
     if (PairedIsUnscaled) {
       // If the unscaled offset isn't a multiple of the MemSize, we can't
       // pair the operations together.
-      assert(!(PairedOffset % getMemScale(*Paired)) &&
+      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
              "Offset should be a multiple of the stride!");
       PairedOffset /= MemSize;
     } else {
@@ -967,9 +904,9 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
   int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
   // Scale the immediate offset, if necessary.
   if (TII->isUnscaledLdSt(RtMI->getOpcode())) {
-    assert(!(OffsetImm % getMemScale(*RtMI)) &&
+    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
            "Unscaled offset cannot be scaled.");
-    OffsetImm /= getMemScale(*RtMI);
+    OffsetImm /= TII->getMemScale(*RtMI);
   }
 
   // Construct the new instruction.
@@ -1069,8 +1006,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
   MachineBasicBlock::iterator NextI = LoadI;
   ++NextI;
 
-  int LoadSize = getMemScale(*LoadI);
-  int StoreSize = getMemScale(*StoreI);
+  int LoadSize = TII->getMemScale(*LoadI);
+  int StoreSize = TII->getMemScale(*StoreI);
   Register LdRt = getLdStRegOp(*LoadI).getReg();
   const MachineOperand &StMO = getLdStRegOp(*StoreI);
   Register StRt = getLdStRegOp(*StoreI).getReg();
@@ -1489,7 +1426,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
   Register Reg = getLdStRegOp(FirstMI).getReg();
   Register BaseReg = getLdStBaseOp(FirstMI).getReg();
   int Offset = getLdStOffsetOp(FirstMI).getImm();
-  int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
+  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
   bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
 
   Optional<bool> MaybeCanRename = None;
@@ -1534,7 +1471,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
         // We're trying to pair instructions that differ in how they are scaled.
         // If FirstMI is scaled then scale the offset of MI accordingly.
         // Otherwise, do the opposite (i.e., make MI's offset unscaled).
-        int MemSize = getMemScale(MI);
+        int MemSize = TII->getMemScale(MI);
         if (MIIsUnscaled) {
           // If the unscaled offset isn't a multiple of the MemSize, we can't
           // pair the operations together: bail and keep looking.
@@ -1792,7 +1729,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
   MachineBasicBlock::iterator MBBI = I;
 
   Register BaseReg = getLdStBaseOp(MemMI).getReg();
-  int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);
+  int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);
 
   // Scan forward looking for post-index opportunities. Updating instructions
   // can't be formed if the memory instruction doesn't have the offset we're
@@ -1963,7 +1900,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
   // with Offset-1)
   bool IsUnscaled = TII->isUnscaledLdSt(MI);
   int Offset = getLdStOffsetOp(MI).getImm();
-  int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
+  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
   // Allow one more for offset.
   if (Offset > 0)
     Offset -= OffsetStride;
@@ -2029,7 +1966,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
   // The immediate in the load/store is scaled by the size of the memory
   // operation. The immediate in the add we're looking for,
   // however, is not, so adjust here.
-  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
+  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
 
   // Look forward to try to find a pre-index instruction. For example,
   // ldr x1, [x0, #64]