Diffstat (limited to 'llvm/lib')
 llvm/lib/CodeGen/MachineScheduler.cpp        |  16 ++++++++++++--
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 100 +++++++++++++++++-----
 2 files changed, 91 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index e423c89b1db..7313d43c806 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1490,8 +1490,20 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
         : SU(su), BaseOp(Op), Offset(ofs) {}
 
     bool operator<(const MemOpInfo &RHS) const {
-      return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
-             std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset, RHS.SU->NodeNum);
+      if (BaseOp->getType() != RHS.BaseOp->getType())
+        return BaseOp->getType() < RHS.BaseOp->getType();
+
+      if (BaseOp->isReg())
+        return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
+               std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
+                               RHS.SU->NodeNum);
+      if (BaseOp->isFI())
+        return std::make_tuple(BaseOp->getIndex(), Offset, SU->NodeNum) <
+               std::make_tuple(RHS.BaseOp->getIndex(), RHS.Offset,
+                               RHS.SU->NodeNum);
+
+      llvm_unreachable("MemOpClusterMutation only supports register or frame "
+                       "index bases.");
     }
   };
 
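The comparator change above keeps register-based and frame-index-based accesses in disjoint groups instead of ever comparing a register number against a frame index. The following is a minimal standalone sketch of that ordering technique, not LLVM code: BaseKind and SimpleMemOp are hypothetical stand-ins for MachineOperand's type and the MemOpInfo struct.

// Standalone sketch of "compare the base kind first" ordering; not LLVM's API.
#include <cassert>
#include <cstdint>
#include <tuple>

enum class BaseKind { Register, FrameIndex };

struct SimpleMemOp {
  BaseKind Kind;    // What kind of base operand the access uses.
  int Base;         // Register number or frame index, depending on Kind.
  int64_t Offset;   // Offset from the base.
  unsigned NodeNum; // Tie-breaker, like SUnit::NodeNum.

  bool operator<(const SimpleMemOp &RHS) const {
    // Different base kinds never compare by value; sorting keeps the two
    // groups separate so later clustering only sees comparable bases.
    if (Kind != RHS.Kind)
      return Kind < RHS.Kind;
    return std::make_tuple(Base, Offset, NodeNum) <
           std::make_tuple(RHS.Base, RHS.Offset, RHS.NodeNum);
  }
};

int main() {
  SimpleMemOp RegOp{BaseKind::Register, /*x1*/ 1, 16, 0};
  SimpleMemOp FIOp{BaseKind::FrameIndex, /*FI#0*/ 0, 0, 1};
  // Every register-based op orders before every frame-index-based op here,
  // regardless of offsets, so a sort never interleaves the two groups.
  assert(RegOp < FIOp && !(FIOp < RegOp));
  return 0;
}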
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 7652d8ea1a4..ac50aa7cdee 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1146,9 +1146,9 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
     return false;
 
-  // Retrieve the base register, offset from the base and width. Width
+  // Retrieve the base, offset from the base and width. Width
   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
-  // base registers are identical, and the offset of a lower memory access +
+  // bases are identical, and the offset of a lower memory access +
   // the width doesn't overlap the offset of a higher memory access,
   // then the memory accesses are different.
   if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
@@ -2035,8 +2035,9 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
   if (MI.hasOrderedMemoryRef())
     return false;
 
-  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
-  assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
+  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
+  assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
+         "Expected a reg or frame index operand.");
   if (!MI.getOperand(2).isImm())
     return false;
 
@@ -2086,11 +2087,13 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
   // Handle only loads/stores with base register followed by immediate offset.
   if (LdSt.getNumExplicitOperands() == 3) {
     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
-    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
+    if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
+        !LdSt.getOperand(2).isImm())
       return false;
   } else if (LdSt.getNumExplicitOperands() == 4) {
     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
-    if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
+    if (!LdSt.getOperand(1).isReg() ||
+        (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
         !LdSt.getOperand(3).isImm())
       return false;
   } else
@@ -2117,9 +2120,9 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
     Offset = LdSt.getOperand(3).getImm() * Scale;
   }
 
-  assert(
-      BaseOp->isReg() &&
-      "getMemOperandWithOffset only supports base operands of type register.");
+  assert((BaseOp->isReg() || BaseOp->isFI()) &&
+         "getMemOperandWithOffset only supports base "
+         "operands of type register or frame index.");
 
   return true;
 }
@@ -2275,31 +2278,33 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   return true;
 }
 
-// Scale the unscaled offsets. Returns false if the unscaled offset can't be
-// scaled.
-static bool scaleOffset(unsigned Opc, int64_t &Offset) {
-  unsigned OffsetStride = 1;
+static unsigned getOffsetStride(unsigned Opc) {
   switch (Opc) {
   default:
-    return false;
+    return 0;
   case AArch64::LDURQi:
   case AArch64::STURQi:
-    OffsetStride = 16;
-    break;
+    return 16;
   case AArch64::LDURXi:
   case AArch64::LDURDi:
   case AArch64::STURXi:
   case AArch64::STURDi:
-    OffsetStride = 8;
-    break;
+    return 8;
   case AArch64::LDURWi:
   case AArch64::LDURSi:
   case AArch64::LDURSWi:
   case AArch64::STURWi:
   case AArch64::STURSi:
-    OffsetStride = 4;
-    break;
+    return 4;
   }
+}
+
+// Scale the unscaled offsets. Returns false if the unscaled offset can't be
+// scaled.
+static bool scaleOffset(unsigned Opc, int64_t &Offset) {
+  unsigned OffsetStride = getOffsetStride(Opc);
+  if (OffsetStride == 0)
+    return false;
+
   // If the byte-offset isn't a multiple of the stride, we can't scale this
   // offset.
   if (Offset % OffsetStride != 0)
@@ -2311,6 +2316,19 @@ static bool scaleOffset(unsigned Opc, int64_t &Offset) {
   return true;
 }
 
+// Unscale the scaled offsets. Returns false if the scaled offset can't be
+// unscaled.
+static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
+  unsigned OffsetStride = getOffsetStride(Opc);
+  if (OffsetStride == 0)
+    return false;
+
+  // Convert the "element" offset used by scaled pair load/store instructions
+  // into the byte-offset used by unscaled.
+  Offset *= OffsetStride;
+  return true;
+}
+
 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
   if (FirstOpc == SecondOpc)
     return true;
@@ -2329,6 +2347,30 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
   return false;
 }
 
+static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
+                            int64_t Offset1, unsigned Opcode1, int FI2,
+                            int64_t Offset2, unsigned Opcode2) {
+  // Accesses through fixed stack object frame indices may access a different
+  // fixed stack slot. Check that the object offsets + offsets match.
+  if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
+    int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
+    int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
+    assert(ObjectOffset1 >= ObjectOffset2 && "Object offsets are not ordered.");
+    // Get the byte-offset from the object offset.
+    if (!unscaleOffset(Opcode1, Offset1) ||
+        !unscaleOffset(Opcode2, Offset2))
+      return false;
+    ObjectOffset1 += Offset1;
+    ObjectOffset2 += Offset2;
+    // Get the "element" index in the object.
+    if (!scaleOffset(Opcode1, ObjectOffset1) ||
+        !scaleOffset(Opcode2, ObjectOffset2))
+      return false;
+    return ObjectOffset2 + 1 == ObjectOffset1;
+  }
+
+  return FI1 == FI2;
+}
+
 /// Detect opportunities for ldp/stp formation.
 ///
 /// Only called for LdSt for which getMemOperandWithOffset returns true.
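Taken together, unscaleOffset and the fixed-object branch of shouldClusterFI reduce to plain integer arithmetic: each access becomes an absolute byte offset (object offset plus the access's byte offset), that byte offset becomes an element index via the access stride, and two accesses may cluster only when their indices are adjacent, with access 1 one element above access 2. The sketch below illustrates that arithmetic standalone; adjacentFixedObjects and its parameters are hypothetical names, not LLVM's interface.

// Standalone sketch of the fixed-object adjacency arithmetic; not LLVM code.
#include <cassert>
#include <cstdint>

// ObjOff*: byte offset of the fixed stack object (as in MachineFrameInfo).
// InsnOff*: byte offset encoded in the instruction.
// Stride: access size in bytes (see getOffsetStride above).
// Access 1 is expected to be the higher access, mirroring the
// "ObjectOffset1 >= ObjectOffset2" assertion in shouldClusterFI.
static bool adjacentFixedObjects(int64_t ObjOff1, int64_t InsnOff1,
                                 int64_t ObjOff2, int64_t InsnOff2,
                                 unsigned Stride) {
  int64_t Byte1 = ObjOff1 + InsnOff1; // Absolute byte offset of access 1.
  int64_t Byte2 = ObjOff2 + InsnOff2; // Absolute byte offset of access 2.
  if (Byte1 % Stride != 0 || Byte2 % Stride != 0)
    return false; // Not stride-aligned: cannot be expressed as a pair.
  // Adjacent "elements": access 2 sits exactly one element below access 1.
  return Byte2 / Stride + 1 == Byte1 / Stride;
}

int main() {
  // Two fixed 8-byte stack slots at object offsets -8 and -16, each accessed
  // at instruction offset 0 with an 8-byte access: element indices -1 and -2
  // are adjacent, so the accesses are clustering candidates.
  assert(adjacentFixedObjects(-8, 0, -16, 0, 8));
  // Slots at -8 and -24 give indices -1 and -3: not adjacent, no ldp/stp.
  assert(!adjacentFixedObjects(-8, 0, -24, 0, 8));
  return 0;
}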
@@ -2340,9 +2382,11 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
   if (BaseOp1.getType() != BaseOp2.getType())
     return false;
 
-  assert(BaseOp1.isReg() && "Only base registers are supported.");
+  assert((BaseOp1.isReg() ||
+          BaseOp1.isFI()) &&
+         "Only base registers and frame indices are supported.");
 
-  // Check for base regs.
+  // Check for both base regs and base FI.
   if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
     return false;
 
@@ -2379,7 +2423,17 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
     return false;
 
   // The caller should already have ordered First/SecondLdSt by offset.
-  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
+  // Note: except for non-equal frame index bases.
+  assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
+         "Caller should have ordered offsets.");
+
+  if (BaseOp1.isFI()) {
+    const MachineFrameInfo &MFI =
+        FirstLdSt.getParent()->getParent()->getFrameInfo();
+    return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
+                           BaseOp2.getIndex(), Offset2, SecondOpc);
+  }
+
   return Offset1 + 1 == Offset2;
 }
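For register bases, the condition that survives at the end of the hunk is offset adjacency in element units: shouldClusterMemOps reduces unscaled byte offsets to element offsets (via scaleOffset) before requiring Offset1 + 1 == Offset2. A minimal standalone sketch of that path, with illustrative names rather than LLVM's API:

// Standalone sketch of the register-base clustering path; not LLVM code.
#include <cassert>
#include <cstdint>

// Mirrors scaleOffset above for a known stride: byte -> element offset.
static bool toElementOffset(int64_t &Offset, unsigned Stride) {
  if (Offset % Stride != 0)
    return false; // Not a multiple of the access size: cannot pair.
  Offset /= Stride;
  return true;
}

static bool clusterRegBases(unsigned Reg1, int64_t Byte1, unsigned Reg2,
                            int64_t Byte2, unsigned Stride) {
  if (Reg1 != Reg2)
    return false; // Different base registers never cluster here.
  if (!toElementOffset(Byte1, Stride) || !toElementOffset(Byte2, Stride))
    return false;
  return Byte1 + 1 == Byte2; // e.g. [x0, #8] then [x0, #16] for 8-byte loads.
}

int main() {
  assert(clusterRegBases(0, 8, 0, 16, 8));  // Consecutive 8-byte slots: pair.
  assert(!clusterRegBases(0, 8, 0, 24, 8)); // One-slot gap: no ldp/stp.
  assert(!clusterRegBases(0, 8, 1, 16, 8)); // Different base registers.
  return 0;
}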

