diff options
Diffstat (limited to 'llvm')
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp        | 35 ++++++++++++++++++++++++++++++---------
 llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll |  6 ++----
 2 files changed, 28 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index b55dee68d51..e1257b1d33e 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -48,24 +48,36 @@ public:
 
 struct FoldCandidate {
   MachineInstr *UseMI;
-  unsigned UseOpNo;
-  MachineOperand *OpToFold;
-  uint64_t ImmToFold;
+  union {
+    MachineOperand *OpToFold;
+    uint64_t ImmToFold;
+    int FrameIndexToFold;
+  };
+  unsigned char UseOpNo;
+  MachineOperand::MachineOperandType Kind;
 
   FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
-                UseMI(MI), UseOpNo(OpNo) {
-
+    UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
     if (FoldOp->isImm()) {
-      OpToFold = nullptr;
       ImmToFold = FoldOp->getImm();
+    } else if (FoldOp->isFI()) {
+      FrameIndexToFold = FoldOp->getIndex();
     } else {
       assert(FoldOp->isReg());
       OpToFold = FoldOp;
     }
   }
 
+  bool isFI() const {
+    return Kind == MachineOperand::MO_FrameIndex;
+  }
+
   bool isImm() const {
-    return !OpToFold;
+    return Kind == MachineOperand::MO_Immediate;
+  }
+
+  bool isReg() const {
+    return Kind == MachineOperand::MO_Register;
   }
 };
 
@@ -107,6 +119,11 @@ static bool updateOperand(FoldCandidate &Fold,
     return true;
   }
 
+  if (Fold.isFI()) {
+    Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+    return true;
+  }
+
   MachineOperand *New = Fold.OpToFold;
   if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
       TargetRegisterInfo::isVirtualRegister(New->getReg())) {
@@ -448,7 +465,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
 
       unsigned OpSize = TII->getOpSize(MI, 1);
       MachineOperand &OpToFold = MI.getOperand(1);
-      bool FoldingImm = OpToFold.isImm();
+      bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
 
       // FIXME: We could also be folding things like FrameIndexes and
       // TargetIndexes.
@@ -500,7 +517,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
       for (FoldCandidate &Fold : FoldList) {
         if (updateOperand(Fold, TRI)) {
           // Clear kill flags.
-          if (!Fold.isImm()) {
+          if (Fold.isReg()) {
             assert(Fold.OpToFold && Fold.OpToFold->isReg());
             // FIXME: Probably shouldn't bother trying to fold if not an
             // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
index 6e6f289f5d6..eb554e21730 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
@@ -7,10 +7,8 @@
 ;
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
-; CHECK: v_mov_b32_e32 [[HI_CONST:v[0-9]+]], 0x200
-; CHECK: v_mov_b32_e32 [[LO_CONST:v[0-9]+]], 0
-; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, [[BYTES]], [[HI_CONST]]
-; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BYTES]], [[LO_CONST]]
+; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
+; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
 define amdgpu_ps float @main(i32 %idx) {

