diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 72 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir | 27 | 
2 files changed, 50 insertions, 49 deletions
| diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index dcfc39e6755..7fdbf97fe40 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -216,53 +216,55 @@ static bool updateOperand(FoldCandidate &Fold,          break;        }      } +  } -    if (Fold.needsShrink()) { -      MachineBasicBlock *MBB = MI->getParent(); -      auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); -      if (Liveness != MachineBasicBlock::LQR_Dead) -        return false; +  if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) { +    MachineBasicBlock *MBB = MI->getParent(); +    auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); +    if (Liveness != MachineBasicBlock::LQR_Dead) +      return false; -      MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); -      int Op32 = Fold.getShrinkOpcode(); -      MachineOperand &Dst0 = MI->getOperand(0); -      MachineOperand &Dst1 = MI->getOperand(1); -      assert(Dst0.isDef() && Dst1.isDef()); +    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); +    int Op32 = Fold.getShrinkOpcode(); +    MachineOperand &Dst0 = MI->getOperand(0); +    MachineOperand &Dst1 = MI->getOperand(1); +    assert(Dst0.isDef() && Dst1.isDef()); -      bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); +    bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); -      const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); -      unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); +    const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); +    unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); -      MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); +    MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); -      if (HaveNonDbgCarryUse) { -        BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg()) -          .addReg(AMDGPU::VCC, RegState::Kill); -      } - -      // Keep the old instruction around to avoid breaking iterators, but -      // replace it with a dummy instruction to remove uses. -      // -      // FIXME: We should not invert how this pass looks at operands to avoid -      // this. Should track set of foldable movs instead of looking for uses -      // when looking at a use. -      Dst0.setReg(NewReg0); -      for (unsigned I = MI->getNumOperands() - 1; I > 0; --I) -        MI->RemoveOperand(I); -      MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF)); - -      if (Fold.isCommuted()) -        TII.commuteInstruction(*Inst32, false); -      return true; +    if (HaveNonDbgCarryUse) { +      BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg()) +        .addReg(AMDGPU::VCC, RegState::Kill);      } -    Old.ChangeToImmediate(Fold.ImmToFold); +    // Keep the old instruction around to avoid breaking iterators, but +    // replace it with a dummy instruction to remove uses. +    // +    // FIXME: We should not invert how this pass looks at operands to avoid +    // this. Should track set of foldable movs instead of looking for uses +    // when looking at a use. +    Dst0.setReg(NewReg0); +    for (unsigned I = MI->getNumOperands() - 1; I > 0; --I) +      MI->RemoveOperand(I); +    MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF)); + +    if (Fold.isCommuted()) +      TII.commuteInstruction(*Inst32, false);      return true;    }    assert(!Fold.needsShrink() && "not handled"); +  if (Fold.isImm()) { +    Old.ChangeToImmediate(Fold.ImmToFold); +    return true; +  } +    if (Fold.isFI()) {      Old.ChangeToFrameIndex(Fold.FrameIndexToFold);      return true; @@ -363,7 +365,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,        if ((Opc == AMDGPU::V_ADD_I32_e64 ||             Opc == AMDGPU::V_SUB_I32_e64 ||             Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME -          OpToFold->isImm()) { +          (OpToFold->isImm() || OpToFold->isFI())) {          MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();          // Verify the other operand is a VGPR, otherwise we would violate the diff --git a/llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir b/llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir index 151d0d38809..d5058c026a1 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir @@ -16,8 +16,8 @@ body:             |      ; GCN: liveins: $vgpr0      ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec      ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 -    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[COPY]], 0, implicit $exec -    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] +    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]      %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec      %1:vgpr_32 = COPY $vgpr0      %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec @@ -40,8 +40,8 @@ body:             |      ; GCN: liveins: $vgpr0      ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0      ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec -    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec -    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] +    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]      %0:vgpr_32 = COPY $vgpr0      %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec      %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec @@ -64,7 +64,7 @@ body:             |      ; GCN: liveins: $sgpr0      ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec      ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 -    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[COPY]], 0, implicit $exec +    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec      ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]      %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec      %1:sreg_32_xm0 = COPY $sgpr0 @@ -88,7 +88,7 @@ body:             |      ; GCN: liveins: $sgpr0      ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0      ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec -    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec +    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[COPY]], 0, implicit $exec      ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]      %0:sreg_32_xm0 = COPY $sgpr0      %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec @@ -112,8 +112,8 @@ body:             |      ; GCN: liveins: $vgpr0      ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0      ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 -    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[COPY]], 0, implicit $exec -    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] +    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]      %0:sreg_32_xm0 = S_MOV_B32 %stack.0      %1:vgpr_32 = COPY $vgpr0      %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec @@ -136,8 +136,8 @@ body:             |      ; GCN: liveins: $vgpr0      ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0      ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 -    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[S_MOV_B32_]], 0, implicit $exec -    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] +    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]      %0:vgpr_32 = COPY $vgpr0      %1:sreg_32_xm0 = S_MOV_B32 %stack.0      %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec @@ -157,8 +157,8 @@ body:             |      ; GCN-LABEL: name: shrink_vgpr_imm_fi_vgpr_v_add_i32_e64_no_carry_out_use      ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec -    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], 16, 0, implicit $exec -    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] +    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 16, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]      %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec      %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec      %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec @@ -199,8 +199,7 @@ body:             |      ; GCN-LABEL: name: shrink_vgpr_k_fi_vgpr_v_add_i32_e64_no_carry_out_use      ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec -    ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec -    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit-def $vcc, implicit $exec +    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 1234, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec      ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]      %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec      %1:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec | 

