diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-09-21 00:45:59 +0000 | 
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-09-21 00:45:59 +0000 | 
| commit | fdcdd88d5718c4785b7d3e3bc9b9e866b56590d5 (patch) | |
| tree | ff6b656c4eb6b7b0f4d31f4045c9844ee4e9f255 | |
| parent | be88ae0eb05d3036977fcd7816de9b18403d7756 (diff) | |
| download | bcm5719-llvm-fdcdd88d5718c4785b7d3e3bc9b9e866b56590d5.tar.gz bcm5719-llvm-fdcdd88d5718c4785b7d3e3bc9b9e866b56590d5.zip  | |
AMDGPU: Fix crash on immediate operand
We can have a v_mac with an immediate src0.
We can still fold if it's an inline immediate;
otherwise it already uses the constant bus.
llvm-svn: 313852
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir | 58 | 
2 files changed, 63 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 075717e1f8b..b2fbcce66d5 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2174,8 +2174,12 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,      int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),                                               AMDGPU::OpName::src0);      const MachineOperand *Src0 = &MI.getOperand(Src0Idx); +    if (!Src0->isReg() && !Src0->isImm()) +      return nullptr; +      if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))        return nullptr; +      break;    }    } @@ -2193,7 +2197,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,    if (!Src0Mods && !Src1Mods && !Clamp && !Omod &&        // If we have an SGPR input, we will violate the constant bus restriction. -      !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg())) { +      (!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {      if (auto Imm = getFoldableImm(Src2)) {        return BuildMI(*MBB, MI, MI.getDebugLoc(),                       get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32)) diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir index cd8931fb21a..b4e42e83374 100644 --- a/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir @@ -130,3 +130,61 @@ body:             |      %2 = V_MAC_F32_e32 killed %0, %1, %3, implicit %exec  ... + +# This can still fold if this is an inline immediate. 
+ +# GCN-LABEL: name: test_madak_inlineimm_src0_f32 +# GCN: %1 = V_MADMK_F32 1073741824, 1078523331, %2, implicit %exec + +--- +name:            test_madak_inlineimm_src0_f32 +registers: +  - { id: 0, class: vgpr_32} +  - { id: 1, class: vgpr_32 } +  - { id: 2, class: vgpr_32 } +body:             | +  bb.0: + +    %0 = V_MOV_B32_e32 1078523331, implicit %exec +    %1 = V_MAC_F32_e32 1073741824, %0, %2, implicit %exec + +... +# Non-inline immediate uses constant bus already. + +# GCN-LABEL: name: test_madak_otherimm_src0_f32 +# GCN: %1 = V_MAC_F32_e32 1120403456, %0, %1, implicit %exec + +--- +name:            test_madak_otherimm_src0_f32 +registers: +  - { id: 0, class: vgpr_32} +  - { id: 1, class: vgpr_32 } +  - { id: 2, class: vgpr_32 } +body:             | +  bb.0: + +    %0 = V_MOV_B32_e32 1078523331, implicit %exec +    %1 = V_MAC_F32_e32 1120403456, %0, %2, implicit %exec + +... +# Non-inline immediate uses constant bus already. + +# GCN-LABEL: name: test_madak_other_constantlike_src0_f32 +# GCN: %1 = V_MAC_F32_e32 %stack.0, %0, %1, implicit %exec +--- +name:            test_madak_other_constantlike_src0_f32 +registers: +  - { id: 0, class: vgpr_32} +  - { id: 1, class: vgpr_32 } +  - { id: 2, class: vgpr_32 } +stack: +  - { id: 0, name: "", type: default, offset: 0, size: 128, alignment: 8, +      callee-saved-register: '', local-offset: 0, di-variable: '', di-expression: '', +      di-location: '' } +body:             | +  bb.0: + +    %0 = V_MOV_B32_e32 1078523331, implicit %exec +    %1 = V_MAC_F32_e32 %stack.0, %0, %2, implicit %exec + +...  | 

