diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-04-30 19:08:16 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-04-30 19:08:16 +0000 |
commit | 0084adc5165622ea838f9af1e5a0559cd128b483 (patch) | |
tree | a24f460232a0d35c1eca0b9045b4a22645aa2866 /llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | |
parent | 45c7205b617895a96073da30aabfdbae2bb8651c (diff) | |
download | bcm5719-llvm-0084adc5165622ea838f9af1e5a0559cd128b483.tar.gz bcm5719-llvm-0084adc5165622ea838f9af1e5a0559cd128b483.zip |
AMDGPU: Add Vega12 and Vega20
Changes by
Matt Arsenault
Konstantin Zhuravlyov
llvm-svn: 331215
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 40 |
1 file changed, 26 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index f4ff718e42d..897ffa948e2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2161,20 +2161,24 @@ static int64_t getFoldableImm(const MachineOperand* MO) { MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, MachineInstr &MI, LiveVariables *LV) const { + unsigned Opc = MI.getOpcode(); bool IsF16 = false; + bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64; - switch (MI.getOpcode()) { + switch (Opc) { default: return nullptr; case AMDGPU::V_MAC_F16_e64: IsF16 = true; LLVM_FALLTHROUGH; case AMDGPU::V_MAC_F32_e64: + case AMDGPU::V_FMAC_F32_e64: break; case AMDGPU::V_MAC_F16_e32: IsF16 = true; LLVM_FALLTHROUGH; - case AMDGPU::V_MAC_F32_e32: { + case AMDGPU::V_MAC_F32_e32: + case AMDGPU::V_FMAC_F32_e32: { int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); const MachineOperand *Src0 = &MI.getOperand(Src0Idx); @@ -2199,7 +2203,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod); - if (!Src0Mods && !Src1Mods && !Clamp && !Omod && + if (!IsFMA && !Src0Mods && !Src1Mods && !Clamp && !Omod && // If we have an SGPR input, we will violate the constant bus restriction. (!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) { if (auto Imm = getFoldableImm(Src2)) { @@ -2230,8 +2234,10 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, } } - return BuildMI(*MBB, MI, MI.getDebugLoc(), - get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32)) + assert((!IsFMA || !IsF16) && "fmac only expected with f32"); + unsigned NewOpc = IsFMA ? AMDGPU::V_FMA_F32 : + (IsF16 ? 
AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32); + return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc)) .add(*Dst) .addImm(Src0Mods ? Src0Mods->getImm() : 0) .add(*Src0) @@ -4048,17 +4054,23 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist, legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL); legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL); - unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor) - .add(Src0) - .add(Src1); + unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + if (ST.hasDLInsts()) { + BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest) + .add(Src0) + .add(Src1); + } else { + unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor) + .add(Src0) + .add(Src1); - unsigned Not = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), Not) - .addReg(Xor); + BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), NewDest) + .addReg(Xor); + } - MRI.replaceRegWith(Dest.getReg(), Not); - addUsersToMoveToVALUWorklist(Not, MRI, Worklist); + MRI.replaceRegWith(Dest.getReg(), NewDest); + addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist); } void SIInstrInfo::splitScalar64BitUnaryOp( |