author    Matt Arsenault <Matthew.Arsenault@amd.com>  2018-04-30 19:08:16 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com>  2018-04-30 19:08:16 +0000
commit    0084adc5165622ea838f9af1e5a0559cd128b483 (patch)
tree      a24f460232a0d35c1eca0b9045b4a22645aa2866 /llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
parent    45c7205b617895a96073da30aabfdbae2bb8651c (diff)
AMDGPU: Add Vega12 and Vega20
Changes by Matt Arsenault and Konstantin Zhuravlyov.

llvm-svn: 331215
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp  |  40
1 file changed, 26 insertions(+), 14 deletions(-)
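Within this file, the first three hunks teach SIInstrInfo::convertToThreeAddress about V_FMAC_F32_e32/_e64: a two-address FMAC becomes a three-address V_FMA_F32 rather than V_MAD_F32, and the added !IsFMA check keeps fused operations out of the immediate-folding fast path guarded just below it. The standalone snippet that follows is illustrative only (not part of the commit and not LLVM code); it shows the rounding difference that makes a mad-style expansion, where the product is rounded before the add, an unsafe stand-in for a fused multiply-add.

// Illustrative only -- not LLVM code and not part of this commit. It shows why a
// fused multiply-add (single rounding, V_FMA-like) can differ from a mad-style
// expansion (product rounded to float first, then added, V_MAD-like).
#include <cmath>
#include <cstdio>

int main() {
  // Values chosen so rounding the product before the add changes the result.
  float a = 1.0f + 1.0f / 4096.0f;    // 1 + 2^-12, exactly representable
  float b = a;
  float c = -(1.0f + 1.0f / 2048.0f); // -(1 + 2^-11), exactly representable

  float prod  = a * b;               // rounded to float here ...
  float mad   = prod + c;            // ... then rounded again: mad-style, gives 0
  float fused = std::fmaf(a, b, c);  // single rounding: fma-style, gives 2^-24

  std::printf("mad = %a, fma = %a\n", mad, fused);
  return 0;
}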
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f4ff718e42d..897ffa948e2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2161,20 +2161,24 @@ static int64_t getFoldableImm(const MachineOperand* MO) {
MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
MachineInstr &MI,
LiveVariables *LV) const {
+ unsigned Opc = MI.getOpcode();
bool IsF16 = false;
+ bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64;
- switch (MI.getOpcode()) {
+ switch (Opc) {
default:
return nullptr;
case AMDGPU::V_MAC_F16_e64:
IsF16 = true;
LLVM_FALLTHROUGH;
case AMDGPU::V_MAC_F32_e64:
+ case AMDGPU::V_FMAC_F32_e64:
break;
case AMDGPU::V_MAC_F16_e32:
IsF16 = true;
LLVM_FALLTHROUGH;
- case AMDGPU::V_MAC_F32_e32: {
+ case AMDGPU::V_MAC_F32_e32:
+ case AMDGPU::V_FMAC_F32_e32: {
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::src0);
const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
@@ -2199,7 +2203,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
- if (!Src0Mods && !Src1Mods && !Clamp && !Omod &&
+ if (!IsFMA && !Src0Mods && !Src1Mods && !Clamp && !Omod &&
// If we have an SGPR input, we will violate the constant bus restriction.
(!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
if (auto Imm = getFoldableImm(Src2)) {
@@ -2230,8 +2234,10 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
}
}
- return BuildMI(*MBB, MI, MI.getDebugLoc(),
- get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
+ assert((!IsFMA || !IsF16) && "fmac only expected with f32");
+ unsigned NewOpc = IsFMA ? AMDGPU::V_FMA_F32 :
+ (IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32);
+ return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
.addImm(Src0Mods ? Src0Mods->getImm() : 0)
.add(*Src0)
@@ -4048,17 +4054,23 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
- unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor)
- .add(Src0)
- .add(Src1);
+ unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ if (ST.hasDLInsts()) {
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest)
+ .add(Src0)
+ .add(Src1);
+ } else {
+ unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor)
+ .add(Src0)
+ .add(Src1);
- unsigned Not = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), Not)
- .addReg(Xor);
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), NewDest)
+ .addReg(Xor);
+ }
- MRI.replaceRegWith(Dest.getReg(), Not);
- addUsersToMoveToVALUWorklist(Not, MRI, Worklist);
+ MRI.replaceRegWith(Dest.getReg(), NewDest);
+ addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
}
void SIInstrInfo::splitScalar64BitUnaryOp(
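The final hunk updates SIInstrInfo::lowerScalarXnor so that subtargets reporting ST.hasDLInsts() lower a scalar xnor moved to the VALU as a single V_XNOR_B32, while older subtargets keep the V_XOR_B32 + V_NOT_B32 expansion, now writing into the shared NewDest register. The standalone check below is illustrative only (not LLVM code); it exercises the identity both paths compute, xnor(a, b) == ~(a ^ b).

// Illustrative only -- not LLVM code. Both lowering paths produce the same value:
// a single xnor, or an xor followed by a bitwise not.
#include <cassert>
#include <cstdint>

static uint32_t xnor_direct(uint32_t a, uint32_t b) {
  return ~(a ^ b);        // V_XNOR_B32-like: one instruction
}

static uint32_t xnor_expanded(uint32_t a, uint32_t b) {
  uint32_t x = a ^ b;     // V_XOR_B32 into a temporary
  return ~x;              // V_NOT_B32 into the final destination
}

int main() {
  const uint32_t samples[] = {0u, 1u, 0x12345678u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t a : samples)
    for (uint32_t b : samples)
      assert(xnor_direct(a, b) == xnor_expanded(a, b));
  return 0;
}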