diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 19 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 31 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 56 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 14 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 16 |
7 files changed, 141 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 240afb75197..80e45d9c2c6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -110,8 +110,11 @@ private: SDValue &Offset, SDValue &GLC) const; SDNode *SelectAddrSpaceCast(SDNode *N); bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; + bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, + SDValue &Clamp, SDValue &Omod) const; bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Omod) const; @@ -1317,6 +1320,12 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, return true; } +bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + bool Res = SelectVOP3Mods(In, Src, SrcMods); + return Res && cast<ConstantSDNode>(SrcMods)->isNullValue(); +} + bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const { @@ -1328,6 +1337,16 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, return SelectVOP3Mods(In, Src, SrcMods); } +bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src, + SDValue &SrcMods, SDValue &Clamp, + SDValue &Omod) const { + bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod); + + return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() && + cast<ConstantSDNode>(Clamp)->isNullValue() && + cast<ConstantSDNode>(Omod)->isNullValue(); +} + bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Omod) const { diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index d14e37a6461..c2887255cc1 100644 --- 
a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -126,11 +126,42 @@ static bool updateOperand(FoldCandidate &Fold, return false; } +static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList, + const MachineInstr *MI) { + for (auto Candidate : FoldList) { + if (Candidate.UseMI == MI) + return true; + } + return false; +} + static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList, MachineInstr *MI, unsigned OpNo, MachineOperand *OpToFold, const SIInstrInfo *TII) { if (!TII->isOperandLegal(MI, OpNo, OpToFold)) { + + // Special case for v_mac_f32_e64 if we are trying to fold into src2 + unsigned Opc = MI->getOpcode(); + if (Opc == AMDGPU::V_MAC_F32_e64 && + (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) { + // Check if changing this to a v_mad_f32 instruction will allow us to + // fold the operand. + MI->setDesc(TII->get(AMDGPU::V_MAD_F32)); + bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII); + if (FoldAsMAD) { + MI->untieRegOperand(OpNo); + return true; + } + MI->setDesc(TII->get(Opc)); + } + + // If we are already folding into another operand of MI, then + // we can't commute the instruction, otherwise we risk making the + // other fold illegal. + if (isUseMIInFoldList(FoldList, MI)) + return false; + // Operand is not legal, so try to commute the instruction to // see if this makes it possible to fold. unsigned CommuteIdx0; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 288ab7523e3..8775a1f69ca 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -924,7 +924,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, return false; unsigned Opc = UseMI->getOpcode(); - if (Opc == AMDGPU::V_MAD_F32) { + if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) { // Don't fold if we are using source modifiers. 
The new VOP2 instructions // don't have them. if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) || @@ -963,9 +963,9 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, // instead of having to modify in place. // Remove these first since they are at the end. - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32, + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32, + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); unsigned Src1Reg = Src1->getReg(); @@ -980,6 +980,14 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, Src1->setSubReg(Src2SubReg); Src1->setIsKill(Src2->isKill()); + if (Opc == AMDGPU::V_MAC_F32_e64) { + UseMI->untieRegOperand( + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); + } + + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, + AMDGPU::OpName::src2)); + // ChangingToImmediate adds Src2 back to the instruction. Src2->ChangeToImmediate(Imm); removeModOperands(*UseMI); @@ -1010,11 +1018,17 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, // instead of having to modify in place. // Remove these first since they are at the end. - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32, + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32, + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); + if (Opc == AMDGPU::V_MAC_F32_e64) { + UseMI->untieRegOperand( + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); + } + + // ChangingToImmediate adds Src2 back to the instruction. Src2->ChangeToImmediate(Imm); // These come before src2. 
@@ -1126,6 +1140,38 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, return false; } +MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, + MachineBasicBlock::iterator &MI, + LiveVariables *LV) const { + + switch (MI->getOpcode()) { + default: return nullptr; + case AMDGPU::V_MAC_F32_e64: break; + case AMDGPU::V_MAC_F32_e32: { + const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); + if (Src0->isImm() && !isInlineConstant(*Src0, 4)) + return nullptr; + break; + } + } + + const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::dst); + const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); + const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1); + const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2); + + return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32)) + .addOperand(*Dst) + .addImm(0) // Src0 mods + .addOperand(*Src0) + .addImm(0) // Src1 mods + .addOperand(*Src1) + .addImm(0) // Src2 mods + .addOperand(*Src2) + .addImm(0) // clamp + .addImm(0); // omod +} + bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { int64_t SVal = Imm.getSExtValue(); if (SVal >= -16 && SVal <= 64) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 0382272068d..015ea12d459 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -144,6 +144,10 @@ public: unsigned getMachineCSELookAheadLimit() const override { return 500; } + MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB, + MachineBasicBlock::iterator &MI, + LiveVariables *LV) const override; + bool isSALU(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SALU; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index fcb58d5da3b..b39a7871464 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -529,9 +529,11 @@ def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">; def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">; def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">; +def VOP3NoMods0 : ComplexPattern<untyped, 4, "SelectVOP3NoMods0">; def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">; def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">; def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">; +def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">; //===----------------------------------------------------------------------===// // SI assembler operands @@ -1113,6 +1115,13 @@ def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> { field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2); field string Asm = "$dst, $src0, $vsrc1, $src2"; } +def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> { + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); + let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3, + HasModifiers>.ret; + let Asm32 = getAsm32<2>.ret; + let Asm64 = getAsm64<2, HasModifiers>.ret; +} def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 8c8d836776d..1ee63c67582 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1488,7 +1488,10 @@ defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>; defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>; defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>; -defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>; +let Constraints = "$dst = $src2", DisableEncoding="$src2", + 
isConvertibleToThreeAddress = 1 in { +defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_MAC>; +} } // End isCommutable = 1 defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">; @@ -2206,6 +2209,15 @@ def : Pat < (V_CNDMASK_B32_e64 $src2, $src1, $src0) >; +// Pattern for V_MAC_F32 +def : Pat < + (fmad (VOP3NoMods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3NoMods f32:$src1, i32:$src1_modifiers), + (VOP3NoMods f32:$src2, i32:$src2_modifiers)), + (V_MAC_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1, + $src2_modifiers, $src2, $clamp, $omod) +>; + /********** ======================= **********/ /********** Image sampling patterns **********/ /********** ======================= **********/ diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index cb9d7f7c59d..e7511e6a457 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -94,8 +94,15 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII, // is vcc. We should handle this the same way we handle vopc, by adding // a register allocation hint pre-regalloc and then do the shrinking // post-regalloc. - if (Src2) - return false; + if (Src2) { + if (MI.getOpcode() != AMDGPU::V_MAC_F32_e64) + return false; + + const MachineOperand *Src2Mod = + TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers); + if (!isVGPR(Src2, TRI, MRI) || (Src2Mod && Src2Mod->getImm() != 0)) + return false; + } const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); const MachineOperand *Src1Mod = @@ -259,6 +266,11 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (Src1) Inst32.addOperand(*Src1); + const MachineOperand *Src2 = + TII->getNamedOperand(MI, AMDGPU::OpName::src2); + if (Src2) + Inst32.addOperand(*Src2); + ++NumInstructionsShrunk; MI.eraseFromParent(); |