Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp             | 15
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp             |  4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp                | 17
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td                 | 64
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td              | 45
-rw-r--r--  llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp            |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp |  4
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP2Instructions.td            | 21
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3Instructions.td            |  1
9 files changed, 127 insertions, 46 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9361b25db14..4a82d3a5879 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -762,14 +762,23 @@ static bool tryFoldInst(const SIInstrInfo *TII,
       Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
     const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
     const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
-    if (Src1->isIdenticalTo(*Src0)) {
+    int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
+    int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+    if (Src1->isIdenticalTo(*Src0) &&
+        (Src1ModIdx == -1 || !MI->getOperand(Src1ModIdx).getImm()) &&
+        (Src0ModIdx == -1 || !MI->getOperand(Src0ModIdx).getImm())) {
       LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
+      auto &NewDesc =
+          TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
       int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
       if (Src2Idx != -1)
         MI->RemoveOperand(Src2Idx);
       MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
-      mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
-                                               : getMovOpc(false)));
+      if (Src1ModIdx != -1)
+        MI->RemoveOperand(Src1ModIdx);
+      if (Src0ModIdx != -1)
+        MI->RemoveOperand(Src0ModIdx);
+      mutateCopyOp(*MI, NewDesc);
       LLVM_DEBUG(dbgs() << *MI << '\n');
       return true;
     }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6f48a2eabc5..38f27c5ec65 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3437,11 +3437,15 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
       .addReg(SrcCond);
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+      .addImm(0)
       .addReg(Src0, 0, AMDGPU::sub0)
+      .addImm(0)
       .addReg(Src1, 0, AMDGPU::sub0)
       .addReg(SrcCondCopy);
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+      .addImm(0)
       .addReg(Src0, 0, AMDGPU::sub1)
+      .addImm(0)
       .addReg(Src1, 0, AMDGPU::sub1)
       .addReg(SrcCondCopy);
 
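V_CNDMASK_B32_e64 now carries an explicit modifier immediate in front of each source, so every producer of the instruction interleaves .addImm(0) with the source registers, as the builder calls above do. A minimal sketch of the new operand order (the register names here are placeholders, not taken from the patch):

    // VOP3 operand order after this change:
    //   vdst, src0_modifiers, src0, src1_modifiers, src1, src2
    BuildMI(MBB, I, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)         // src0_modifiers; 0 means no neg/abs
        .addReg(FalseReg)  // src0, selected where the lane-mask bit is 0
        .addImm(0)         // src1_modifiers
        .addReg(TrueReg)   // src1, selected where the lane-mask bit is 1
        .addReg(CondReg);  // src2, the condition lane mask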
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0d90309c3d3..196ecd70f0d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -693,7 +693,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
       .add(Cond[0]);
     BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+      .addImm(0)
       .addReg(FalseReg)
+      .addImm(0)
       .addReg(TrueReg)
       .addReg(SReg);
   } else if (Cond.size() == 2) {
@@ -705,7 +707,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
         .addImm(-1)
         .addImm(0);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       break;
@@ -716,7 +720,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
         .addImm(0)
         .addImm(-1);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       break;
@@ -728,7 +734,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
       BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
         .add(RegOp);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       break;
@@ -740,7 +748,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
       BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
         .add(RegOp);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(TrueReg)
+        .addImm(0)
         .addReg(FalseReg)
         .addReg(SReg);
       break;
@@ -754,7 +764,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
         .addImm(-1)
         .addImm(0);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       break;
@@ -768,7 +780,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
         .addImm(0)
         .addImm(-1);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       llvm_unreachable("Unhandled branch predicate EXECZ");
@@ -2579,7 +2593,8 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
   // Can't shrink instruction with three operands.
   // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
   // a special case for it. It can only be shrunk if the third operand
-  // is vcc. We should handle this the same way we handle vopc, by addding
+  // is vcc, and src0_modifiers and src1_modifiers are not set.
+  // We should handle this the same way we handle vopc, by adding
   // a register allocation hint pre-regalloc and then do the shrinking
   // post-regalloc.
   if (Src2) {
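The reworded FIXME pins down when the e64 form can still be shrunk: the carry-in must be vcc and neither source may carry modifiers, since the e32 encoding has room for neither. A sketch of that check, built only from helpers this patch already uses (illustrative, not the in-tree implementation):

    // A V_CNDMASK_B32_e64 can only drop to the e32 encoding when the
    // lane mask is literally VCC and both modifier immediates are clear.
    static bool isShrinkableCndmask(const MachineInstr &MI,
                                    const SIInstrInfo *TII) {
      const MachineOperand *Src2 =
          TII->getNamedOperand(MI, AMDGPU::OpName::src2);
      return Src2 && Src2->isReg() && Src2->getReg() == AMDGPU::VCC &&
             !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
             !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers);
    }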
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index c3d9ff7310f..680c287e0e9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1283,7 +1283,7 @@ class isModifierType<ValueType SrcVT> {
 }
 
 // Return type of input modifiers operand for specified input operand
-class getSrcMod <ValueType VT> {
+class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
   bit isFP = !if(!eq(VT.Value, f16.Value), 1,
              !if(!eq(VT.Value, f32.Value), 1,
              !if(!eq(VT.Value, f64.Value), 1,
@@ -1296,7 +1296,7 @@ class getSrcMod <ValueType VT> {
             FP16InputMods,
             FP32InputMods
           ),
-          Int32InputMods)
+          !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
   );
 }
 
@@ -1331,7 +1331,7 @@ class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
 // Returns the input arguments for VOP3 instructions for the given SrcVT.
 class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                 RegisterOperand Src2RC, int NumSrcArgs,
-                bit HasIntClamp, bit HasModifiers, bit HasOMod,
+                bit HasIntClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
 
   dag ret =
@@ -1369,16 +1369,33 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
             /* endif */ )
       /* NumSrcArgs == 3 */,
       !if (!eq(HasModifiers, 1),
-        // VOP3 with modifiers
-        !if (!eq(HasOMod, 1),
-          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
-               Src1Mod:$src1_modifiers, Src1RC:$src1,
-               Src2Mod:$src2_modifiers, Src2RC:$src2,
-               clampmod:$clamp, omod:$omod),
-          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
-               Src1Mod:$src1_modifiers, Src1RC:$src1,
-               Src2Mod:$src2_modifiers, Src2RC:$src2,
-               clampmod:$clamp))
+        !if (!eq(HasSrc2Mods, 1),
+          // VOP3 with modifiers
+          !if (!eq(HasOMod, 1),
+            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                 Src1Mod:$src1_modifiers, Src1RC:$src1,
+                 Src2Mod:$src2_modifiers, Src2RC:$src2,
+                 clampmod:$clamp, omod:$omod),
+            !if (!eq(HasIntClamp, 1),
+              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                   Src1Mod:$src1_modifiers, Src1RC:$src1,
+                   Src2Mod:$src2_modifiers, Src2RC:$src2,
+                   clampmod:$clamp),
+              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                   Src1Mod:$src1_modifiers, Src1RC:$src1,
+                   Src2Mod:$src2_modifiers, Src2RC:$src2))),
+          // VOP3 with modifiers except src2
+          !if (!eq(HasOMod, 1),
+            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                 Src1Mod:$src1_modifiers, Src1RC:$src1,
+                 Src2RC:$src2, clampmod:$clamp, omod:$omod),
+            !if (!eq(HasIntClamp, 1),
+              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                   Src1Mod:$src1_modifiers, Src1RC:$src1,
+                   Src2RC:$src2, clampmod:$clamp),
+              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                   Src1Mod:$src1_modifiers, Src1RC:$src1,
+                   Src2RC:$src2))))
       /* else */,
       // VOP3 without modifiers
         !if (!eq(HasIntClamp, 1),
@@ -1743,9 +1760,10 @@ def PatGenMode {
   int Pattern = 1;
 }
 
-class VOPProfile <list<ValueType> _ArgVT> {
+class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0> {
 
   field list<ValueType> ArgVT = _ArgVT;
+  field bit EnableF32SrcMods = _EnableF32SrcMods;
 
   field ValueType DstVT = ArgVT[0];
   field ValueType Src0VT = ArgVT[1];
@@ -1763,9 +1781,9 @@ class VOPProfile <list<ValueType> _ArgVT> {
   field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
   field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
   field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
-  field Operand Src0Mod = getSrcMod<Src0VT>.ret;
-  field Operand Src1Mod = getSrcMod<Src1VT>.ret;
-  field Operand Src2Mod = getSrcMod<Src2VT>.ret;
+  field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
+  field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
+  field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
   field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret;
   field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
   field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
@@ -1781,12 +1799,16 @@ class VOPProfile <list<ValueType> _ArgVT> {
   field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
 
   // TODO: Modifiers logic is somewhat adhoc here, to be refined later
-  field bit HasModifiers = isModifierType<Src0VT>.ret;
+  // HasModifiers affects the normal and DPP encodings. We take note of
+  // EnableF32SrcMods, which enables modifiers for the i32 type.
+  field bit HasModifiers = BitOr<isModifierType<Src0VT>.ret, EnableF32SrcMods>.ret;
 
+  // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
   field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
   field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
   field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
 
+  // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
   field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
   field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
   field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
@@ -1795,7 +1817,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
   field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
   field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
 
-  field bit HasClamp = HasModifiers;
+  field bit HasClamp = isModifierType<Src0VT>.ret;
   field bit HasSDWAClamp = EmitDst;
   field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
   field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
@@ -1829,8 +1851,8 @@ class VOPProfile <list<ValueType> _ArgVT> {
 
   field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
   field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
-                             HasIntClamp, HasModifiers, HasOMod, Src0Mod, Src1Mod,
-                             Src2Mod>.ret;
+                             HasIntClamp, HasModifiers, HasSrc2Mods,
+                             HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
   field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
                                    NumSrcArgs, HasClamp,
                                    Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
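For a profile built with EnableF32SrcMods, such as VOP2e_I32_I32_I32_I1 below in VOP2Instructions.td, getIns64 now yields (src0_modifiers, src0, src1_modifiers, src1, src2) with no clamp or omod, because HasClamp is keyed to isModifierType<Src0VT> rather than HasModifiers and Src0VT is i32. A small probe of the generated operand table that would confirm this layout (a sketch assuming the usual AMDGPUBaseInfo helpers and using namespace llvm):

    void checkCndmaskOperandLayout() {
      const unsigned Opc = AMDGPU::V_CNDMASK_B32_e64;
      // getNamedOperandIdx returns -1 when the instruction lacks the operand.
      int Src0Mods =
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
      int Src0 = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
      assert(Src0Mods != -1 && Src0Mods + 1 == Src0 &&
             "each source is immediately preceded by its modifier immediate");
      assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) == -1 &&
             "the i32 profile still has no clamp operand");
    }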
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 88acebb8969..a2b6f76da26 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -698,7 +698,7 @@ def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
 multiclass SelectPat <ValueType vt, Instruction inst> {
   def : GCNPat <
     (vt (select i1:$src0, vt:$src1, vt:$src2)),
-    (inst $src2, $src1, $src0)
+    (inst (i32 0), $src2, (i32 0), $src1, $src0)
   >;
 }
 
@@ -1104,12 +1104,14 @@ def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
 
 def : GCNPat <
   (i32 (sext i1:$src0)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src0)
 >;
 
 class Ext32Pat <SDNode ext> : GCNPat <
   (i32 (ext i1:$src0)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 1), $src0)
 >;
 
 def : Ext32Pat <zext>;
@@ -1240,8 +1242,9 @@ def : GCNPat <
 class ZExt_i64_i1_Pat <SDNode ext> : GCNPat <
   (i64 (ext i1:$src)),
     (REG_SEQUENCE VReg_64,
-      (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
-      (S_MOV_B32 (i32 0)), sub1)
+      (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                         /*src1mod*/(i32 0), /*src1*/(i32 1), $src),
+      sub0, (S_MOV_B32 (i32 0)), sub1)
 >;
 
 
@@ -1259,8 +1262,10 @@ def : GCNPat <
 def : GCNPat <
   (i64 (sext i1:$src)),
     (REG_SEQUENCE VReg_64,
-      (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub0,
-      (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub1)
+      (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                         /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub0,
+      (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                         /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub1)
 >;
 
 class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, SDPatternOperator fp_to_int> : GCNPat <
@@ -1318,32 +1323,46 @@ def : GCNPat <
 
 def : GCNPat <
   (f16 (sint_to_fp i1:$src)),
-  (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src))
+  (V_CVT_F16_F32_e32 (
+      V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                        /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
+                        $src))
 >;
 
 def : GCNPat <
   (f16 (uint_to_fp i1:$src)),
-  (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src))
+  (V_CVT_F16_F32_e32 (
+      V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                        /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
+                        $src))
 >;
 
 def : GCNPat <
   (f32 (sint_to_fp i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
+                     $src)
 >;
 
 def : GCNPat <
   (f32 (uint_to_fp i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
+                     $src)
 >;
 
 def : GCNPat <
   (f64 (sint_to_fp i1:$src)),
-  (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
+  (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                                        /*src1mod*/(i32 0), /*src1*/(i32 -1),
+                                        $src))
 >;
 
 def : GCNPat <
   (f64 (uint_to_fp i1:$src)),
-  (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
+  (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                                        /*src1mod*/(i32 0), /*src1*/(i32 1),
+                                        $src))
 >;
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 25536176971..823c9040c87 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -483,6 +483,8 @@ void SILowerI1Copies::lowerCopiesFromI1() {
       ConstrainRegs.insert(SrcReg);
       BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
         .addImm(0)
+        .addImm(0)
+        .addImm(0)
         .addImm(-1)
         .addReg(SrcReg);
       DeadCopies.push_back(&MI);
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 783232071e9..bc30b29a396 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -171,6 +171,10 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
   if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
     return AMDGPU::NoRegister;
 
+  if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
+      TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
+    return AMDGPU::NoRegister;
+
   Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
   Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
   MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
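The new early-out mirrors the SIFoldOperands change: folding the v_cndmask/v_cmp pair is only safe when both sources are unmodified. hasModifiersSet amounts to an operand probe along these lines (a hypothetical helper paraphrased from its uses in this patch, not copied from SIInstrInfo):

    // True when the named modifier operand exists and has any neg/abs
    // bit set in its immediate.
    static bool hasAnySrcMod(const MachineInstr &MI, const SIInstrInfo *TII,
                             unsigned OpName) {
      const MachineOperand *Mods = TII->getNamedOperand(MI, OpName);
      return Mods && Mods->getImm() != 0;
    }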
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 9a0a81c97ef..1cb9bdb77ab 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -245,7 +245,8 @@ def VOP_MADMK_F32 : VOP_MADMK <f32>;
 class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
   let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
   let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
-                       0, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
+                       0, HasModifiers, HasModifiers, HasOMod,
+                       Src0Mod, Src1Mod, Src2Mod>.ret;
   let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     VGPR_32:$src2, // stub argument
@@ -324,11 +325,12 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
   let HasExtSDWA9 = 1;
 }
 
-// Read in from vcc or arbitrary SGPR
-def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
+// Read in from vcc or arbitrary SGPR.
+// Enable f32 source modifiers on i32 input type.
+def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
   let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
   let Asm32 = "$vdst, $src0, $src1, vcc";
-  let Asm64 = "$vdst, $src0, $src1, $src2";
+  let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
   let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
   let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
   let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
@@ -347,8 +349,8 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
                        src0_sel:$src0_sel, src1_sel:$src1_sel);
 
   let InsDPP = (ins DstRCDPP:$old,
-                    Src0DPP:$src0,
-                    Src1DPP:$src1,
+                    Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                     bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
   let HasExt = 1;
@@ -644,7 +646,9 @@ def : GCNPat<
 
 class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
   (i16 (ext i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 1),
+                     $src)
 >;
 
 let Predicates = [Has16BitInsts] in {
@@ -681,7 +685,8 @@ def : ZExt_i16_i1_Pat<anyext>;
 
 def : GCNPat <
   (i16 (sext i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
 >;
 
 // Undo sub x, c -> add x, -c canonicalization since c is more likely
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index babd0417ecc..cc3de25eec2 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -190,6 +190,7 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProf
 class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
   // v_div_scale_{f32|f64} do not support input modifiers.
   let HasModifiers = 0;
+  let HasClamp = 0;
   let HasOMod = 0;
   let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
   let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
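Taken together, the VOP2e/getSrcMod plumbing means the i32-typed sources of v_cndmask accept the f32 neg/abs modifiers, so a select between modified float values can stay a single instruction. A hypothetical producer (the SISrcMods names come from SIDefines.h; the registers are invented for illustration):

    // Emits: v_cndmask_b32_e64 dst, -a, |b|, cc
    BuildMI(MBB, I, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(SISrcMods::NEG)  // src0_modifiers: negate src0
        .addReg(AReg)
        .addImm(SISrcMods::ABS)  // src1_modifiers: absolute value of src1
        .addReg(BReg)
        .addReg(CondReg);        // src2: lane mask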