Diffstat (limited to 'llvm/lib')
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp             | 15
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp             |  4
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp                | 17
 llvm/lib/Target/AMDGPU/SIInstrInfo.td                 | 64
 llvm/lib/Target/AMDGPU/SIInstructions.td              | 45
 llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp            |  2
 llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp |  4
 llvm/lib/Target/AMDGPU/VOP2Instructions.td            | 21
 llvm/lib/Target/AMDGPU/VOP3Instructions.td            |  1
 9 files changed, 127 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9361b25db14..4a82d3a5879 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -762,14 +762,23 @@ static bool tryFoldInst(const SIInstrInfo *TII,
Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
- if (Src1->isIdenticalTo(*Src0)) {
+ int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
+ int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+ if (Src1->isIdenticalTo(*Src0) &&
+ (Src1ModIdx == -1 || !MI->getOperand(Src1ModIdx).getImm()) &&
+ (Src0ModIdx == -1 || !MI->getOperand(Src0ModIdx).getImm())) {
LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
+ auto &NewDesc =
+ TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
if (Src2Idx != -1)
MI->RemoveOperand(Src2Idx);
MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
- mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
- : getMovOpc(false)));
+ if (Src1ModIdx != -1)
+ MI->RemoveOperand(Src1ModIdx);
+ if (Src0ModIdx != -1)
+ MI->RemoveOperand(Src0ModIdx);
+ mutateCopyOp(*MI, NewDesc);
LLVM_DEBUG(dbgs() << *MI << '\n');
return true;
}
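
Note on the hunk above: the fold rewrites a v_cndmask with identical sources into a copy or move, and it is only safe when both source-modifier immediates are clear, since -v0 or |v0| is not the same value as v0 even when the register operands match. The descriptor is captured in NewDesc while Src0 is still valid, because RemoveOperand may invalidate operand pointers. A minimal sketch of the guard, with a hypothetical helper name (not part of this commit):

    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // A named modifier operand is "clear" when the opcode does not have it
    // at all (index == -1) or when its immediate is zero, i.e. no neg/abs
    // bit is set.
    static bool modifierIsClear(const MachineInstr &MI, int ModIdx) {
      return ModIdx == -1 || MI.getOperand(ModIdx).getImm() == 0;
    }
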
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6f48a2eabc5..38f27c5ec65 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3437,11 +3437,15 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
.addReg(SrcCond);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+ .addImm(0)
.addReg(Src0, 0, AMDGPU::sub0)
+ .addImm(0)
.addReg(Src1, 0, AMDGPU::sub0)
.addReg(SrcCondCopy);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+ .addImm(0)
.addReg(Src0, 0, AMDGPU::sub1)
+ .addImm(0)
.addReg(Src1, 0, AMDGPU::sub1)
.addReg(SrcCondCopy);
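
For reference, the VOP3 operand order after this change is src0_modifiers, src0, src1_modifiers, src1, src2, so each source register is now preceded by an immediate modifier operand. An annotated copy of the first builder call above (comments are mine):

    BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
        .addImm(0)                     // src0_modifiers: no neg/abs
        .addReg(Src0, 0, AMDGPU::sub0) // src0: selected when the lane bit is 0
        .addImm(0)                     // src1_modifiers: no neg/abs
        .addReg(Src1, 0, AMDGPU::sub0) // src1: selected when the lane bit is 1
        .addReg(SrcCondCopy);          // src2: per-lane condition mask
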
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0d90309c3d3..196ecd70f0d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -693,7 +693,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(Cond[0]);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
} else if (Cond.size() == 2) {
@@ -705,7 +707,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
.addImm(-1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
break;
@@ -716,7 +720,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
.addImm(0)
.addImm(-1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
break;
@@ -728,7 +734,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
break;
@@ -740,7 +748,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(TrueReg)
+ .addImm(0)
.addReg(FalseReg)
.addReg(SReg);
break;
@@ -754,7 +764,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
.addImm(-1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
break;
@@ -768,7 +780,9 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
.addImm(0)
.addImm(-1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
llvm_unreachable("Unhandled branch predicate EXECZ");
@@ -2579,7 +2593,8 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
// Can't shrink instruction with three operands.
// FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
// a special case for it. It can only be shrunk if the third operand
- // is vcc. We should handle this the same way we handle vopc, by addding
+ // is vcc, and src0_modifiers and src1_modifiers are not set.
+  // We should handle this the same way we handle vopc, by adding
// a register allocation hint pre-regalloc and then do the shrinking
// post-regalloc.
if (Src2) {
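
A sketch of the legality test the updated FIXME describes, as a hypothetical helper (not part of this commit): the VOP2 encoding of v_cndmask_b32 reads vcc implicitly and has no modifier operands, so the e64 form can only be shrunk when src2 is vcc and both source modifiers are clear.

    static bool cndmaskIsShrinkable(const SIInstrInfo &TII,
                                    const MachineInstr &MI) {
      const MachineOperand *Src2 =
          TII.getNamedOperand(MI, AMDGPU::OpName::src2);
      return Src2 && Src2->isReg() && Src2->getReg() == AMDGPU::VCC &&
             !TII.hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
             !TII.hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers);
    }
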
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index c3d9ff7310f..680c287e0e9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1283,7 +1283,7 @@ class isModifierType<ValueType SrcVT> {
}
// Return type of input modifiers operand for specified input operand
-class getSrcMod <ValueType VT> {
+class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
@@ -1296,7 +1296,7 @@ class getSrcMod <ValueType VT> {
FP16InputMods,
FP32InputMods
),
- Int32InputMods)
+ !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
);
}
@@ -1331,7 +1331,7 @@ class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
- bit HasIntClamp, bit HasModifiers, bit HasOMod,
+ bit HasIntClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret =
@@ -1369,16 +1369,33 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
/* endif */ )
/* NumSrcArgs == 3 */,
!if (!eq(HasModifiers, 1),
- // VOP3 with modifiers
- !if (!eq(HasOMod, 1),
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp, omod:$omod),
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp))
+ !if (!eq(HasSrc2Mods, 1),
+ // VOP3 with modifiers
+ !if (!eq(HasOMod, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp, omod:$omod),
+ !if (!eq(HasIntClamp, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2))),
+ // VOP3 with modifiers except src2
+ !if (!eq(HasOMod, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2RC:$src2, clampmod:$clamp, omod:$omod),
+ !if (!eq(HasIntClamp, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2RC:$src2, clampmod:$clamp),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2RC:$src2))))
/* else */,
// VOP3 without modifiers
!if (!eq(HasIntClamp, 1),
@@ -1743,9 +1760,10 @@ def PatGenMode {
int Pattern = 1;
}
-class VOPProfile <list<ValueType> _ArgVT> {
+class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0> {
field list<ValueType> ArgVT = _ArgVT;
+ field bit EnableF32SrcMods = _EnableF32SrcMods;
field ValueType DstVT = ArgVT[0];
field ValueType Src0VT = ArgVT[1];
@@ -1763,9 +1781,9 @@ class VOPProfile <list<ValueType> _ArgVT> {
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
- field Operand Src0Mod = getSrcMod<Src0VT>.ret;
- field Operand Src1Mod = getSrcMod<Src1VT>.ret;
- field Operand Src2Mod = getSrcMod<Src2VT>.ret;
+ field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
+ field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
+ field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret;
field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
@@ -1781,12 +1799,16 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
// TODO: Modifiers logic is somewhat adhoc here, to be refined later
- field bit HasModifiers = isModifierType<Src0VT>.ret;
+  // HasModifiers affects the normal and DPP encodings. We take note of
+  // EnableF32SrcMods, which enables modifiers for the i32 type.
+ field bit HasModifiers = BitOr<isModifierType<Src0VT>.ret, EnableF32SrcMods>.ret;
+ // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
+ // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
@@ -1795,7 +1817,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
- field bit HasClamp = HasModifiers;
+ field bit HasClamp = isModifierType<Src0VT>.ret;
field bit HasSDWAClamp = EmitDst;
field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
@@ -1829,8 +1851,8 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
- HasIntClamp, HasModifiers, HasOMod, Src0Mod, Src1Mod,
- Src2Mod>.ret;
+ HasIntClamp, HasModifiers, HasSrc2Mods,
+ HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp,
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
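
The net effect of the TableGen changes above: profiles that set EnableF32SrcMods get FP32InputMods operands even for i32 sources, so HasModifiers becomes true and the Ins64 list gains src0_modifiers/src1_modifiers, while HasClamp now keys off the source type alone, so the integer-typed v_cndmask profile does not also grow a clamp bit. A quick sanity check from the C++ side (sketch, assuming the usual AMDGPUBaseInfo lookup helpers):

    // After this change the named-operand lookup succeeds where it
    // previously returned -1.
    int ModIdx = AMDGPU::getNamedOperandIdx(AMDGPU::V_CNDMASK_B32_e64,
                                            AMDGPU::OpName::src0_modifiers);
    assert(ModIdx != -1 && "src0_modifiers is now in the operand list");
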
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 88acebb8969..a2b6f76da26 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -698,7 +698,7 @@ def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
multiclass SelectPat <ValueType vt, Instruction inst> {
def : GCNPat <
(vt (select i1:$src0, vt:$src1, vt:$src2)),
- (inst $src2, $src1, $src0)
+ (inst (i32 0), $src2, (i32 0), $src1, $src0)
>;
}
@@ -1104,12 +1104,14 @@ def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
def : GCNPat <
(i32 (sext i1:$src0)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1), $src0)
>;
class Ext32Pat <SDNode ext> : GCNPat <
(i32 (ext i1:$src0)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 1), $src0)
>;
def : Ext32Pat <zext>;
@@ -1240,8 +1242,9 @@ def : GCNPat <
class ZExt_i64_i1_Pat <SDNode ext> : GCNPat <
(i64 (ext i1:$src)),
(REG_SEQUENCE VReg_64,
- (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
- (S_MOV_B32 (i32 0)), sub1)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 1), $src),
+ sub0, (S_MOV_B32 (i32 0)), sub1)
>;
@@ -1259,8 +1262,10 @@ def : GCNPat <
def : GCNPat <
(i64 (sext i1:$src)),
(REG_SEQUENCE VReg_64,
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub0,
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub1)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub0,
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub1)
>;
class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, SDPatternOperator fp_to_int> : GCNPat <
@@ -1318,32 +1323,46 @@ def : GCNPat <
def : GCNPat <
(f16 (sint_to_fp i1:$src)),
- (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src))
+ (V_CVT_F16_F32_e32 (
+ V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
+ $src))
>;
def : GCNPat <
(f16 (uint_to_fp i1:$src)),
- (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src))
+ (V_CVT_F16_F32_e32 (
+ V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
+ $src))
>;
def : GCNPat <
(f32 (sint_to_fp i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
+ $src)
>;
def : GCNPat <
(f32 (uint_to_fp i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
+ $src)
>;
def : GCNPat <
(f64 (sint_to_fp i1:$src)),
- (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
+ (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1),
+ $src))
>;
def : GCNPat <
(f64 (uint_to_fp i1:$src)),
- (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
+ (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 1),
+ $src))
>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 25536176971..823c9040c87 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -483,6 +483,8 @@ void SILowerI1Copies::lowerCopiesFromI1() {
ConstrainRegs.insert(SrcReg);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
+ .addImm(0)
+ .addImm(0)
.addImm(-1)
.addReg(SrcReg);
DeadCopies.push_back(&MI);
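
Spelled out, the builder call above now produces the operand list src0_modifiers, src0, src1_modifiers, src1, src2 (annotations mine):

    BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)       // src0_modifiers
        .addImm(0)       // src0 = 0, used where the condition bit is 0
        .addImm(0)       // src1_modifiers
        .addImm(-1)      // src1 = -1, used where the condition bit is 1
        .addReg(SrcReg); // src2: the lowered i1 condition mask
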
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 783232071e9..bc30b29a396 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -171,6 +171,10 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
return AMDGPU::NoRegister;
+ if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
+      TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
+ return AMDGPU::NoRegister;
+
Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 9a0a81c97ef..1cb9bdb77ab 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -245,7 +245,8 @@ def VOP_MADMK_F32 : VOP_MADMK <f32>;
class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
- 0, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
+ 0, HasModifiers, HasModifiers, HasOMod,
+ Src0Mod, Src1Mod, Src2Mod>.ret;
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
VGPR_32:$src2, // stub argument
@@ -324,11 +325,12 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
let HasExtSDWA9 = 1;
}
-// Read in from vcc or arbitrary SGPR
-def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
+// Read in from vcc or arbitrary SGPR.
+// Enable f32 source modifiers on the i32 input type.
+def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
let Asm32 = "$vdst, $src0, $src1, vcc";
- let Asm64 = "$vdst, $src0, $src1, $src2";
+ let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
@@ -347,8 +349,8 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
src0_sel:$src0_sel, src1_sel:$src1_sel);
let InsDPP = (ins DstRCDPP:$old,
- Src0DPP:$src0,
- Src1DPP:$src1,
+ Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let HasExt = 1;
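
With source modifiers enabled, the Asm64 string above references $src0_modifiers and $src1_modifiers instead of the bare sources; the VOP3 printer folds the modifier into the operand, so the disassembled form can read, for example, v_cndmask_b32_e64 v0, -v1, |v2|, vcc (illustrative syntax, assuming the usual neg/abs printing).
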
@@ -644,7 +646,9 @@ def : GCNPat<
class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
(i16 (ext i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 1),
+                     $src)
>;
let Predicates = [Has16BitInsts] in {
@@ -681,7 +685,8 @@ def : ZExt_i16_i1_Pat<anyext>;
def : GCNPat <
(i16 (sext i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
>;
// Undo sub x, c -> add x, -c canonicalization since c is more likely
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index babd0417ecc..cc3de25eec2 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -190,6 +190,7 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProf
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
// v_div_scale_{f32|f64} do not support input modifiers.
let HasModifiers = 0;
+ let HasClamp = 0;
let HasOMod = 0;
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";