summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/R600
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/R600')
-rw-r--r--llvm/lib/Target/R600/AMDGPUISelLowering.cpp5
-rw-r--r--llvm/lib/Target/R600/AMDGPUInstrInfo.td7
-rw-r--r--llvm/lib/Target/R600/SIInstrInfo.td22
-rw-r--r--llvm/lib/Target/R600/SIInstructions.td19
4 files changed, 46 insertions, 7 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
index d577d689f66..d96f03aaeb0 100644
--- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -907,10 +907,9 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::AMDGPU_div_fmas:
- // FIXME: Dropping bool parameter. Work is needed to support the implicit
- // read from VCC.
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(4));
case Intrinsic::AMDGPU_div_fixup:
return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
diff --git a/llvm/lib/Target/R600/AMDGPUInstrInfo.td b/llvm/lib/Target/R600/AMDGPUInstrInfo.td
index 0e34392bd50..d657ad05c8c 100644
--- a/llvm/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/R600/AMDGPUInstrInfo.td
@@ -35,6 +35,11 @@ def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;
+// float, float, float, vcc
+def AMDGPUFmasOp : SDTypeProfile<1, 4,
+ [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
+>;
+
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
@@ -153,7 +158,7 @@ def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
// Special case divide FMA with scale and flags (src0 = Quotient,
// src1 = Denominator, src2 = Numerator).
-def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>;
+def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
// Single or double precision division fixup.
// Special case divide fixup and flags(src0 = Quotient, src1 =
diff --git a/llvm/lib/Target/R600/SIInstrInfo.td b/llvm/lib/Target/R600/SIInstrInfo.td
index cb16be5f2f7..4082b570c41 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.td
+++ b/llvm/lib/Target/R600/SIInstrInfo.td
@@ -1300,6 +1300,28 @@ multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
P.NumSrcArgs, P.HasModifiers
>;
+// Special case for v_div_fmas_{f32|f64}, since it seems to be the
+// only VOP instruction that implicitly reads VCC.
+multiclass VOP3_VCC_Inst <vop3 op, string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag> : VOP3_Helper <
+ op, opName,
+ P.Outs,
+ (ins InputModsNoDefault:$src0_modifiers, P.Src0RC64:$src0,
+ InputModsNoDefault:$src1_modifiers, P.Src1RC64:$src1,
+ InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
+ ClampMod:$clamp,
+ omod:$omod),
+ " $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod",
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)),
+ (i1 VCC)))],
+ 3, 1
+>;
+
multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,
string opName, list<dag> pattern> :
VOP3b_3_m <
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 032d6c2abfc..0f14dc31136 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -1744,14 +1744,27 @@ let SchedRW = [WriteDouble, WriteSALU] in {
defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>;
} // let SchedRW = [WriteDouble]
-let isCommutable = 1 in {
-defm V_DIV_FMAS_F32 : VOP3Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
+let isCommutable = 1, Uses = [VCC] in {
+
+// v_div_fmas_f32:
+// result = src0 * src1 + src2
+// if (vcc)
+// result *= 2^32
+//
+defm V_DIV_FMAS_F32 : VOP3_VCC_Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
VOP_F32_F32_F32_F32, AMDGPUdiv_fmas
>;
+
let SchedRW = [WriteDouble] in {
-defm V_DIV_FMAS_F64 : VOP3Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
+// v_div_fmas_f64:
+// result = src0 * src1 + src2
+// if (vcc)
+// result *= 2^64
+//
+defm V_DIV_FMAS_F64 : VOP3_VCC_Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
VOP_F64_F64_F64_F64, AMDGPUdiv_fmas
>;
+
} // End SchedRW = [WriteDouble]
} // End isCommutable = 1
OpenPOWER on IntegriCloud