summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-09-07 18:05:07 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-09-07 18:05:07 +0000
commitd7e2303df2c281d779d772ec5f6c8338afce5dd9 (patch)
tree4c9d7008eec11b6a56374ff4531e3c67dc8843c3 /llvm/lib
parent61ec738b60a4fb47ec9b7195de55f1ecb5cbdb45 (diff)
downloadbcm5719-llvm-d7e2303df2c281d779d772ec5f6c8338afce5dd9.tar.gz
bcm5719-llvm-d7e2303df2c281d779d772ec5f6c8338afce5dd9.zip
AMDGPU: Start selecting v_mad_mix_f32
llvm-svn: 312732
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp101
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h4
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td2
4 files changed, 105 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 20eb8100f1a..9b077bde614 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -721,6 +721,9 @@ def HasDPP : Predicate<"Subtarget->hasDPP()">,
def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
AssemblerPredicate<"FeatureIntClamp">;
+def HasMadMix : Predicate<"Subtarget->hasMadMixInsts()">,
+ AssemblerPredicate<"FeatureGFX9Insts">;
+
class PredicateControl {
Predicate SubtargetPredicate;
Predicate SIAssemblerPredicate = isSICI;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index dfa26c871bc..1f70cdffc80 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -170,6 +170,7 @@ private:
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
@@ -195,6 +196,8 @@ private:
bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp) const;
+ bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
+ bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
void SelectADD_SUB_I64(SDNode *N);
void SelectUADDO_USUBO(SDNode *N);
@@ -208,6 +211,7 @@ private:
void SelectS_BFE(SDNode *N);
bool isCBranchSCC(const SDNode *N) const;
void SelectBRCOND(SDNode *N);
+ void SelectFMAD(SDNode *N);
void SelectATOMIC_CMP_SWAP(SDNode *N);
protected:
@@ -606,7 +610,9 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::BRCOND:
SelectBRCOND(N);
return;
-
+ case ISD::FMAD:
+ SelectFMAD(N);
+ return;
case AMDGPUISD::ATOMIC_CMP_SWAP:
SelectATOMIC_CMP_SWAP(N);
return;
@@ -1644,6 +1650,46 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
VCC.getValue(0));
}
+void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
+ MVT VT = N->getSimpleValueType(0);
+ if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) {
+ SelectCode(N);
+ return;
+ }
+
+ SDValue Src0 = N->getOperand(0);
+ SDValue Src1 = N->getOperand(1);
+ SDValue Src2 = N->getOperand(2);
+ unsigned Src0Mods, Src1Mods, Src2Mods;
+
+ // Only use v_mad_mix_f32 when at least one operand is actually converted
+ // from f16. Each call also rewrites SrcN in place and assigns SrcNMods.
+ bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
+ bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
+ bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
+
+ assert(!Subtarget->hasFP32Denormals() &&
+ "fmad selected with denormals enabled");
+ // TODO: We can select this with f32 denormals enabled if all the sources are
+ // converted from f16 (in which case fmad isn't legal).
+
+ if (Sel0 || Sel1 || Sel2) {
+ // Shared zero constant for the two trailing dummy operands.
+ SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
+ SDValue Ops[] = {
+ CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
+ CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
+ CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
+ CurDAG->getTargetConstant(0, SDLoc(), MVT::i1), // NOTE(review): presumably the clamp operand - confirm.
+ Zero, Zero
+ };
+
+ CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops);
+ } else {
+ SelectCode(N); // No f16 conversion folded: defer to the generated matcher.
+ }
+}
+
// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
@@ -1710,9 +1756,9 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
CurDAG->RemoveDeadNode(N);
}
-bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
- SDValue &SrcMods) const {
- unsigned Mods = 0;
+bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
+ unsigned &Mods) const {
+ Mods = 0;
Src = In;
if (Src.getOpcode() == ISD::FNEG) {
@@ -1725,10 +1771,20 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
Src = Src.getOperand(0);
}
- SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ unsigned Mods; // Raw modifier bits; assigned by SelectVOP3ModsImpl.
+ if (SelectVOP3ModsImpl(In, Src, Mods)) {
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); // Wrap the bits as an i32 target constant.
+ return true;
+ }
+
+ return false;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
SelectVOP3Mods(In, Src, SrcMods);
@@ -1908,6 +1964,41 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
return SelectVOP3OpSelMods(In, Src, SrcMods);
}
+// The return value is not whether the match is possible (which it always is),
+// but whether or not a conversion from f16 is really used.
+bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
+ unsigned &Mods) const {
+ Mods = 0;
+ SelectVOP3ModsImpl(In, Src, Mods);
+
+ if (Src.getOpcode() == ISD::FP_EXTEND) {
+ Src = Src.getOperand(0);
+ assert(Src.getValueType() == MVT::f16);
+ Src = stripBitcast(Src);
+
+ // op_sel/op_sel_hi decide the source type and source.
+ // If the source's op_sel_hi is set, the source is converted from fp16.
+ // If the source's op_sel is set, it picks the high half of the source
+ // register.
+
+ Mods |= SISrcMods::OP_SEL_1;
+ if (isExtractHiElt(Src, Src))
+ Mods |= SISrcMods::OP_SEL_0;
+
+ return true;
+ }
+
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ unsigned Mods = 0;
+ SelectVOP3PMadMixModsImpl(In, Src, Mods); // Result ignored: it only reports whether a conversion was folded.
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true; // The match itself always succeeds.
+}
+
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 031476f94e5..c2ae2227830 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -313,6 +313,10 @@ public:
return getGeneration() >= GFX9;
}
+ bool hasMadMixInsts() const {
+ return getGeneration() >= GFX9; // Reported as available from GFX9 onward.
+ }
+
bool hasCARRY() const {
return (getGeneration() >= EVERGREEN);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index b8a709d3c40..eb94c00e1ca 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -770,6 +770,8 @@ def VOP3OpSel0 : ComplexPattern<untyped, 3, "SelectVOP3OpSel0">;
def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
def VOP3OpSelMods0 : ComplexPattern<untyped, 3, "SelectVOP3OpSelMods0">;
+def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
+
//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
OpenPOWER on IntegriCloud