diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-09-07 18:05:07 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-09-07 18:05:07 +0000 |
| commit | d7e2303df2c281d779d772ec5f6c8338afce5dd9 (patch) | |
| tree | 4c9d7008eec11b6a56374ff4531e3c67dc8843c3 /llvm/lib | |
| parent | 61ec738b60a4fb47ec9b7195de55f1ecb5cbdb45 (diff) | |
| download | bcm5719-llvm-d7e2303df2c281d779d772ec5f6c8338afce5dd9.tar.gz bcm5719-llvm-d7e2303df2c281d779d772ec5f6c8338afce5dd9.zip | |
AMDGPU: Start selecting v_mad_mix_f32
llvm-svn: 312732
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 101 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 |
4 files changed, 105 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 20eb8100f1a..9b077bde614 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -721,6 +721,9 @@ def HasDPP : Predicate<"Subtarget->hasDPP()">, def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">, AssemblerPredicate<"FeatureIntClamp">; +def HasMadMix : Predicate<"Subtarget->hasMadMixInsts()">, + AssemblerPredicate<"FeatureGFX9Insts">; + class PredicateControl { Predicate SubtargetPredicate; Predicate SIAssemblerPredicate = isSICI; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index dfa26c871bc..1f70cdffc80 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -170,6 +170,7 @@ private: bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const; bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, @@ -195,6 +196,8 @@ private: bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp) const; + bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const; + bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; void SelectADD_SUB_I64(SDNode *N); void SelectUADDO_USUBO(SDNode *N); @@ -208,6 +211,7 @@ private: void SelectS_BFE(SDNode *N); bool isCBranchSCC(const SDNode *N) const; void SelectBRCOND(SDNode *N); + void SelectFMAD(SDNode *N); void SelectATOMIC_CMP_SWAP(SDNode *N); protected: @@ -606,7 +610,9 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { case ISD::BRCOND: SelectBRCOND(N); 
return; - + case ISD::FMAD: + SelectFMAD(N); + return; case AMDGPUISD::ATOMIC_CMP_SWAP: SelectATOMIC_CMP_SWAP(N); return; @@ -1644,6 +1650,46 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { VCC.getValue(0)); } +void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) { + MVT VT = N->getSimpleValueType(0); + if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) { + SelectCode(N); + return; + } + + SDValue Src0 = N->getOperand(0); + SDValue Src1 = N->getOperand(1); + SDValue Src2 = N->getOperand(2); + unsigned Src0Mods, Src1Mods, Src2Mods; + + // Avoid using v_mad_mix_f32 unless there is actually an operand using the + // conversion from f16. + bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods); + bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods); + bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods); + + assert(!Subtarget->hasFP32Denormals() && + "fmad selected with denormals enabled"); + // TODO: We can select this with f32 denormals enabled if all the sources are + // converted from f16 (in which case fmad isn't legal). + + if (Sel0 || Sel1 || Sel2) { + // For dummy operands. + SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32); + SDValue Ops[] = { + CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0, + CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1, + CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2, + CurDAG->getTargetConstant(0, SDLoc(), MVT::i1), + Zero, Zero + }; + + CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops); + } else { + SelectCode(N); + } +} + // This is here because there isn't a way to use the generated sub0_sub1 as the // subreg index to EXTRACT_SUBREG in tablegen. 
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { @@ -1710,9 +1756,9 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { CurDAG->RemoveDeadNode(N); } -bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, - SDValue &SrcMods) const { - unsigned Mods = 0; +bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src, + unsigned &Mods) const { + Mods = 0; Src = In; if (Src.getOpcode() == ISD::FNEG) { @@ -1725,10 +1771,20 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, Src = Src.getOperand(0); } - SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); return true; } +bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + unsigned Mods; + if (SelectVOP3ModsImpl(In, Src, Mods)) { + SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; + } + + return false; +} + bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const { SelectVOP3Mods(In, Src, SrcMods); @@ -1908,6 +1964,41 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src, return SelectVOP3OpSelMods(In, Src, SrcMods); } +// The return value is not whether the match is possible (which it always is), +// but whether or not a conversion is really used. +bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, + unsigned &Mods) const { + Mods = 0; + SelectVOP3ModsImpl(In, Src, Mods); + + if (Src.getOpcode() == ISD::FP_EXTEND) { + Src = Src.getOperand(0); + assert(Src.getValueType() == MVT::f16); + Src = stripBitcast(Src); + + // op_sel/op_sel_hi decide the source type and source. + // If the source's op_sel_hi is set, it indicates to do a conversion from fp16. + // If the source's op_sel is set, it picks the high half of the source + // register. 
+ + Mods |= SISrcMods::OP_SEL_1; + if (isExtractHiElt(Src, Src)) + Mods |= SISrcMods::OP_SEL_0; + + return true; + } + + return false; +} + +bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + unsigned Mods = 0; + SelectVOP3PMadMixModsImpl(In, Src, Mods); + SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; +} + void AMDGPUDAGToDAGISel::PostprocessISelDAG() { const AMDGPUTargetLowering& Lowering = *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 031476f94e5..c2ae2227830 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -313,6 +313,10 @@ public: return getGeneration() >= GFX9; } + bool hasMadMixInsts() const { + return getGeneration() >= GFX9; + } + bool hasCARRY() const { return (getGeneration() >= EVERGREEN); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index b8a709d3c40..eb94c00e1ca 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -770,6 +770,8 @@ def VOP3OpSel0 : ComplexPattern<untyped, 3, "SelectVOP3OpSel0">; def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">; def VOP3OpSelMods0 : ComplexPattern<untyped, 3, "SelectVOP3OpSelMods0">; +def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">; + //===----------------------------------------------------------------------===// // SI assembler operands //===----------------------------------------------------------------------===// |

