diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-09-20 20:28:39 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-09-20 20:28:39 +0000 |
| commit | 76935122cce2b45d5a48766e4840b8f908bfa126 (patch) | |
| tree | 2ad91591576ba12258ad1147e055a72162fa9cdc /llvm/lib/Target | |
| parent | e85de8fcf9c0ea5f878140c2801be67a3663a518 (diff) | |
| download | bcm5719-llvm-76935122cce2b45d5a48766e4840b8f908bfa126.tar.gz bcm5719-llvm-76935122cce2b45d5a48766e4840b8f908bfa126.zip | |
AMDGPU: Start selecting v_mad_mixlo_f16
Also add some tests that should be able to use v_mad_mixhi_f16,
but do not yet. This is trickier because we don't really model
the partial update of the register done by 16-bit instructions.
llvm-svn: 313806
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 16 |
2 files changed, 25 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 3ad19694570..7faf3e123f8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -197,6 +197,7 @@ private:
   bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                             SDValue &Clamp) const;
   bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
+  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
 
   void SelectADD_SUB_I64(SDNode *N);
   void SelectUADDO_USUBO(SDNode *N);
@@ -1990,6 +1991,14 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
   return false;
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
+                                               SDValue &SrcMods) const {
+  unsigned Mods = 0;
+  SelectVOP3PMadMixModsImpl(In, Src, Mods);
+  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+  return true;
+}
+
 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
   const AMDGPUTargetLowering& Lowering =
     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index d6820dac27f..dfcc91bf8d3 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -68,10 +68,26 @@ def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I1
 // For 16-bit src arguments their location (hi/lo) are controlled by op_sel_hi.
 let isCommutable = 1 in {
 def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
+
+// Clamp modifier is applied after conversion to f16.
 def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
 def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
 }
 
+let Predicates = [HasMadMix] in {
+
+def : Pat <
+  (f16 (fpround (fmad (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
+                      (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
+                      (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
+  (V_MAD_MIXLO_F16 $src0_modifiers, $src0,
+                   $src1_modifiers, $src1,
+                   $src2_modifiers, $src2,
+                   0)
+>;
+
+} // End Predicates = [HasMadMix]
+
 multiclass VOP3P_Real_vi<bits<10> op> {
   def _vi : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.VI>,
             VOP3Pe <op, !cast<VOP3P_Pseudo>(NAME).Pfl> {
```

