diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-04-25 21:17:38 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-04-25 21:17:38 +0000 |
commit | df58e825ad1bf0ddff869c19ec989068fad7b532 (patch) | |
tree | 567148f96654d00c8972fc1454028b3bdd7af947 /llvm/lib | |
parent | 227c901dd822de8ba98e49954e5d6b6b35425341 (diff) | |
download | bcm5719-llvm-df58e825ad1bf0ddff869c19ec989068fad7b532.tar.gz bcm5719-llvm-df58e825ad1bf0ddff869c19ec989068fad7b532.zip |
AMDGPU: Clean up VOP3NoMods pattern
There is no need to copy the operands or inspect the sources.
Also remove some unnecessary clamp/omod usage.
llvm-svn: 301363
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 35 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 19 |
3 files changed, 22 insertions, 35 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index eef59b343df..88f91297b32 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -153,7 +153,7 @@ private:

   bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp, SDValue &Omod) const;
   bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
@@ -1667,38 +1667,28 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
   return isNoNanSrc(Src);
 }

-bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
-                                          SDValue &SrcMods) const {
-  bool Res = SelectVOP3Mods(In, Src, SrcMods);
-  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
+bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
+  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
+    return false;
+
+  Src = In;
+  return true;
 }

 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods, SDValue &Clamp,
                                          SDValue &Omod) const {
   SDLoc DL(In);
-  // FIXME: Handle Clamp and Omod
-  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
-  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
+  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

   return SelectVOP3Mods(In, Src, SrcMods);
 }

-bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
-                                           SDValue &SrcMods, SDValue &Clamp,
-                                           SDValue &Omod) const {
-  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
-
-  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
-                cast<ConstantSDNode>(Clamp)->isNullValue() &&
-                cast<ConstantSDNode>(Omod)->isNullValue();
-}
-
 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                               SDValue &SrcMods,
                                               SDValue &Omod) const {
-  // FIXME: Handle Omod
-  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
+  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i1);

   return SelectVOP3Mods(In, Src, SrcMods);
 }
@@ -1716,9 +1706,8 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
   Src = In;

   SDLoc DL(In);
-  // FIXME: Handle Clamp and Omod
-  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
-  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
+  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

   return true;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index c6daf743f3a..7b052844f17 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -646,11 +646,10 @@ def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
 def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;

 def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
-def VOP3NoMods0 : ComplexPattern<untyped, 4, "SelectVOP3NoMods0">;
 def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
 def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
 def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
-def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;
+def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
 // VOP3Mods, but the input source is known to never be NaN.
 def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 89c815c5262..3f6ddec7047 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -472,8 +472,8 @@ def : Pat <

 // fp_to_fp16 patterns
 def : Pat <
-  (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)))),
-  (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, $clamp, $omod)
+  (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
+  (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, DSTCLAMP.NONE, DSTOMOD.NONE)
 >;

 def : Pat <
@@ -502,11 +502,11 @@ def : Pat <

 multiclass FMADPat <ValueType vt, Instruction inst> {
   def : Pat <
-    (vt (fmad (VOP3NoMods0 vt:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
-              (VOP3NoMods vt:$src1, i32:$src1_modifiers),
-              (VOP3NoMods vt:$src2, i32:$src2_modifiers))),
-    (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
-          $src2_modifiers, $src2, $clamp, $omod)
+    (vt (fmad (VOP3NoMods vt:$src0),
+              (VOP3NoMods vt:$src1),
+              (VOP3NoMods vt:$src2))),
+    (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
+          SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
   >;
 }

@@ -676,10 +676,9 @@ def : BitConvert <v16f32, v16i32, VReg_512>;
 // If denormals are not enabled, it only impacts the compare of the
 // inputs. The output result is not flushed.
 class ClampPat<Instruction inst, ValueType vt> : Pat <
-  (vt (AMDGPUclamp
-    (VOP3Mods0Clamp vt:$src0, i32:$src0_modifiers, i32:$omod))),
+  (vt (AMDGPUclamp (VOP3Mods vt:$src0, i32:$src0_modifiers))),
   (inst i32:$src0_modifiers, vt:$src0,
-        i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, $omod)
+        i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, DSTOMOD.NONE)
 >;

 def : ClampPat<V_MAX_F32_e64, f32>;