summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-04-25 21:17:38 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-04-25 21:17:38 +0000
commitdf58e825ad1bf0ddff869c19ec989068fad7b532 (patch)
tree567148f96654d00c8972fc1454028b3bdd7af947 /llvm/lib
parent227c901dd822de8ba98e49954e5d6b6b35425341 (diff)
downloadbcm5719-llvm-df58e825ad1bf0ddff869c19ec989068fad7b532.tar.gz
bcm5719-llvm-df58e825ad1bf0ddff869c19ec989068fad7b532.zip
AMDGPU: Clean up VOP3NoMods pattern
There is no need to copy the operands or inspect the sources. Also remove some unnecessary clamp/omod usage. llvm-svn: 301363
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp35
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td3
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td19
3 files changed, 22 insertions, 35 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index eef59b343df..88f91297b32 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -153,7 +153,7 @@ private:
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
@@ -1667,38 +1667,28 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
return isNoNanSrc(Src);
}
-bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
- SDValue &SrcMods) const {
- bool Res = SelectVOP3Mods(In, Src, SrcMods);
- return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
+bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
+ if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
+ return false;
+
+ Src = In;
+ return true;
}
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
SDValue &SrcMods, SDValue &Clamp,
SDValue &Omod) const {
SDLoc DL(In);
- // FIXME: Handle Clamp and Omod
- Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
return SelectVOP3Mods(In, Src, SrcMods);
}
-bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
- SDValue &SrcMods, SDValue &Clamp,
- SDValue &Omod) const {
- bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
-
- return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
- cast<ConstantSDNode>(Clamp)->isNullValue() &&
- cast<ConstantSDNode>(Omod)->isNullValue();
-}
-
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
SDValue &SrcMods,
SDValue &Omod) const {
- // FIXME: Handle Omod
- Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
+ Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i1);
return SelectVOP3Mods(In, Src, SrcMods);
}
@@ -1716,9 +1706,8 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
Src = In;
SDLoc DL(In);
- // FIXME: Handle Clamp and Omod
- Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index c6daf743f3a..7b052844f17 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -646,11 +646,10 @@ def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
-def VOP3NoMods0 : ComplexPattern<untyped, 4, "SelectVOP3NoMods0">;
def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
-def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;
+def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 89c815c5262..3f6ddec7047 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -472,8 +472,8 @@ def : Pat <
// fp_to_fp16 patterns
def : Pat <
- (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)))),
- (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, $clamp, $omod)
+ (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
+ (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
def : Pat <
@@ -502,11 +502,11 @@ def : Pat <
multiclass FMADPat <ValueType vt, Instruction inst> {
def : Pat <
- (vt (fmad (VOP3NoMods0 vt:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
- (VOP3NoMods vt:$src1, i32:$src1_modifiers),
- (VOP3NoMods vt:$src2, i32:$src2_modifiers))),
- (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
- $src2_modifiers, $src2, $clamp, $omod)
+ (vt (fmad (VOP3NoMods vt:$src0),
+ (VOP3NoMods vt:$src1),
+ (VOP3NoMods vt:$src2))),
+ (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
+ SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
}
@@ -676,10 +676,9 @@ def : BitConvert <v16f32, v16i32, VReg_512>;
// If denormals are not enabled, it only impacts the compare of the
// inputs. The output result is not flushed.
class ClampPat<Instruction inst, ValueType vt> : Pat <
- (vt (AMDGPUclamp
- (VOP3Mods0Clamp vt:$src0, i32:$src0_modifiers, i32:$omod))),
+ (vt (AMDGPUclamp (VOP3Mods vt:$src0, i32:$src0_modifiers))),
(inst i32:$src0_modifiers, vt:$src0,
- i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, $omod)
+ i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, DSTOMOD.NONE)
>;
def : ClampPat<V_MAX_F32_e64, f32>;
OpenPOWER on IntegriCloud