diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-22 17:01:21 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-22 17:01:21 +0000 |
commit | 7fb961f3e6072c4d4253df9362e28cbb760713a8 (patch) | |
tree | c7a218fa83d3825c59489f3c35d22f6b610e3a83 /llvm/lib/Target/AMDGPU | |
parent | d40ded6681c486acf53eabace2a186eb154bab33 (diff) | |
download | bcm5719-llvm-7fb961f3e6072c4d4253df9362e28cbb760713a8.tar.gz bcm5719-llvm-7fb961f3e6072c4d4253df9362e28cbb760713a8.zip |
AMDGPU: Fix i1 fp_to_int
R600's i1 fp_to_uint passed instruction selection, but its result was
incorrect compared with what instcombine constant folds the conversion to.
llvm-svn: 276435
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 25 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600ISelLowering.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 10 |
4 files changed, 34 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 6761b4b5df9..3944fdbd31e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -420,9 +420,10 @@ int TWO_PI = 0x40c90fdb; int PI = 0x40490fdb; int TWO_PI_INV = 0x3e22f983; int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding -int FP32_NEG_ONE = 0xbf800000; int FP32_ONE = 0x3f800000; +int FP32_NEG_ONE = 0xbf800000; int FP64_ONE = 0x3ff0000000000000; +int FP64_NEG_ONE = 0xbff0000000000000; } def CONST : Constants; diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 8f78edd76a5..8ccd176930a 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -122,6 +122,7 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); @@ -832,13 +833,18 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, return; case ISD::FP_TO_UINT: if (N->getValueType(0) == MVT::i1) { - Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); + Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG)); return; } // Fall-through. Since we don't care about out of bounds values // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint // considers some extra cases which are not necessary here. 
case ISD::FP_TO_SINT: { + if (N->getValueType(0) == MVT::i1) { + Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG)); + return; + } + SDValue Result; if (expandFP_TO_SINT(N, Result, DAG)) Results.push_back(Result); @@ -1052,15 +1058,24 @@ SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF); } -SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { +SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + return DAG.getNode( + ISD::SETCC, + DL, + MVT::i1, + Op, DAG.getConstantFP(1.0f, DL, MVT::f32), + DAG.getCondCode(ISD::SETEQ)); +} + +SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode( ISD::SETCC, DL, MVT::i1, - Op, DAG.getConstantFP(0.0f, DL, MVT::f32), - DAG.getCondCode(ISD::SETNE) - ); + Op, DAG.getConstantFP(-1.0f, DL, MVT::f32), + DAG.getCondCode(ISD::SETEQ)); } SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h index 2fb6ee25caa..9700ce14c6f 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h @@ -72,7 +72,8 @@ private: SDValue lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index f1e8c232737..514f2f5e35a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ 
b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3401,6 +3401,16 @@ def : Pat < (V_CNDMASK_B32_e64 0, -1, $src), sub1) >; +class FPToI1Pat<Instruction Inst, int KOne, ValueType vt, SDPatternOperator fp_to_int> : Pat < + (i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))), + (i1 (Inst 0, KOne, $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)) +>; + +def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, f32, fp_to_uint>; +def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, f32, fp_to_sint>; +def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, f64, fp_to_uint>; +def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, f64, fp_to_sint>; + // If we need to perform a logical operation on i1 values, we need to // use vector comparisons since there is only one SCC register. Vector // comparisions still write to a pair of SGPRs, so treat these as |