summary refs log tree commit diff stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
author Matt Arsenault <Matthew.Arsenault@amd.com> 2016-07-22 17:01:21 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com> 2016-07-22 17:01:21 +0000
commit 7fb961f3e6072c4d4253df9362e28cbb760713a8 (patch)
tree c7a218fa83d3825c59489f3c35d22f6b610e3a83 /llvm/lib/Target/AMDGPU
parent d40ded6681c486acf53eabace2a186eb154bab33 (diff)
download bcm5719-llvm-7fb961f3e6072c4d4253df9362e28cbb760713a8.tar.gz
bcm5719-llvm-7fb961f3e6072c4d4253df9362e28cbb760713a8.zip
AMDGPU: Fix i1 fp_to_int
R600's i1 fp_to_uint selected but was incorrect according to what instcombine constant folds to. llvm-svn: 276435
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td 3
-rw-r--r-- llvm/lib/Target/AMDGPU/R600ISelLowering.cpp 25
-rw-r--r-- llvm/lib/Target/AMDGPU/R600ISelLowering.h 3
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstructions.td 10
4 files changed, 34 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 6761b4b5df9..3944fdbd31e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -420,9 +420,10 @@ int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
-int FP32_NEG_ONE = 0xbf800000;
int FP32_ONE = 0x3f800000;
+int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
+int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 8f78edd76a5..8ccd176930a 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -122,6 +122,7 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
@@ -832,13 +833,18 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
return;
case ISD::FP_TO_UINT:
if (N->getValueType(0) == MVT::i1) {
- Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+ Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
return;
}
// Fall-through. Since we don't care about out of bounds values
// we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
// considers some extra cases which are not necessary here.
case ISD::FP_TO_SINT: {
+ if (N->getValueType(0) == MVT::i1) {
+ Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
+ return;
+ }
+
SDValue Result;
if (expandFP_TO_SINT(N, Result, DAG))
Results.push_back(Result);
@@ -1052,15 +1058,24 @@ SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}
-SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
+SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(
+ ISD::SETCC,
+ DL,
+ MVT::i1,
+ Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
+ DAG.getCondCode(ISD::SETEQ));
+}
+
+SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(
ISD::SETCC,
DL,
MVT::i1,
- Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
- DAG.getCondCode(ISD::SETNE)
- );
+ Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
+ DAG.getCondCode(ISD::SETEQ));
}
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index 2fb6ee25caa..9700ce14c6f 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -72,7 +72,8 @@ private:
SDValue lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f1e8c232737..514f2f5e35a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3401,6 +3401,16 @@ def : Pat <
(V_CNDMASK_B32_e64 0, -1, $src), sub1)
>;
+class FPToI1Pat<Instruction Inst, int KOne, ValueType vt, SDPatternOperator fp_to_int> : Pat <
+ (i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))),
+ (i1 (Inst 0, KOne, $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE))
+>;
+
+def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, f32, fp_to_uint>;
+def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, f32, fp_to_sint>;
+def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, f64, fp_to_uint>;
+def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, f64, fp_to_sint>;
+
// If we need to perform a logical operation on i1 values, we need to
// use vector comparisons since there is only one SCC register. Vector
// comparisions still write to a pair of SGPRs, so treat these as
OpenPOWER on IntegriCloud