summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructions.td1
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp43
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h1
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td10
4 files changed, 55 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 6e56b0b0502..2874a55bef3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -423,6 +423,7 @@ int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
int FP32_NEG_ONE = 0xbf800000;
int FP32_ONE = 0x3f800000;
+int FP64_ONE = 0x3ff0000000000000;
}
def CONST : Constants;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 64975cf1809..9aaa31c29fe 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -281,6 +281,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::UINT_TO_FP);
+ setTargetDAGCombine(ISD::FCANONICALIZE);
// All memory operations. Some folding on the pointer operand is done to help
// matching the constant offsets in the addressing modes.
@@ -2400,6 +2401,46 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
return SDValue();
}
+// Constant fold canonicalize.
+SDValue SITargetLowering::performFCanonicalizeCombine(
+ SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
+ if (!CFP)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ const APFloat &C = CFP->getValueAPF();
+
+ // Flush denormals to 0 if not enabled.
+ if (C.isDenormal()) {
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::f32 && !Subtarget->hasFP32Denormals())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
+ if (VT == MVT::f64 && !Subtarget->hasFP64Denormals())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+ }
+
+ if (C.isNaN()) {
+ EVT VT = N->getValueType(0);
+ APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
+ if (C.isSignaling()) {
+ // Quiet a signaling NaN.
+ return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
+ }
+
+ // Make sure it is the canonical NaN bitpattern.
+ //
+ // TODO: Can we use -1 as the canonical NaN value since it's an inline
+ // immediate?
+ if (C.bitcastToAPInt() != CanonicalQNaN.bitcastToAPInt())
+ return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
+ }
+
+ return SDValue(CFP, 0);
+}
+
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
switch (Opc) {
case ISD::FMAXNUM:
@@ -2747,6 +2788,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performOrCombine(N, DCI);
case AMDGPUISD::FP_CLASS:
return performClassCombine(N, DCI);
+ case ISD::FCANONICALIZE:
+ return performFCanonicalizeCombine(N, DCI);
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 96c2503f400..4e7131a0b5e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -55,6 +55,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 3363fcca9ab..4620ec05752 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3515,6 +3515,16 @@ def : Pat <
>;
}
+def : Pat<
+ (fcanonicalize f32:$src),
+ (V_MUL_F32_e64 0, CONST.FP32_ONE, 0, $src, 0, 0)
+>;
+
+def : Pat<
+ (fcanonicalize f64:$src),
+ (V_MUL_F64 0, CONST.FP64_ONE, 0, $src, 0, 0)
+>;
+
//===----------------------------------------------------------------------===//
// Fract Patterns
//===----------------------------------------------------------------------===//
OpenPOWER on IntegriCloud