summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp20
1 files changed, 13 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index f0b9c9e9455..53fb9e3cf1c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -372,13 +372,18 @@ bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
return true;
}
-static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
+static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
if (!CNum)
- return false;
+ return HasDenormals;
+
+ if (UnsafeDiv)
+ return true;
+
+ bool IsOne = CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0);
// Reciprocal f32 is handled separately without denormals.
- return UnsafeDiv || CNum->isExactlyValue(+1.0);
+ return HasDenormals ^ IsOne;
}
// Insert an intrinsic for fast fdiv for safe math situations where we can
@@ -404,7 +409,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
FMF.allowReciprocal();
// With UnsafeDiv node will be optimized to just rcp and mul.
- if (ST->hasFP32Denormals() || UnsafeDiv)
+ if (UnsafeDiv)
return false;
IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
@@ -418,6 +423,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
Value *NewFDiv = nullptr;
+ bool HasDenormals = ST->hasFP32Denormals();
if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
NewFDiv = UndefValue::get(VT);
@@ -428,7 +434,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
Value *DenEltI = Builder.CreateExtractElement(Den, I);
Value *NewElt;
- if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) {
+ if (shouldKeepFDivF32(NumEltI, UnsafeDiv, HasDenormals)) {
NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
} else {
NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
@@ -437,7 +443,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
}
} else {
- if (!shouldKeepFDivF32(Num, UnsafeDiv))
+ if (!shouldKeepFDivF32(Num, UnsafeDiv, HasDenormals))
NewFDiv = Builder.CreateCall(Decl, { Num, Den });
}
@@ -447,7 +453,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
FDiv.eraseFromParent();
}
- return true;
+ return !!NewFDiv;
}
static bool hasUnsafeFPMath(const Function &F) {
OpenPOWER on IntegriCloud