diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 20 |
1 files changed, 13 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index f0b9c9e9455..53fb9e3cf1c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -372,13 +372,18 @@ bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32( return true; } -static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) { +static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) { const ConstantFP *CNum = dyn_cast<ConstantFP>(Num); if (!CNum) - return false; + return HasDenormals; + + if (UnsafeDiv) + return true; + + bool IsOne = CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0); // Reciprocal f32 is handled separately without denormals. - return UnsafeDiv || CNum->isExactlyValue(+1.0); + return HasDenormals ^ IsOne; } // Insert an intrinsic for fast fdiv for safe math situations where we can @@ -404,7 +409,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) { FMF.allowReciprocal(); // With UnsafeDiv node will be optimized to just rcp and mul. - if (ST->hasFP32Denormals() || UnsafeDiv) + if (UnsafeDiv) return false; IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath); @@ -418,6 +423,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) { Value *NewFDiv = nullptr; + bool HasDenormals = ST->hasFP32Denormals(); if (VectorType *VT = dyn_cast<VectorType>(Ty)) { NewFDiv = UndefValue::get(VT); @@ -428,7 +434,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) { Value *DenEltI = Builder.CreateExtractElement(Den, I); Value *NewElt; - if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) { + if (shouldKeepFDivF32(NumEltI, UnsafeDiv, HasDenormals)) { NewElt = Builder.CreateFDiv(NumEltI, DenEltI); } else { NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI }); @@ -437,7 +443,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) { NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I); } } else { - if (!shouldKeepFDivF32(Num, UnsafeDiv)) + if (!shouldKeepFDivF32(Num, UnsafeDiv, HasDenormals)) NewFDiv = Builder.CreateCall(Decl, { Num, Den }); } @@ -447,7 +453,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) { FDiv.eraseFromParent(); } - return true; + return !!NewFDiv; } static bool hasUnsafeFPMath(const Function &F) { |