summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2018-06-23 04:01:28 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2018-06-23 04:01:28 +0000
commit: d8c9374797ccb9520b6ef7bdb29ed3a4bb5a1be2 (patch)
tree: 3bff07eac1e07ddd15248d60835e6c2679c817c1
parent: 96917d7912bdcf880b92e1bd8bcf83ccdebfab4d (diff)
download: bcm5719-llvm-d8c9374797ccb9520b6ef7bdb29ed3a4bb5a1be2.tar.gz
          bcm5719-llvm-d8c9374797ccb9520b6ef7bdb29ed3a4bb5a1be2.zip
Fix invariant fdiv hoisting in LICM
An FDiv is replaced with a multiplication by the reciprocal, and the invariant reciprocal is hoisted out of the loop, while the multiplication remains in the loop even if it is itself invariant. Swap the order of the two checks to fix the issue: first check whether all operands are invariant (and hoist the whole instruction), and only then check for an FDiv whose denominator alone is invariant.

Differential Revision: https://reviews.llvm.org/D48447
llvm-svn: 335411
-rw-r--r-- llvm/lib/Transforms/Scalar/LICM.cpp 28
-rw-r--r-- llvm/test/Transforms/LICM/hoist-fast-fdiv.ll 30
2 files changed, 42 insertions, 16 deletions
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 412f2678184..10f386064f9 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -477,6 +477,20 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
continue;
}
+ // Try hoisting the instruction out to the preheader. We can only do
+ // this if all of the operands of the instruction are loop invariant and
+ // if it is safe to hoist the instruction.
+ //
+ if (CurLoop->hasLoopInvariantOperands(&I) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) &&
+ (IsMustExecute ||
+ isSafeToExecuteUnconditionally(
+ I, DT, CurLoop, SafetyInfo, ORE,
+ CurLoop->getLoopPreheader()->getTerminator()))) {
+ Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE);
+ continue;
+ }
+
// Attempt to remove floating point division out of the loop by
// converting it to a reciprocal multiplication.
if (I.getOpcode() == Instruction::FDiv &&
@@ -500,20 +514,6 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
continue;
}
- // Try hoisting the instruction out to the preheader. We can only do
- // this if all of the operands of the instruction are loop invariant and
- // if it is safe to hoist the instruction.
- //
- if (CurLoop->hasLoopInvariantOperands(&I) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) &&
- (IsMustExecute ||
- isSafeToExecuteUnconditionally(
- I, DT, CurLoop, SafetyInfo, ORE,
- CurLoop->getLoopPreheader()->getTerminator()))) {
- Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE);
- continue;
- }
-
if (IsMustExecute)
IsMustExecute = isGuaranteedToTransferExecutionToSuccessor(&I);
}
diff --git a/llvm/test/Transforms/LICM/hoist-fast-fdiv.ll b/llvm/test/Transforms/LICM/hoist-fast-fdiv.ll
index f61564fd726..57df241a2ce 100644
--- a/llvm/test/Transforms/LICM/hoist-fast-fdiv.ll
+++ b/llvm/test/Transforms/LICM/hoist-fast-fdiv.ll
@@ -1,9 +1,9 @@
; RUN: opt -licm -S < %s | FileCheck %s
; Function Attrs: noinline norecurse nounwind readnone ssp uwtable
-define zeroext i1 @f(double %v) #0 {
+define zeroext i1 @invariant_denom(double %v) #0 {
entry:
-; CHECK-LABEL: @f(
+; CHECK-LABEL: @invariant_denom(
; CHECK-NEXT: entry:
; CHECK-NEXT: fdiv fast double 1.000000e+00, %v
br label %loop
@@ -32,3 +32,29 @@ end: ; preds = %loop
ret i1 %v16
}
+define void @invariant_fdiv(float* %out, float %arg) {
+; CHECK-LABEL: @invariant_fdiv(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %div = fdiv fast float 4.000000e+00, %arg
+; CHECK-NEXT: fmul fast float %div, 0x41F0000000000000
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %ind = phi i32 [ 0, %entry ], [ %inc, %loop ]
+
+; CHECK-LABEL: loop:
+; CHECK: getelementptr
+; CHECK-NOT: fdiv
+; CHECK-NOT: fmul
+ %div = fdiv fast float 4.000000e+00, %arg
+ %mul = fmul fast float %div, 0x41F0000000000000
+ %gep = getelementptr inbounds float, float* %out, i32 %ind
+ store float %mul, float* %gep, align 4
+ %inc = add nuw nsw i32 %ind, 1
+ %cond = icmp eq i32 %inc, 1024
+ br i1 %cond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret void
+}
OpenPOWER on IntegriCloud