summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/Analysis/TargetTransformInfo.h 10
-rw-r--r-- llvm/lib/CodeGen/HardwareLoops.cpp 16
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp 4
-rw-r--r-- llvm/test/Transforms/HardwareLoops/ARM/structure.ll 76
-rw-r--r-- llvm/test/Transforms/HardwareLoops/unconditional-latch.ll 46
5 files changed, 139 insertions, 13 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0ef8c08380a..52d4cb73c5b 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -448,9 +448,7 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) const;
- /// Attributes of a target dependent hardware loop. Here, the term 'element'
- /// describes the work performed by an IR loop that has not been vectorized
- /// by the compiler.
+ /// Attributes of a target dependent hardware loop.
struct HardwareLoopInfo {
HardwareLoopInfo() = delete;
HardwareLoopInfo(Loop *L) : L(L) { }
@@ -459,10 +457,10 @@ public:
BranchInst *ExitBranch = nullptr;
const SCEV *ExitCount = nullptr;
IntegerType *CountType = nullptr;
- Value *LoopDecrement = nullptr; // The maximum number of elements
- // processed in the loop body.
+ Value *LoopDecrement = nullptr; // Decrement the loop counter by this
+ // value in every iteration.
bool IsNestingLegal = false; // Can a hardware loop be a parent to
- // another hardware loop.
+ // another hardware loop?
bool CounterInReg = false; // Should loop counter be updated in
// the loop via a phi?
};
diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp
index df063545f28..99191090220 100644
--- a/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -235,7 +235,17 @@ bool HardwareLoops::TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo) {
for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
IE = ExitingBlocks.end(); I != IE; ++I) {
- const SCEV *EC = SE->getExitCount(L, *I);
+ BasicBlock *BB = *I;
+
+ // If we pass the updated counter back through a phi, we need to know
+ // which latch the updated value will be coming from.
+ if (!L->isLoopLatch(BB)) {
+ if ((ForceHardwareLoopPHI.getNumOccurrences() && ForceHardwareLoopPHI) ||
+ HWLoopInfo.CounterInReg)
+ continue;
+ }
+
+ const SCEV *EC = SE->getExitCount(L, BB);
if (isa<SCEVCouldNotCompute>(EC))
continue;
if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
@@ -251,7 +261,7 @@ bool HardwareLoops::TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo) {
// If this exiting block is contained in a nested loop, it is not eligible
// for insertion of the branch-and-decrement since the inner loop would
// end up messing up the value in the CTR.
- if (!HWLoopInfo.IsNestingLegal && LI->getLoopFor(*I) != L &&
+ if (!HWLoopInfo.IsNestingLegal && LI->getLoopFor(BB) != L &&
!ForceNestedLoop)
continue;
@@ -278,7 +288,7 @@ bool HardwareLoops::TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo) {
continue;
// Make sure this blocks ends with a conditional branch.
- Instruction *TI = (*I)->getTerminator();
+ Instruction *TI = BB->getTerminator();
if (!TI)
continue;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index a57a4049dbc..ca905dfc7ff 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -702,10 +702,6 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
if (!ST->hasLOB() || DisableLowOverheadLoops)
return false;
- // For now, for simplicity, only support loops with one exit block.
- if (!L->getExitBlock())
- return false;
-
if (!SE.hasLoopInvariantBackedgeTakenCount(L))
return false;
diff --git a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll
index 42726756a53..fa3dbc0d929 100644
--- a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll
@@ -135,6 +135,82 @@ while.end7:
ret void
}
+; CHECK-LABEL: not_rotated
+; CHECK-NOT: call void @llvm.set.loop.iterations
+; CHECK-NOT: call i32 @llvm.loop.decrement.i32
+define void @not_rotated(i32, i16* nocapture, i16 signext) { ; two nested loops, exit tests in the headers, unconditional latches
+ br label %4
+
+4: ; outer loop header
+ %5 = phi i32 [ 0, %3 ], [ %19, %18 ] ; outer counter: 0 from entry, %19 from the outer latch
+ %6 = icmp eq i32 %5, %0
+ br i1 %6, label %20, label %7 ; leave the nest once the outer counter equals argument %0
+
+7: ; inner loop preheader
+ %8 = mul i32 %5, %0 ; row offset = outer counter * %0
+ br label %9
+
+9: ; inner loop header and its only exiting block
+ %10 = phi i32 [ %17, %12 ], [ 0, %7 ] ; inner counter: 0 from %7, %17 from the inner latch
+ %11 = icmp eq i32 %10, %0
+ br i1 %11, label %18, label %12 ; inner loop exits from the header, not from the latch
+
+12: ; inner loop body and latch
+ %13 = add i32 %10, %8 ; element index = inner counter + row offset
+ %14 = getelementptr inbounds i16, i16* %1, i32 %13
+ %15 = load i16, i16* %14, align 2
+ %16 = add i16 %15, %2 ; add argument %2 to the loaded element
+ store i16 %16, i16* %14, align 2 ; write the sum back to the same address
+ %17 = add i32 %10, 1
+ br label %9 ; unconditional backedge
+
+18: ; outer loop latch
+ %19 = add i32 %5, 1
+ br label %4 ; unconditional backedge
+
+20: ; exit of the outer loop
+ ret void
+}
+
+; CHECK-LABEL: multi_latch
+; CHECK-NOT: call void @llvm.set.loop.iterations
+; CHECK-NOT: call i32 @llvm.loop.decrement
+define void @multi_latch(i32* %a, i32* %b, i32 %N) { ; one loop with two backedges (latch.0 and latch.1) into header
+entry:
+ %half = lshr i32 %N, 1 ; %half = %N / 2
+ br label %header
+
+header:
+ %iv = phi i32 [ 0, %entry ], [ %count.next, %latch.0 ], [ %count.next, %latch.1 ] ; one incoming value per latch
+ %cmp = icmp ult i32 %iv, %half
+ %addr.a = getelementptr i32, i32* %a, i32 %iv
+ %addr.b = getelementptr i32, i32* %b, i32 %iv
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ store i32 %iv, i32* %addr.a ; %iv < %half: write a[%iv]
+ br label %latch.0
+
+if.else:
+ store i32 %iv, i32* %addr.b ; otherwise: write b[%iv]
+ br label %latch.0
+
+latch.0:
+ %count.next = add nuw i32 %iv, 1
+ %cmp.1 = icmp ult i32 %count.next, %half
+ br i1 %cmp.1, label %header, label %latch.1 ; first backedge while %count.next < %half
+
+latch.1:
+ %ld = load i32, i32* %addr.a
+ store i32 %ld, i32* %addr.b ; copy a[%iv] into b[%iv]
+ %cmp.2 = icmp ult i32 %count.next, %N
+ br i1 %cmp.2, label %header, label %exit ; second backedge while %count.next < %N, otherwise leave via %exit
+
+exit:
+ ret void
+}
+
+
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
diff --git a/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll b/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll
new file mode 100644
index 00000000000..71479d04393
--- /dev/null
+++ b/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll
@@ -0,0 +1,46 @@
+; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
+; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
+
+; CHECK-LABEL: not_rotated
+; CHECK-LATCH-NOT: call void @llvm.set.loop.iterations
+; CHECK-LATCH-NOT: call i1 @llvm.loop.decrement
+
+; CHECK-ALLOW: call void @llvm.set.loop.iterations.i32(i32 %4)
+; CHECK-ALLOW: br label %10
+
+; CHECK-ALLOW: [[CMP:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1)
+; CHECK-ALLOW: br i1 [[CMP]], label %13, label %19
+
+define void @not_rotated(i32, i16* nocapture, i16 signext) { ; nested loops whose latches end in unconditional branches
+ br label %4
+
+4: ; outer loop header and its exiting block
+ %5 = phi i32 [ 0, %3 ], [ %19, %18 ] ; outer counter: 0 from entry, %19 from the outer latch
+ %6 = icmp eq i32 %5, %0
+ br i1 %6, label %20, label %7 ; exit to %20 once the outer counter equals argument %0
+
+7: ; inner loop preheader
+ %8 = mul i32 %5, %0 ; row offset = outer counter * %0
+ br label %9
+
+9: ; inner loop header and its only exiting block
+ %10 = phi i32 [ %17, %12 ], [ 0, %7 ] ; inner counter: 0 from %7, %17 from the inner latch
+ %11 = icmp eq i32 %10, %0
+ br i1 %11, label %18, label %12 ; the exit test lives here, not in the latch %12
+
+12: ; inner loop body and latch
+ %13 = add i32 %10, %8 ; element index = inner counter + row offset
+ %14 = getelementptr inbounds i16, i16* %1, i32 %13
+ %15 = load i16, i16* %14, align 2
+ %16 = add i16 %15, %2 ; add argument %2 to the loaded element
+ store i16 %16, i16* %14, align 2 ; write the sum back to the same address
+ %17 = add i32 %10, 1
+ br label %9 ; unconditional backedge
+
+18: ; outer loop latch
+ %19 = add i32 %5, 1
+ br label %4 ; unconditional backedge
+
+20: ; exit of the outer loop
+ ret void
+}
OpenPOWER on IntegriCloud