R600: Unconditionally unroll loops that contain GEPs with alloca pointers

Implement the getUnrollingPreferences() function for AMDGPUTargetTransformInfo so that loops that do address calculations on pointers derived from alloca are unconditionally unrolled. Unrolling these loops makes it more likely that SROA will be able to eliminate the allocas, which is a big win for R600 since memory allocated by alloca (private memory) is really slow. llvm-svn: 199916
author: Tom Stellard <thomas.stellard@amd.com> 2014-01-23 18:49:28 +0000
committer: Tom Stellard <thomas.stellard@amd.com> 2014-01-23 18:49:28 +0000
commit: 8cce9bdf179c29c3652d4862d6317d7e2e7a411c (patch)
tree: 3853f3f2a08df6448a6fa1ecfb3778ee31a1da1c /llvm/test/CodeGen
parent: b3500e606195e9026d292a1cdd3faede6be7b0c2 (diff)
download: bcm5719-llvm-8cce9bdf179c29c3652d4862d6317d7e2e7a411c.tar.gz
bcm5719-llvm-8cce9bdf179c29c3652d4862d6317d7e2e7a411c.zip
1 files changed, 37 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/R600/unroll.ll b/llvm/test/CodeGen/R600/unroll.ll
new file mode 100644
index 00000000000..e0035eae71c
--- /dev/null
+++ b/llvm/test/CodeGen/R600/unroll.ll
@@ -0,0 +1,37 @@
+; RUN: opt -loop-unroll -simplifycfg -sroa %s -S -o - | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+target triple = "r600--"
+
+; This test contains a simple loop that initializes an array declared in
+; private memory.  We want to make sure these kinds of loops are always
+; unrolled, because private memory is slow.
+
+; CHECK-LABEL: @test
+; CHECK-NOT: alloca
+; CHECK: store i32 5, i32 addrspace(1)* %out
+define void @test(i32 addrspace(1)* %out) {     ; fills a 32-element private array, then copies element 5 to %out
+entry:
+  %0 = alloca [32 x i32]                        ; array in private memory that the loop initializes
+  br label %loop.header
+
+loop.header:
+  %counter = phi i32 [0, %entry], [%inc, %loop.inc]   ; induction variable, starts at 0
+  br label %loop.body
+
+loop.body:
+  %ptr = getelementptr [32 x i32]* %0, i32 0, i32 %counter
+  store i32 %counter, i32* %ptr                 ; array[counter] = counter
+  br label %loop.inc
+
+loop.inc:
+  %inc = add i32 %counter, 1
+  %1 = icmp sge i32 %inc, 32                    ; test the incremented value so only indices 0..31 are written
+  br i1 %1, label  %exit, label %loop.header
+
+exit:
+  %2 = getelementptr [32 x i32]* %0, i32 0, i32 5
+  %3 = load i32* %2                             ; reads back array[5], which the loop set to 5
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
author	Tom Stellard <thomas.stellard@amd.com>	2014-01-23 18:49:28 +0000
committer	Tom Stellard <thomas.stellard@amd.com>	2014-01-23 18:49:28 +0000
commit	8cce9bdf179c29c3652d4862d6317d7e2e7a411c (patch)
tree	3853f3f2a08df6448a6fa1ecfb3778ee31a1da1c /llvm/test/CodeGen
parent	b3500e606195e9026d292a1cdd3faede6be7b0c2 (diff)
download	bcm5719-llvm-8cce9bdf179c29c3652d4862d6317d7e2e7a411c.tar.gz bcm5719-llvm-8cce9bdf179c29c3652d4862d6317d7e2e7a411c.zip