author    Chris Lattner <sabre@nondot.org>	2011-01-16 06:18:28 +0000
committer Chris Lattner <sabre@nondot.org>	2011-01-16 06:18:28 +0000
commit    6fab2e9418635c435a516e83279b25532e3bb1bf (patch)
tree      ccc82d7fd527b84067ca85f86a4d85a2afc33ca9 /llvm/test/Transforms/ScalarRepl
parent    7cd8cf7d245b31309adbdb749564d3eee07ce50e (diff)
if an alloca is only ever accessed as a unit, and is accessed with load/store instructions,
then don't try to decimate it into its individual pieces. This will just make a mess of the
IR and is pointless if none of the elements are individually accessed. This was generating
really terrible code for std::bitset (PR8980) because it happens to be lowered by clang as
an {[8 x i8]} structure instead of {i64}.

The testcase now is optimized to:

define i64 @test2(i64 %X) {
  br label %L2

L2:                                               ; preds = %0
  ret i64 %X
}

before we generated:

define i64 @test2(i64 %X) {
  %sroa.store.elt = lshr i64 %X, 56
  %1 = trunc i64 %sroa.store.elt to i8
  %sroa.store.elt8 = lshr i64 %X, 48
  %2 = trunc i64 %sroa.store.elt8 to i8
  %sroa.store.elt9 = lshr i64 %X, 40
  %3 = trunc i64 %sroa.store.elt9 to i8
  %sroa.store.elt10 = lshr i64 %X, 32
  %4 = trunc i64 %sroa.store.elt10 to i8
  %sroa.store.elt11 = lshr i64 %X, 24
  %5 = trunc i64 %sroa.store.elt11 to i8
  %sroa.store.elt12 = lshr i64 %X, 16
  %6 = trunc i64 %sroa.store.elt12 to i8
  %sroa.store.elt13 = lshr i64 %X, 8
  %7 = trunc i64 %sroa.store.elt13 to i8
  %8 = trunc i64 %X to i8
  br label %L2

L2:                                               ; preds = %0
  %9 = zext i8 %1 to i64
  %10 = shl i64 %9, 56
  %11 = zext i8 %2 to i64
  %12 = shl i64 %11, 48
  %13 = or i64 %12, %10
  %14 = zext i8 %3 to i64
  %15 = shl i64 %14, 40
  %16 = or i64 %15, %13
  %17 = zext i8 %4 to i64
  %18 = shl i64 %17, 32
  %19 = or i64 %18, %16
  %20 = zext i8 %5 to i64
  %21 = shl i64 %20, 24
  %22 = or i64 %21, %19
  %23 = zext i8 %6 to i64
  %24 = shl i64 %23, 16
  %25 = or i64 %24, %22
  %26 = zext i8 %7 to i64
  %27 = shl i64 %26, 8
  %28 = or i64 %27, %25
  %29 = zext i8 %8 to i64
  %30 = or i64 %29, %28
  ret i64 %30
}

In this case, instcombine was able to eliminate the nonsense, but in PR8980 enough PHIs
are in play that instcombine backs off. It's better to not generate this stuff in the
first place.

llvm-svn: 123571
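To make the heuristic concrete, here is a hypothetical contrast (illustrative only, not part of the commit; the function names are invented). In the first function the alloca is only ever read and written as a whole unit through full-width load/store, so after this change scalarrepl treats it as a single i64 scalar instead of decimating it into eight i8 fragments, and it folds away cleanly to ret i64 %V. In the second, the fields are addressed individually through getelementptr, so the alloca is still worth splitting into scalars as before:

define i64 @whole_unit(i64 %V) {
	; hypothetical: only ever accessed as one i64 unit,
	; so it is no longer broken into per-byte pieces
	%A = alloca [8 x i8]
	%P = bitcast [8 x i8]* %A to i64*
	store i64 %V, i64* %P
	%R = load i64* %P
	ret i64 %R
}

define i32 @split_fields() {
	; hypothetical: fields accessed one at a time via GEP,
	; so splitting into scalars is still profitable
	%S = alloca { i32, float }
	%F = getelementptr { i32, float }* %S, i64 0, i32 0
	store i32 42, i32* %F
	%R = load i32* %F
	ret i32 %R
}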
Diffstat (limited to 'llvm/test/Transforms/ScalarRepl')
 llvm/test/Transforms/ScalarRepl/basictest.ll | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/llvm/test/Transforms/ScalarRepl/basictest.ll b/llvm/test/Transforms/ScalarRepl/basictest.ll
index a26b62d0ad7..9676873c30c 100644
--- a/llvm/test/Transforms/ScalarRepl/basictest.ll
+++ b/llvm/test/Transforms/ScalarRepl/basictest.ll
@@ -1,11 +1,30 @@
-; RUN: opt < %s -scalarrepl -mem2reg -S | not grep alloca
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
-define i32 @test() {
+define i32 @test1() {
 	%X = alloca { i32, float }		; <{ i32, float }*> [#uses=1]
 	%Y = getelementptr { i32, float }* %X, i64 0, i32 0		; <i32*> [#uses=2]
 	store i32 0, i32* %Y
 	%Z = load i32* %Y		; <i32> [#uses=1]
 	ret i32 %Z
+; CHECK: @test1
+; CHECK-NOT: alloca
+; CHECK: ret i32 0
+}
+
+; PR8980
+define i64 @test2(i64 %X) {
+	%A = alloca [8 x i8]
+	%B = bitcast [8 x i8]* %A to i64*
+
+	store i64 %X, i64* %B
+	br label %L2
+
+L2:
+	%Z = load i64* %B		; <i64> [#uses=1]
+ ret i64 %Z
+; CHECK: @test2
+; CHECK-NOT: alloca
+; CHECK: ret i64 %X
 }
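For reference, lit expands the new RUN line (substituting %s with the test file's path) into roughly the following pipeline; this is a sketch assuming a contemporary LLVM build in which scalarrepl still exists as a standalone opt pass (it was later replaced by the rewritten SROA pass):

	opt < llvm/test/Transforms/ScalarRepl/basictest.ll -scalarrepl -S | FileCheck llvm/test/Transforms/ScalarRepl/basictest.ll

Unlike the old `not grep alloca` check, the FileCheck directives also pin down the optimized bodies: no alloca may survive in either function, @test1 must return the constant 0, and @test2 must return %X unchanged.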