From 9601ddb2f306b6fad7aa1e34ccd3973fe7145275 Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sat, 18 Jun 2011 06:17:51 +0000 Subject: When scalar replacement returns a vector type, only accept it if the vector type's bitwidth matches the (allocated) size of the alloca. This severely pessimizes vector scalar replacement when the only vector type being used is something like <3 x float> on x86 or ARM whose allocated size matches a <4 x float>. I hope to fix some of the flawed assumptions about allocated size throughout scalar replacement and reenable this in most cases. llvm-svn: 133338 --- .../ScalarRepl/2011-06-08-VectorExtractValue.ll | 6 ++++-- .../ScalarRepl/2011-06-17-VectorPartialMemset.ll | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'llvm/test/Transforms/ScalarRepl') diff --git a/llvm/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll b/llvm/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll index 32e67fbce78..98fa1c684ce 100644 --- a/llvm/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll +++ b/llvm/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll @@ -10,7 +10,8 @@ target triple = "x86_64-apple-macosx10.7.0" ; CHECK: main ; CHECK-NOT: alloca -; CHECK: extractelement <2 x float> zeroinitializer +; CHECK: %[[A:[a-z0-9]*]] = and i128 +; CHECK: %[[B:[a-z0-9]*]] = trunc i128 %[[A]] to i32 define void @main() uwtable ssp { entry: @@ -27,7 +28,8 @@ entry: ; CHECK: test1 ; CHECK-NOT: alloca -; CHECK: extractelement <2 x float> zeroinitializer +; CHECK: %[[A:[a-z0-9]*]] = and i128 +; CHECK: %[[B:[a-z0-9]*]] = trunc i128 %[[A]] to i32 define void @test1() uwtable ssp { entry: diff --git a/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll b/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll index 29d195eba36..f8530d68a8f 100644 --- a/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll +++ b/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll @@ -19,4 +19,19 @@ entry: ret float %val } +; CHECK: g +; CHECK-NOT: alloca +; CHECK: and i128 + +define void @g() nounwind ssp { +entry: + %a = alloca { <4 x float> }, align 16 + %p = bitcast { <4 x float> }* %a to i8* + call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 16, i32 16, i1 false) + %q = bitcast { <4 x float> }* %a to [2 x <2 x float>]* + %arrayidx = getelementptr inbounds [2 x <2 x float>]* %q, i32 0, i32 0 + store <2 x float> undef, <2 x float>* %arrayidx, align 8 + ret void +} + declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind -- cgit v1.2.3