diff options
Diffstat (limited to 'llvm/test/Transforms/SROA')
24 files changed, 0 insertions, 4975 deletions
diff --git a/llvm/test/Transforms/SROA/address-spaces.ll b/llvm/test/Transforms/SROA/address-spaces.ll deleted file mode 100644 index 9cd9137833a..00000000000 --- a/llvm/test/Transforms/SROA/address-spaces.ll +++ /dev/null @@ -1,131 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) -declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture readonly, i32, i1) -declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) -declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) - - -; Make sure an illegal bitcast isn't introduced -define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)* %b) { -; CHECK-LABEL: @test_address_space_1_1( -; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2 -; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2 -; CHECK: ret void - %aa = alloca <2 x i64>, align 16 - %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)* - %aaptr = bitcast <2 x i64>* %aa to i8* - call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 2 %aaptr, i8 addrspace(1)* align 2 %aptr, i32 16, i1 false) - %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 2 %bptr, i8* align 2 %aaptr, i32 16, i1 false) - ret void -} - -define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16* %b) { -; CHECK-LABEL: @test_address_space_1_0( -; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2 -; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2 -; CHECK: ret void - %aa = alloca <2 x i64>, align 16 - %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)* - %aaptr = bitcast <2 
x i64>* %aa to i8* - call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 2 %aaptr, i8 addrspace(1)* align 2 %aptr, i32 16, i1 false) - %bptr = bitcast i16* %b to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %bptr, i8* align 2 %aaptr, i32 16, i1 false) - ret void -} - -define void @test_address_space_0_1(<2 x i64>* %a, i16 addrspace(1)* %b) { -; CHECK-LABEL: @test_address_space_0_1( -; CHECK: load <2 x i64>, <2 x i64>* %a, align 2 -; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2 -; CHECK: ret void - %aa = alloca <2 x i64>, align 16 - %aptr = bitcast <2 x i64>* %a to i8* - %aaptr = bitcast <2 x i64>* %aa to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %aaptr, i8* align 2 %aptr, i32 16, i1 false) - %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 2 %bptr, i8* align 2 %aaptr, i32 16, i1 false) - ret void -} - -%struct.struct_test_27.0.13 = type { i32, float, i64, i8, [4 x i32] } - -; Function Attrs: nounwind -define void @copy_struct([5 x i64] %in.coerce) { -; CHECK-LABEL: @copy_struct( -; CHECK-NOT: memcpy -for.end: - %in = alloca %struct.struct_test_27.0.13, align 8 - %0 = bitcast %struct.struct_test_27.0.13* %in to [5 x i64]* - store [5 x i64] %in.coerce, [5 x i64]* %0, align 8 - %scevgep9 = getelementptr %struct.struct_test_27.0.13, %struct.struct_test_27.0.13* %in, i32 0, i32 4, i32 0 - %scevgep910 = bitcast i32* %scevgep9 to i8* - call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 4 undef, i8* align 4 %scevgep910, i32 16, i1 false) - ret void -} - -%union.anon = type { i32* } - -@g = common global i32 0, align 4 -@l = common addrspace(3) global i32 0, align 4 - -; Make sure an illegal bitcast isn't introduced -define void @pr27557() { -; CHECK-LABEL: @pr27557( -; CHECK: %[[CAST:.*]] = bitcast i32** {{.*}} to i32 addrspace(3)** -; CHECK: store i32 addrspace(3)* @l, i32 addrspace(3)** %[[CAST]] - %1 = alloca %union.anon, align 8 - %2 = bitcast 
%union.anon* %1 to i32** - store i32* @g, i32** %2, align 8 - %3 = bitcast %union.anon* %1 to i32 addrspace(3)** - store i32 addrspace(3)* @l, i32 addrspace(3)** %3, align 8 - ret void -} - -; Make sure pre-splitting doesn't try to introduce an illegal bitcast -define float @presplit(i64 addrspace(1)* %p) { -entry: -; CHECK-LABEL: @presplit( -; CHECK: %[[CAST:.*]] = bitcast i64 addrspace(1)* {{.*}} to i32 addrspace(1)* -; CHECK: load i32, i32 addrspace(1)* %[[CAST]] - %b = alloca i64 - %b.cast = bitcast i64* %b to [2 x float]* - %b.gep1 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 0 - %b.gep2 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 1 - %l = load i64, i64 addrspace(1)* %p - store i64 %l, i64* %b - %f1 = load float, float* %b.gep1 - %f2 = load float, float* %b.gep2 - %ret = fadd float %f1, %f2 - ret float %ret -} - -; Test load from and store to non-zero address space. -define void @test_load_store_diff_addr_space([2 x float] addrspace(1)* %complex1, [2 x float] addrspace(1)* %complex2) { -; CHECK-LABEL: @test_load_store_diff_addr_space -; CHECK-NOT: alloca -; CHECK: load i32, i32 addrspace(1)* -; CHECK: load i32, i32 addrspace(1)* -; CHECK: store i32 %{{.*}}, i32 addrspace(1)* -; CHECK: store i32 %{{.*}}, i32 addrspace(1)* - %a = alloca i64 - %a.cast = bitcast i64* %a to [2 x float]* - %a.gep1 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 0 - %a.gep2 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 1 - %complex1.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex1, i32 0, i32 0 - %p1 = bitcast float addrspace(1)* %complex1.gep to i64 addrspace(1)* - %v1 = load i64, i64 addrspace(1)* %p1 - store i64 %v1, i64* %a - %f1 = load float, float* %a.gep1 - %f2 = load float, float* %a.gep2 - %sum = fadd float %f1, %f2 - store float %sum, float* %a.gep1 - store float %sum, float* %a.gep2 - %v2 = load i64, i64* %a - %complex2.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex2, 
i32 0, i32 0 - %p2 = bitcast float addrspace(1)* %complex2.gep to i64 addrspace(1)* - store i64 %v2, i64 addrspace(1)* %p2 - ret void -} diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll deleted file mode 100644 index 81f8f2a00ba..00000000000 --- a/llvm/test/Transforms/SROA/alignment.ll +++ /dev/null @@ -1,231 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -; RUN: opt -debugify -sroa -S < %s | FileCheck %s -check-prefix DEBUGLOC - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) - -define void @test1({ i8, i8 }* %a, { i8, i8 }* %b) { -; CHECK-LABEL: @test1( -; CHECK: %[[gep_a0:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %a, i64 0, i32 0 -; CHECK: %[[a0:.*]] = load i8, i8* %[[gep_a0]], align 16 -; CHECK: %[[gep_a1:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %a, i64 0, i32 1 -; CHECK: %[[a1:.*]] = load i8, i8* %[[gep_a1]], align 1 -; CHECK: %[[gep_b0:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %b, i64 0, i32 0 -; CHECK: store i8 %[[a0]], i8* %[[gep_b0]], align 16 -; CHECK: %[[gep_b1:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %b, i64 0, i32 1 -; CHECK: store i8 %[[a1]], i8* %[[gep_b1]], align 1 -; CHECK: ret void - -entry: - %alloca = alloca { i8, i8 }, align 16 - %gep_a = getelementptr { i8, i8 }, { i8, i8 }* %a, i32 0, i32 0 - %gep_alloca = getelementptr { i8, i8 }, { i8, i8 }* %alloca, i32 0, i32 0 - %gep_b = getelementptr { i8, i8 }, { i8, i8 }* %b, i32 0, i32 0 - - store i8 420, i8* %gep_alloca, align 16 - - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %gep_alloca, i8* align 16 %gep_a, i32 2, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %gep_b, i8* align 16 %gep_alloca, i32 2, i1 false) - ret void -} - -define void @test2() { -; CHECK-LABEL: @test2( -; CHECK: alloca i16 -; CHECK: load i8, i8* %{{.*}} -; 
CHECK: store i8 42, i8* %{{.*}} -; CHECK: ret void - -; Check that when sroa rewrites the alloca partition -; it preserves the original DebugLocation. -; DEBUGLOC-LABEL: @test2( -; DEBUGLOC: {{.*}} = alloca {{.*}} !dbg ![[DbgLoc:[0-9]+]] -; DEBUGLOC-LABEL: } -; -; DEBUGLOC: ![[DbgLoc]] = !DILocation(line: 9, - -entry: - %a = alloca { i8, i8, i8, i8 }, align 2 ; "line 9" to -debugify - %gep1 = getelementptr { i8, i8, i8, i8 }, { i8, i8, i8, i8 }* %a, i32 0, i32 1 - %cast1 = bitcast i8* %gep1 to i16* - store volatile i16 0, i16* %cast1 - %gep2 = getelementptr { i8, i8, i8, i8 }, { i8, i8, i8, i8 }* %a, i32 0, i32 2 - %result = load i8, i8* %gep2 - store i8 42, i8* %gep2 - ret void -} - -define void @PR13920(<2 x i64>* %a, i16* %b) { -; Test that alignments on memcpy intrinsics get propagated to loads and stores. -; CHECK-LABEL: @PR13920( -; CHECK: load <2 x i64>, <2 x i64>* %a, align 2 -; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2 -; CHECK: ret void - -entry: - %aa = alloca <2 x i64>, align 16 - %aptr = bitcast <2 x i64>* %a to i8* - %aaptr = bitcast <2 x i64>* %aa to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %aaptr, i8* align 2 %aptr, i32 16, i1 false) - %bptr = bitcast i16* %b to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %bptr, i8* align 2 %aaptr, i32 16, i1 false) - ret void -} - -define void @test3(i8* %x) { -; Test that when we promote an alloca to a type with lower ABI alignment, we -; provide the needed explicit alignment that code using the alloca may be -; expecting. However, also check that any offset within an alloca can in turn -; reduce the alignment. 
-; CHECK-LABEL: @test3( -; CHECK: alloca [22 x i8], align 8 -; CHECK: alloca [18 x i8], align 2 -; CHECK: ret void - -entry: - %a = alloca { i8*, i8*, i8* } - %b = alloca { i8*, i8*, i8* } - %a_raw = bitcast { i8*, i8*, i8* }* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %a_raw, i8* align 8 %x, i32 22, i1 false) - %b_raw = bitcast { i8*, i8*, i8* }* %b to i8* - %b_gep = getelementptr i8, i8* %b_raw, i32 6 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %b_gep, i8* align 2 %x, i32 18, i1 false) - ret void -} - -define void @test5() { -; Test that we preserve underaligned loads and stores when splitting. The use -; of volatile in this test case is just to force the loads and stores to not be -; split or promoted out of existence. -; -; CHECK-LABEL: @test5( -; CHECK: alloca [9 x i8] -; CHECK: alloca [9 x i8] -; CHECK: store volatile double 0.0{{.*}}, double* %{{.*}}, align 1 -; CHECK: load volatile i16, i16* %{{.*}}, align 1 -; CHECK: load double, double* %{{.*}}, align 1 -; CHECK: store volatile double %{{.*}}, double* %{{.*}}, align 1 -; CHECK: load volatile i16, i16* %{{.*}}, align 1 -; CHECK: ret void - -entry: - %a = alloca [18 x i8] - %raw1 = getelementptr inbounds [18 x i8], [18 x i8]* %a, i32 0, i32 0 - %ptr1 = bitcast i8* %raw1 to double* - store volatile double 0.0, double* %ptr1, align 1 - %weird_gep1 = getelementptr inbounds [18 x i8], [18 x i8]* %a, i32 0, i32 7 - %weird_cast1 = bitcast i8* %weird_gep1 to i16* - %weird_load1 = load volatile i16, i16* %weird_cast1, align 1 - - %raw2 = getelementptr inbounds [18 x i8], [18 x i8]* %a, i32 0, i32 9 - %ptr2 = bitcast i8* %raw2 to double* - %d1 = load double, double* %ptr1, align 1 - store volatile double %d1, double* %ptr2, align 1 - %weird_gep2 = getelementptr inbounds [18 x i8], [18 x i8]* %a, i32 0, i32 16 - %weird_cast2 = bitcast i8* %weird_gep2 to i16* - %weird_load2 = load volatile i16, i16* %weird_cast2, align 1 - - ret void -} - -define void @test6() { -; Test that we promote 
alignment when the underlying alloca switches to one -; that innately provides it. -; CHECK-LABEL: @test6( -; CHECK: alloca double -; CHECK: alloca double -; CHECK-NOT: align -; CHECK: ret void - -entry: - %a = alloca [16 x i8] - %raw1 = getelementptr inbounds [16 x i8], [16 x i8]* %a, i32 0, i32 0 - %ptr1 = bitcast i8* %raw1 to double* - store volatile double 0.0, double* %ptr1, align 1 - - %raw2 = getelementptr inbounds [16 x i8], [16 x i8]* %a, i32 0, i32 8 - %ptr2 = bitcast i8* %raw2 to double* - %val = load double, double* %ptr1, align 1 - store volatile double %val, double* %ptr2, align 1 - - ret void -} - -define void @test7(i8* %out) { -; Test that we properly compute the destination alignment when rewriting -; memcpys as direct loads or stores. -; CHECK-LABEL: @test7( -; CHECK-NOT: alloca - -entry: - %a = alloca [16 x i8] - %raw1 = getelementptr inbounds [16 x i8], [16 x i8]* %a, i32 0, i32 0 - %ptr1 = bitcast i8* %raw1 to double* - %raw2 = getelementptr inbounds [16 x i8], [16 x i8]* %a, i32 0, i32 8 - %ptr2 = bitcast i8* %raw2 to double* - - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %raw1, i8* %out, i32 16, i1 false) -; CHECK: %[[val2:.*]] = load double, double* %{{.*}}, align 1 -; CHECK: %[[val1:.*]] = load double, double* %{{.*}}, align 1 - - %val1 = load double, double* %ptr2, align 1 - %val2 = load double, double* %ptr1, align 1 - - store double %val1, double* %ptr1, align 1 - store double %val2, double* %ptr2, align 1 - - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %raw1, i32 16, i1 false) -; CHECK: store double %[[val1]], double* %{{.*}}, align 1 -; CHECK: store double %[[val2]], double* %{{.*}}, align 1 - - ret void -; CHECK: ret void -} - -define void @test8() { -; CHECK-LABEL: @test8( -; CHECK: load i32, {{.*}}, align 1 -; CHECK: load i32, {{.*}}, align 1 -; CHECK: load i32, {{.*}}, align 1 -; CHECK: load i32, {{.*}}, align 1 -; CHECK: load i32, {{.*}}, align 1 - - %ptr = alloca [5 x i32], align 1 - %ptr.8 = bitcast [5 x i32]* %ptr to i8* 
- call void @populate(i8* %ptr.8) - %val = load [5 x i32], [5 x i32]* %ptr, align 1 - ret void -} - -define void @test9() { -; CHECK-LABEL: @test9( -; CHECK: load i32, {{.*}}, align 8 -; CHECK: load i32, {{.*}}, align 4 -; CHECK: load i32, {{.*}}, align 8 -; CHECK: load i32, {{.*}}, align 4 -; CHECK: load i32, {{.*}}, align 8 - - %ptr = alloca [5 x i32], align 8 - %ptr.8 = bitcast [5 x i32]* %ptr to i8* - call void @populate(i8* %ptr.8) - %val = load [5 x i32], [5 x i32]* %ptr, align 8 - ret void -} - -define void @test10() { -; CHECK-LABEL: @test10( -; CHECK: load i32, {{.*}}, align 2 -; CHECK: load i8, {{.*}}, align 2 -; CHECK: load i8, {{.*}}, align 1 -; CHECK: load i8, {{.*}}, align 2 -; CHECK: load i16, {{.*}}, align 2 - - %ptr = alloca {i32, i8, i8, {i8, i16}}, align 2 - %ptr.8 = bitcast {i32, i8, i8, {i8, i16}}* %ptr to i8* - call void @populate(i8* %ptr.8) - %val = load {i32, i8, i8, {i8, i16}}, {i32, i8, i8, {i8, i16}}* %ptr, align 2 - ret void -} - -declare void @populate(i8*) diff --git a/llvm/test/Transforms/SROA/alloca-address-space.ll b/llvm/test/Transforms/SROA/alloca-address-space.ll deleted file mode 100644 index d28bc39e963..00000000000 --- a/llvm/test/Transforms/SROA/alloca-address-space.ll +++ /dev/null @@ -1,113 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64-A2" - -declare void @llvm.memcpy.p2i8.p2i8.i32(i8 addrspace(2)* nocapture, i8 addrspace(2)* nocapture readonly, i32, i1) -declare void @llvm.memcpy.p1i8.p2i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture readonly, i32, i1) -declare void @llvm.memcpy.p2i8.p1i8.i32(i8 addrspace(2)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) -declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) - - - -; CHECK-LABEL: @test_address_space_1_1( -; CHECK: load <2 x 
i64>, <2 x i64> addrspace(1)* %a, align 2 -; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2 -; CHECK: ret void -define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)* %b) { - %aa = alloca <2 x i64>, align 16, addrspace(2) - %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)* - %aaptr = bitcast <2 x i64> addrspace(2)* %aa to i8 addrspace(2)* - call void @llvm.memcpy.p2i8.p1i8.i32(i8 addrspace(2)* align 2 %aaptr, i8 addrspace(1)* align 2 %aptr, i32 16, i1 false) - %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p2i8.i32(i8 addrspace(1)* align 2 %bptr, i8 addrspace(2)* align 2 %aaptr, i32 16, i1 false) - ret void -} - -; CHECK-LABEL: @test_address_space_1_0( -; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2 -; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(2)* {{.*}}, align 2 -; CHECK: ret void -define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16 addrspace(2)* %b) { - %aa = alloca <2 x i64>, align 16, addrspace(2) - %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)* - %aaptr = bitcast <2 x i64> addrspace(2)* %aa to i8 addrspace(2)* - call void @llvm.memcpy.p2i8.p1i8.i32(i8 addrspace(2)* align 2 %aaptr, i8 addrspace(1)* align 2 %aptr, i32 16, i1 false) - %bptr = bitcast i16 addrspace(2)* %b to i8 addrspace(2)* - call void @llvm.memcpy.p2i8.p2i8.i32(i8 addrspace(2)* align 2 %bptr, i8 addrspace(2)* align 2 %aaptr, i32 16, i1 false) - ret void -} - -; CHECK-LABEL: @test_address_space_0_1( -; CHECK: load <2 x i64>, <2 x i64> addrspace(2)* %a, align 2 -; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2 -; CHECK: ret void -define void @test_address_space_0_1(<2 x i64> addrspace(2)* %a, i16 addrspace(1)* %b) { - %aa = alloca <2 x i64>, align 16, addrspace(2) - %aptr = bitcast <2 x i64> addrspace(2)* %a to i8 addrspace(2)* - %aaptr = bitcast <2 x i64> addrspace(2)* %aa to i8 addrspace(2)* - call void 
@llvm.memcpy.p2i8.p2i8.i32(i8 addrspace(2)* align 2 %aaptr, i8 addrspace(2)* align 2 %aptr, i32 16, i1 false) - %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p2i8.i32(i8 addrspace(1)* align 2 %bptr, i8 addrspace(2)* align 2 %aaptr, i32 16, i1 false) - ret void -} - -%struct.struct_test_27.0.13 = type { i32, float, i64, i8, [4 x i32] } - -; CHECK-LABEL: @copy_struct( -; CHECK-NOT: memcpy -define void @copy_struct([5 x i64] %in.coerce) { -for.end: - %in = alloca %struct.struct_test_27.0.13, align 8, addrspace(2) - %0 = bitcast %struct.struct_test_27.0.13 addrspace(2)* %in to [5 x i64] addrspace(2)* - store [5 x i64] %in.coerce, [5 x i64] addrspace(2)* %0, align 8 - %scevgep9 = getelementptr %struct.struct_test_27.0.13, %struct.struct_test_27.0.13 addrspace(2)* %in, i32 0, i32 4, i32 0 - %scevgep910 = bitcast i32 addrspace(2)* %scevgep9 to i8 addrspace(2)* - call void @llvm.memcpy.p1i8.p2i8.i32(i8 addrspace(1)* align 4 undef, i8 addrspace(2)* align 4 %scevgep910, i32 16, i1 false) - ret void -} - -%union.anon = type { i32* } - -@g = common global i32 0, align 4 -@l = common addrspace(3) global i32 0, align 4 - -; Make sure an illegal bitcast isn't introduced -; CHECK-LABEL: @pr27557( -; CHECK: %[[CAST:.*]] = bitcast i32* addrspace(2)* {{.*}} to i32 addrspace(3)* addrspace(2)* -; CHECK: store i32 addrspace(3)* @l, i32 addrspace(3)* addrspace(2)* %[[CAST]] -define void @pr27557() { - %1 = alloca %union.anon, align 8, addrspace(2) - %2 = bitcast %union.anon addrspace(2)* %1 to i32* addrspace(2)* - store i32* @g, i32* addrspace(2)* %2, align 8 - %3 = bitcast %union.anon addrspace(2)* %1 to i32 addrspace(3)* addrspace(2)* - store i32 addrspace(3)* @l, i32 addrspace(3)* addrspace(2)* %3, align 8 - ret void -} - -; Test load from and store to non-zero address space. 
-define void @test_load_store_diff_addr_space([2 x float] addrspace(1)* %complex1, [2 x float] addrspace(1)* %complex2) { -; CHECK-LABEL: @test_load_store_diff_addr_space -; CHECK-NOT: alloca -; CHECK: load i32, i32 addrspace(1)* -; CHECK: load i32, i32 addrspace(1)* -; CHECK: store i32 %{{.*}}, i32 addrspace(1)* -; CHECK: store i32 %{{.*}}, i32 addrspace(1)* - %a0 = alloca [2 x i64], align 8, addrspace(2) - %a = getelementptr [2 x i64], [2 x i64] addrspace(2)* %a0, i32 0, i32 0 - %a.cast = bitcast i64 addrspace(2)* %a to [2 x float] addrspace(2)* - %a.gep1 = getelementptr [2 x float], [2 x float] addrspace(2)* %a.cast, i32 0, i32 0 - %a.gep2 = getelementptr [2 x float], [2 x float] addrspace(2)* %a.cast, i32 0, i32 1 - %complex1.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex1, i32 0, i32 0 - %p1 = bitcast float addrspace(1)* %complex1.gep to i64 addrspace(1)* - %v1 = load i64, i64 addrspace(1)* %p1 - store i64 %v1, i64 addrspace(2)* %a - %f1 = load float, float addrspace(2)* %a.gep1 - %f2 = load float, float addrspace(2)* %a.gep2 - %sum = fadd float %f1, %f2 - store float %sum, float addrspace(2)* %a.gep1 - store float %sum, float addrspace(2)* %a.gep2 - %v2 = load i64, i64 addrspace(2)* %a - %complex2.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex2, i32 0, i32 0 - %p2 = bitcast float addrspace(1)* %complex2.gep to i64 addrspace(1)* - store i64 %v2, i64 addrspace(1)* %p2 - ret void -} diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll deleted file mode 100644 index 2c5829d6fce..00000000000 --- a/llvm/test/Transforms/SROA/basictest.ll +++ /dev/null @@ -1,1918 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -; RUN: opt < %s -passes=sroa -S | FileCheck %s - -target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" - -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) -declare void 
@llvm.lifetime.end.p0i8(i64, i8* nocapture) - -define i32 @test0() { -; CHECK-LABEL: @test0( -; CHECK-NOT: alloca -; CHECK: ret i32 - -entry: - %a1 = alloca i32 - %a2 = alloca float - - %a1.i8 = bitcast i32* %a1 to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* %a1.i8) - - store i32 0, i32* %a1 - %v1 = load i32, i32* %a1 - - call void @llvm.lifetime.end.p0i8(i64 4, i8* %a1.i8) - - %a2.i8 = bitcast float* %a2 to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* %a2.i8) - - store float 0.0, float* %a2 - %v2 = load float , float * %a2 - %v2.int = bitcast float %v2 to i32 - %sum1 = add i32 %v1, %v2.int - - call void @llvm.lifetime.end.p0i8(i64 4, i8* %a2.i8) - - ret i32 %sum1 -} - -define i32 @test1() { -; CHECK-LABEL: @test1( -; CHECK-NOT: alloca -; CHECK: ret i32 0 - -entry: - %X = alloca { i32, float } - %Y = getelementptr { i32, float }, { i32, float }* %X, i64 0, i32 0 - store i32 0, i32* %Y - %Z = load i32, i32* %Y - ret i32 %Z -} - -define i64 @test2(i64 %X) { -; CHECK-LABEL: @test2( -; CHECK-NOT: alloca -; CHECK: ret i64 %X - -entry: - %A = alloca [8 x i8] - %B = bitcast [8 x i8]* %A to i64* - store i64 %X, i64* %B - br label %L2 - -L2: - %Z = load i64, i64* %B - ret i64 %Z -} - -define void @test3(i8* %dst, i8* align 8 %src) { -; CHECK-LABEL: @test3( - -entry: - %a = alloca [300 x i8] -; CHECK-NOT: alloca -; CHECK: %[[test3_a1:.*]] = alloca [42 x i8] -; CHECK-NEXT: %[[test3_a2:.*]] = alloca [99 x i8] -; CHECK-NEXT: %[[test3_a3:.*]] = alloca [16 x i8] -; CHECK-NEXT: %[[test3_a4:.*]] = alloca [42 x i8] -; CHECK-NEXT: %[[test3_a5:.*]] = alloca [7 x i8] -; CHECK-NEXT: %[[test3_a6:.*]] = alloca [7 x i8] -; CHECK-NEXT: %[[test3_a7:.*]] = alloca [85 x i8] - - %b = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* align 8 %src, i32 300, i1 false), !tbaa !0 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0 -; CHECK-NEXT: call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %src, i32 42, {{.*}}), !tbaa [[TAG_0:!.*]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 42 -; CHECK-NEXT: %[[test3_r1:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99, {{.*}}), !tbaa [[TAG_0:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 142 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 16, {{.*}}), !tbaa [[TAG_0:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 158 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 42, {{.*}}), !tbaa [[TAG_0:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 200 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0:!.*]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 207 -; CHECK-NEXT: %[[test3_r2:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 208 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %[[gep_src]], 
i32 7, {{.*}}), !tbaa [[TAG_0:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 215 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85, {{.*}}), !tbaa [[TAG_0:!.*]] - - ; Clobber a single element of the array, this should be promotable, and be deleted. - %c = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 42 - store i8 0, i8* %c - - ; Make a sequence of overlapping stores to the array. These overlap both in - ; forward strides and in shrinking accesses. - %overlap.1.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 142 - %overlap.2.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 143 - %overlap.3.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 144 - %overlap.4.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 145 - %overlap.5.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 146 - %overlap.6.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 147 - %overlap.7.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 148 - %overlap.8.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 149 - %overlap.9.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 150 - %overlap.1.i16 = bitcast i8* %overlap.1.i8 to i16* - %overlap.1.i32 = bitcast i8* %overlap.1.i8 to i32* - %overlap.1.i64 = bitcast i8* %overlap.1.i8 to i64* - %overlap.2.i64 = bitcast i8* %overlap.2.i8 to i64* - %overlap.3.i64 = bitcast i8* %overlap.3.i8 to i64* - %overlap.4.i64 = bitcast i8* %overlap.4.i8 to i64* - %overlap.5.i64 = bitcast i8* %overlap.5.i8 to i64* - %overlap.6.i64 = bitcast i8* %overlap.6.i8 to i64* - %overlap.7.i64 = bitcast i8* %overlap.7.i8 to i64* - %overlap.8.i64 = bitcast i8* %overlap.8.i8 to i64* - %overlap.9.i64 = bitcast i8* %overlap.9.i8 to i64* - store i8 1, i8* %overlap.1.i8, !tbaa !3 -; CHECK-NEXT: %[[gep:.*]] = 
getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0 -; CHECK-NEXT: store i8 1, i8* %[[gep]], !tbaa [[TAG_3:!.*]] - store i16 1, i16* %overlap.1.i16, !tbaa !5 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i16* -; CHECK-NEXT: store i16 1, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_5:!.*]] - store i32 1, i32* %overlap.1.i32, !tbaa !7 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i32* -; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_7:!.*]] - store i64 1, i64* %overlap.1.i64, !tbaa !9 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i64* -; CHECK-NEXT: store i64 1, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_9:!.*]] - store i64 2, i64* %overlap.2.i64, !tbaa !11 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 1 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 2, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_11:!.*]] - store i64 3, i64* %overlap.3.i64, !tbaa !13 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 2 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 3, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_13:!.*]] - store i64 4, i64* %overlap.4.i64, !tbaa !15 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 3 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 4, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_15:!.*]] - store i64 5, i64* %overlap.5.i64, !tbaa !17 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 4 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 5, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_17:!.*]] - store i64 6, i64* %overlap.6.i64, !tbaa !19 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 
0, i64 5 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 6, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_19:!.*]] - store i64 7, i64* %overlap.7.i64, !tbaa !21 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 6 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 7, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_21:!.*]] - store i64 8, i64* %overlap.8.i64, !tbaa !23 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 7 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 8, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_23:!.*]] - store i64 9, i64* %overlap.9.i64, !tbaa !25 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 8 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 9, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_25:!.*]] - - ; Make two sequences of overlapping stores with more gaps and irregularities. 
- %overlap2.1.0.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 200 - %overlap2.1.1.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 201 - %overlap2.1.2.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 202 - %overlap2.1.3.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 203 - - %overlap2.2.0.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 208 - %overlap2.2.1.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 209 - %overlap2.2.2.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 210 - %overlap2.2.3.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 211 - - %overlap2.1.0.i16 = bitcast i8* %overlap2.1.0.i8 to i16* - %overlap2.1.0.i32 = bitcast i8* %overlap2.1.0.i8 to i32* - %overlap2.1.1.i32 = bitcast i8* %overlap2.1.1.i8 to i32* - %overlap2.1.2.i32 = bitcast i8* %overlap2.1.2.i8 to i32* - %overlap2.1.3.i32 = bitcast i8* %overlap2.1.3.i8 to i32* - store i8 1, i8* %overlap2.1.0.i8, !tbaa !27 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0 -; CHECK-NEXT: store i8 1, i8* %[[gep]], !tbaa [[TAG_27:!.*]] - store i16 1, i16* %overlap2.1.0.i16, !tbaa !29 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i16* -; CHECK-NEXT: store i16 1, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_29:!.*]] - store i32 1, i32* %overlap2.1.0.i32, !tbaa !31 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i32* -; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_31:!.*]] - store i32 2, i32* %overlap2.1.1.i32, !tbaa !33 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 1 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 2, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_33:!.*]] - store i32 3, i32* %overlap2.1.2.i32, !tbaa !35 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 2 -; CHECK-NEXT: %[[bitcast:.*]] = 
bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 3, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_35:!.*]] - store i32 4, i32* %overlap2.1.3.i32, !tbaa !37 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 3 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 4, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_37:!.*]] - - %overlap2.2.0.i32 = bitcast i8* %overlap2.2.0.i8 to i32* - %overlap2.2.1.i16 = bitcast i8* %overlap2.2.1.i8 to i16* - %overlap2.2.1.i32 = bitcast i8* %overlap2.2.1.i8 to i32* - %overlap2.2.2.i32 = bitcast i8* %overlap2.2.2.i8 to i32* - %overlap2.2.3.i32 = bitcast i8* %overlap2.2.3.i8 to i32* - store i32 1, i32* %overlap2.2.0.i32, !tbaa !39 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a6]] to i32* -; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_39:!.*]] - store i8 1, i8* %overlap2.2.1.i8, !tbaa !41 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1 -; CHECK-NEXT: store i8 1, i8* %[[gep]], !tbaa [[TAG_41:!.*]] - store i16 1, i16* %overlap2.2.1.i16, !tbaa !43 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: store i16 1, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_43:!.*]] - store i32 1, i32* %overlap2.2.1.i32, !tbaa !45 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_45:!.*]] - store i32 3, i32* %overlap2.2.2.i32, !tbaa !47 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 2 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 3, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_47:!.*]] - store i32 4, i32* %overlap2.2.3.i32, 
!tbaa !49 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 3 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 4, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_49:!.*]] - - %overlap2.prefix = getelementptr i8, i8* %overlap2.1.1.i8, i64 -4 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.prefix, i8* %src, i32 8, i1 false), !tbaa !51 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 39 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %src, i32 3, {{.*}}), !tbaa [[TAG_51:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 3 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 5, {{.*}}), !tbaa [[TAG_51]] - - ; Bridge between the overlapping areas - call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i1 false), !tbaa !53 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 2 -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 5, {{.*}}), !tbaa [[TAG_53:!.*]] -; ...promoted i8 store... -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 2, {{.*}}), !tbaa [[TAG_53]] - - ; Entirely within the second overlap. - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i1 false), !tbaa !55 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 5, {{.*}}), !tbaa [[TAG_55:!.*]] - - ; Trailing past the second overlap. 
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.2.i8, i8* %src, i32 8, i1 false), !tbaa !57 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 2 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 5, {{.*}}), !tbaa [[TAG_57:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 5 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 3, {{.*}}), !tbaa [[TAG_57]] - - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 300, i1 false), !tbaa !59 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 42, {{.*}}), !tbaa [[TAG_59:!.*]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 42 -; CHECK-NEXT: store i8 0, i8* %[[gep]], {{.*}}, !tbaa [[TAG_59]] -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 43 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99, {{.*}}), !tbaa [[TAG_59]] -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 142 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 16, {{.*}}), !tbaa [[TAG_59]] -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 158 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 
%[[gep_dst]], i8* align 1 %[[gep_src]], i32 42, {{.*}}), !tbaa [[TAG_59]] -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 200 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_59]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 207 -; CHECK-NEXT: store i8 42, i8* %[[gep]], {{.*}}, !tbaa [[TAG_59]] -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 208 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_59]] -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 215 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85, {{.*}}), !tbaa [[TAG_59]] - - ret void -} - -define void @test4(i8* %dst, i8* %src) { -; CHECK-LABEL: @test4( - -entry: - %a = alloca [100 x i8] -; CHECK-NOT: alloca -; CHECK: %[[test4_a1:.*]] = alloca [20 x i8] -; CHECK-NEXT: %[[test4_a2:.*]] = alloca [7 x i8] -; CHECK-NEXT: %[[test4_a3:.*]] = alloca [10 x i8] -; CHECK-NEXT: %[[test4_a4:.*]] = alloca [7 x i8] -; CHECK-NEXT: %[[test4_a5:.*]] = alloca [7 x i8] -; CHECK-NEXT: %[[test4_a6:.*]] = alloca [40 x i8] - - %b = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 100, i1 false), !tbaa !0 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8], [20 x i8]* %[[test4_a1]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 20, {{.*}}), !tbaa [[TAG_0]] -; 
CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 20 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: %[[test4_r1:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 22 -; CHECK-NEXT: %[[test4_r2:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 23 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 30 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %[[test4_a3]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10, {{.*}}), !tbaa [[TAG_0]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 40 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: %[[test4_r3:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 42 -; CHECK-NEXT: %[[test4_r4:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 50 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: %[[test4_r5:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 52 
-; CHECK-NEXT: %[[test4_r6:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 53 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 60 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [40 x i8], [40 x i8]* %[[test4_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40, {{.*}}), !tbaa [[TAG_0]] - - %a.src.1 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 20 - %a.dst.1 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 40 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i1 false), !tbaa !3 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_3]] - - ; Clobber a single element of the array, this should be promotable, and be deleted. 
- %c = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 42 - store i8 0, i8* %c - - %a.src.2 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 50 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i1 false), !tbaa !5 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_5]] - - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i1 false), !tbaa !7 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8], [20 x i8]* %[[test4_a1]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 20, {{.*}}), !tbaa [[TAG_7]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 20 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: store i16 %[[test4_r1]], i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_7]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 22 -; CHECK-NEXT: store i8 %[[test4_r2]], i8* %[[gep]], {{.*}}, !tbaa [[TAG_7]] -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 23 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_7]] -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 30 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %[[test4_a3]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10, {{.*}}), !tbaa [[TAG_7]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 40 -; 
CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_7]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 42 -; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]], {{.*}}, !tbaa [[TAG_7]] -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 43 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_7]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 50 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_7]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 52 -; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]], {{.*}}, !tbaa [[TAG_7]] -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 53 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_7]] -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 60 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [40 x i8], [40 x i8]* %[[test4_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40, {{.*}}), !tbaa [[TAG_7]] - - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind -declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) nounwind -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind - -define i16 
@test5() { -; CHECK-LABEL: @test5( -; CHECK-NOT: alloca float -; CHECK: %[[cast:.*]] = bitcast float 0.0{{.*}} to i32 -; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[cast]], 16 -; CHECK-NEXT: %[[trunc:.*]] = trunc i32 %[[shr]] to i16 -; CHECK-NEXT: ret i16 %[[trunc]] - -entry: - %a = alloca [4 x i8] - %fptr = bitcast [4 x i8]* %a to float* - store float 0.0, float* %fptr - %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 2 - %iptr = bitcast i8* %ptr to i16* - %val = load i16, i16* %iptr - ret i16 %val -} - -define i32 @test6() { -; CHECK-LABEL: @test6( -; CHECK: alloca i32 -; CHECK-NEXT: store volatile i32 -; CHECK-NEXT: load i32, i32* -; CHECK-NEXT: ret i32 - -entry: - %a = alloca [4 x i8] - %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 0 - call void @llvm.memset.p0i8.i32(i8* %ptr, i8 42, i32 4, i1 true) - %iptr = bitcast i8* %ptr to i32* - %val = load i32, i32* %iptr - ret i32 %val -} - -define void @test7(i8* %src, i8* %dst) { -; CHECK-LABEL: @test7( -; CHECK: alloca i32 -; CHECK-NEXT: bitcast i8* %src to i32* -; CHECK-NEXT: load volatile i32, {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: store volatile i32 {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: bitcast i8* %dst to i32* -; CHECK-NEXT: load volatile i32, {{.*}}, !tbaa [[TAG_3]] -; CHECK-NEXT: store volatile i32 {{.*}}, !tbaa [[TAG_3]] -; CHECK-NEXT: ret - -entry: - %a = alloca [4 x i8] - %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i1 true), !tbaa !0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i1 true), !tbaa !3 - ret void -} - - -%S1 = type { i32, i32, [16 x i8] } -%S2 = type { %S1*, %S2* } - -define %S2 @test8(%S2* %s2) { -; CHECK-LABEL: @test8( -entry: - %new = alloca %S2 -; CHECK-NOT: alloca - - %s2.next.ptr = getelementptr %S2, %S2* %s2, i64 0, i32 1 - %s2.next = load %S2*, %S2** %s2.next.ptr, !tbaa !0 -; CHECK: %[[gep:.*]] = getelementptr %S2, %S2* %s2, i64 0, i32 1 -; CHECK-NEXT: %[[next:.*]] = load 
%S2*, %S2** %[[gep]], !tbaa [[TAG_0]] - - %s2.next.s1.ptr = getelementptr %S2, %S2* %s2.next, i64 0, i32 0 - %s2.next.s1 = load %S1*, %S1** %s2.next.s1.ptr, !tbaa !3 - %new.s1.ptr = getelementptr %S2, %S2* %new, i64 0, i32 0 - store %S1* %s2.next.s1, %S1** %new.s1.ptr, !tbaa !5 - %s2.next.next.ptr = getelementptr %S2, %S2* %s2.next, i64 0, i32 1 - %s2.next.next = load %S2*, %S2** %s2.next.next.ptr, !tbaa !7 - %new.next.ptr = getelementptr %S2, %S2* %new, i64 0, i32 1 - store %S2* %s2.next.next, %S2** %new.next.ptr, !tbaa !9 -; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2, %S2* %[[next]], i64 0, i32 0 -; CHECK-NEXT: %[[next_s1:.*]] = load %S1*, %S1** %[[gep]], !tbaa [[TAG_3]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2, %S2* %[[next]], i64 0, i32 1 -; CHECK-NEXT: %[[next_next:.*]] = load %S2*, %S2** %[[gep]], !tbaa [[TAG_7]] - - %new.s1 = load %S1*, %S1** %new.s1.ptr - %result1 = insertvalue %S2 undef, %S1* %new.s1, 0 -; CHECK-NEXT: %[[result1:.*]] = insertvalue %S2 undef, %S1* %[[next_s1]], 0 - %new.next = load %S2*, %S2** %new.next.ptr - %result2 = insertvalue %S2 %result1, %S2* %new.next, 1 -; CHECK-NEXT: %[[result2:.*]] = insertvalue %S2 %[[result1]], %S2* %[[next_next]], 1 - ret %S2 %result2 -; CHECK-NEXT: ret %S2 %[[result2]] -} - -define i64 @test9() { -; Ensure we can handle loads off the end of an alloca even when wrapped in -; weird bit casts and types. This is valid IR due to the alignment and masking -; off the bits past the end of the alloca. 
-; -; CHECK-LABEL: @test9( -; CHECK-NOT: alloca -; CHECK: %[[b2:.*]] = zext i8 26 to i64 -; CHECK-NEXT: %[[s2:.*]] = shl i64 %[[b2]], 16 -; CHECK-NEXT: %[[m2:.*]] = and i64 undef, -16711681 -; CHECK-NEXT: %[[i2:.*]] = or i64 %[[m2]], %[[s2]] -; CHECK-NEXT: %[[b1:.*]] = zext i8 0 to i64 -; CHECK-NEXT: %[[s1:.*]] = shl i64 %[[b1]], 8 -; CHECK-NEXT: %[[m1:.*]] = and i64 %[[i2]], -65281 -; CHECK-NEXT: %[[i1:.*]] = or i64 %[[m1]], %[[s1]] -; CHECK-NEXT: %[[b0:.*]] = zext i8 0 to i64 -; CHECK-NEXT: %[[m0:.*]] = and i64 %[[i1]], -256 -; CHECK-NEXT: %[[i0:.*]] = or i64 %[[m0]], %[[b0]] -; CHECK-NEXT: %[[result:.*]] = and i64 %[[i0]], 16777215 -; CHECK-NEXT: ret i64 %[[result]] - -entry: - %a = alloca { [3 x i8] }, align 8 - %gep1 = getelementptr inbounds { [3 x i8] }, { [3 x i8] }* %a, i32 0, i32 0, i32 0 - store i8 0, i8* %gep1, align 1 - %gep2 = getelementptr inbounds { [3 x i8] }, { [3 x i8] }* %a, i32 0, i32 0, i32 1 - store i8 0, i8* %gep2, align 1 - %gep3 = getelementptr inbounds { [3 x i8] }, { [3 x i8] }* %a, i32 0, i32 0, i32 2 - store i8 26, i8* %gep3, align 1 - %cast = bitcast { [3 x i8] }* %a to { i64 }* - %elt = getelementptr inbounds { i64 }, { i64 }* %cast, i32 0, i32 0 - %load = load i64, i64* %elt - %result = and i64 %load, 16777215 - ret i64 %result -} - -define %S2* @test10() { -; CHECK-LABEL: @test10( -; CHECK-NOT: alloca %S2* -; CHECK: ret %S2* null - -entry: - %a = alloca [8 x i8] - %ptr = getelementptr [8 x i8], [8 x i8]* %a, i32 0, i32 0 - call void @llvm.memset.p0i8.i32(i8* %ptr, i8 0, i32 8, i1 false) - %s2ptrptr = bitcast i8* %ptr to %S2** - %s2ptr = load %S2*, %S2** %s2ptrptr - ret %S2* %s2ptr -} - -define i32 @test11() { -; CHECK-LABEL: @test11( -; CHECK-NOT: alloca -; CHECK: ret i32 0 - -entry: - %X = alloca i32 - br i1 undef, label %good, label %bad - -good: - %Y = getelementptr i32, i32* %X, i64 0 - store i32 0, i32* %Y - %Z = load i32, i32* %Y - ret i32 %Z - -bad: - %Y2 = getelementptr i32, i32* %X, i64 1 - store i32 0, i32* %Y2 - %Z2 = 
load i32, i32* %Y2 - ret i32 %Z2 -} - -define i8 @test12() { -; We fully promote these to the i24 load or store size, resulting in just masks -; and other operations that instcombine will fold, but no alloca. -; -; CHECK-LABEL: @test12( - -entry: - %a = alloca [3 x i8] - %b = alloca [3 x i8] -; CHECK-NOT: alloca - - %a0ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 0 - store i8 0, i8* %a0ptr - %a1ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 1 - store i8 0, i8* %a1ptr - %a2ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 2 - store i8 0, i8* %a2ptr - %aiptr = bitcast [3 x i8]* %a to i24* - %ai = load i24, i24* %aiptr -; CHECK-NOT: store -; CHECK-NOT: load -; CHECK: %[[ext2:.*]] = zext i8 0 to i24 -; CHECK-NEXT: %[[shift2:.*]] = shl i24 %[[ext2]], 16 -; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, 65535 -; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[shift2]] -; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24 -; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8 -; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281 -; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]] -; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24 -; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], -256 -; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[ext0]] - - %biptr = bitcast [3 x i8]* %b to i24* - store i24 %ai, i24* %biptr - %b0ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 0 - %b0 = load i8, i8* %b0ptr - %b1ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 1 - %b1 = load i8, i8* %b1ptr - %b2ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 2 - %b2 = load i8, i8* %b2ptr -; CHECK-NOT: store -; CHECK-NOT: load -; CHECK: %[[trunc0:.*]] = trunc i24 %[[insert0]] to i8 -; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8 -; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8 -; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[insert0]], 16 -; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8 - - %bsum0 = add i8 %b0, %b1 - %bsum1 = 
add i8 %bsum0, %b2 - ret i8 %bsum1 -; CHECK: %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]] -; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]] -; CHECK-NEXT: ret i8 %[[sum1]] -} - -define i32 @test13() { -; Ensure we don't crash and handle undefined loads that straddle the end of the -; allocation. -; CHECK-LABEL: @test13( -; CHECK: %[[value:.*]] = zext i8 0 to i16 -; CHECK-NEXT: %[[ret:.*]] = zext i16 %[[value]] to i32 -; CHECK-NEXT: ret i32 %[[ret]] - -entry: - %a = alloca [3 x i8], align 2 - %b0ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 0 - store i8 0, i8* %b0ptr - %b1ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 1 - store i8 0, i8* %b1ptr - %b2ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 2 - store i8 0, i8* %b2ptr - %iptrcast = bitcast [3 x i8]* %a to i16* - %iptrgep = getelementptr i16, i16* %iptrcast, i64 1 - %i = load i16, i16* %iptrgep - %ret = zext i16 %i to i32 - ret i32 %ret -} - -%test14.struct = type { [3 x i32] } - -define void @test14(...) nounwind uwtable { -; This is a strange case where we split allocas into promotable partitions, but -; also gain enough data to prove they must be dead allocas due to GEPs that walk -; across two adjacent allocas. Test that we don't try to promote or otherwise -; do bad things to these dead allocas, they should just be removed. 
-; CHECK-LABEL: @test14( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret void - -entry: - %a = alloca %test14.struct - %p = alloca %test14.struct* - %0 = bitcast %test14.struct* %a to i8* - %1 = getelementptr i8, i8* %0, i64 12 - %2 = bitcast i8* %1 to %test14.struct* - %3 = getelementptr inbounds %test14.struct, %test14.struct* %2, i32 0, i32 0 - %4 = getelementptr inbounds %test14.struct, %test14.struct* %a, i32 0, i32 0 - %5 = bitcast [3 x i32]* %3 to i32* - %6 = bitcast [3 x i32]* %4 to i32* - %7 = load i32, i32* %6, align 4 - store i32 %7, i32* %5, align 4 - %8 = getelementptr inbounds i32, i32* %5, i32 1 - %9 = getelementptr inbounds i32, i32* %6, i32 1 - %10 = load i32, i32* %9, align 4 - store i32 %10, i32* %8, align 4 - %11 = getelementptr inbounds i32, i32* %5, i32 2 - %12 = getelementptr inbounds i32, i32* %6, i32 2 - %13 = load i32, i32* %12, align 4 - store i32 %13, i32* %11, align 4 - ret void -} - -define i32 @test15(i1 %flag) nounwind uwtable { -; Ensure that when there are dead instructions using an alloca that are not -; loads or stores we still delete them during partitioning and rewriting. -; Otherwise we'll go to promote them while thy still have unpromotable uses. 
-; CHECK-LABEL: @test15( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop -; CHECK: loop: -; CHECK-NEXT: br label %loop - -entry: - %l0 = alloca i64 - %l1 = alloca i64 - %l2 = alloca i64 - %l3 = alloca i64 - br label %loop - -loop: - %dead3 = phi i8* [ %gep3, %loop ], [ null, %entry ] - - store i64 1879048192, i64* %l0, align 8 - %bc0 = bitcast i64* %l0 to i8* - %gep0 = getelementptr i8, i8* %bc0, i64 3 - %dead0 = bitcast i8* %gep0 to i64* - - store i64 1879048192, i64* %l1, align 8 - %bc1 = bitcast i64* %l1 to i8* - %gep1 = getelementptr i8, i8* %bc1, i64 3 - %dead1 = getelementptr i8, i8* %gep1, i64 1 - - store i64 1879048192, i64* %l2, align 8 - %bc2 = bitcast i64* %l2 to i8* - %gep2.1 = getelementptr i8, i8* %bc2, i64 1 - %gep2.2 = getelementptr i8, i8* %bc2, i64 3 - ; Note that this select should get visited multiple times due to using two - ; different GEPs off the same alloca. We should only delete it once. - %dead2 = select i1 %flag, i8* %gep2.1, i8* %gep2.2 - - store i64 1879048192, i64* %l3, align 8 - %bc3 = bitcast i64* %l3 to i8* - %gep3 = getelementptr i8, i8* %bc3, i64 3 - - br label %loop -} - -define void @test16(i8* %src, i8* %dst) { -; Ensure that we can promote an alloca of [3 x i8] to an i24 SSA value. 
-; CHECK-LABEL: @test16( -; CHECK-NOT: alloca -; CHECK: %[[srccast:.*]] = bitcast i8* %src to i24* -; CHECK-NEXT: load i24, i24* %[[srccast]], {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: %[[dstcast:.*]] = bitcast i8* %dst to i24* -; CHECK-NEXT: store i24 0, i24* %[[dstcast]], {{.*}}, !tbaa [[TAG_5]] -; CHECK-NEXT: ret void - -entry: - %a = alloca [3 x i8] - %ptr = getelementptr [3 x i8], [3 x i8]* %a, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i1 false), !tbaa !0 - %cast = bitcast i8* %ptr to i24* - store i24 0, i24* %cast, !tbaa !3 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i1 false), !tbaa !5 - ret void -} - -define void @test17(i8* %src, i8* %dst) { -; Ensure that we can rewrite unpromotable memcpys which extend past the end of -; the alloca. -; CHECK-LABEL: @test17( -; CHECK: %[[a:.*]] = alloca [3 x i8] -; CHECK-NEXT: %[[ptr:.*]] = getelementptr [3 x i8], [3 x i8]* %[[a]], i32 0, i32 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[ptr]], i8* %src, {{.*}}), !tbaa [[TAG_0]] -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[ptr]], {{.*}}), !tbaa [[TAG_3]] -; CHECK-NEXT: ret void - -entry: - %a = alloca [3 x i8] - %ptr = getelementptr [3 x i8], [3 x i8]* %a, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i1 true), !tbaa !0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i1 true), !tbaa !3 - ret void -} - -define void @test18(i8* %src, i8* %dst, i32 %size) { -; Preserve transfer instrinsics with a variable size, even if they overlap with -; fixed size operations. Further, continue to split and promote allocas preceding -; the variable sized intrinsic. 
-; CHECK-LABEL: @test18( -; CHECK: %[[a:.*]] = alloca [34 x i8] -; CHECK: %[[srcgep1:.*]] = getelementptr inbounds i8, i8* %src, i64 4 -; CHECK-NEXT: %[[srccast1:.*]] = bitcast i8* %[[srcgep1]] to i32* -; CHECK-NEXT: %[[srcload:.*]] = load i32, i32* %[[srccast1]], {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: %[[agep1:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[agep1]], i8* %src, i32 %size, {{.*}}), !tbaa [[TAG_3]] -; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[agep2]], i8 42, i32 %size, {{.*}}), !tbaa [[TAG_5]] -; CHECK-NEXT: %[[dstcast1:.*]] = bitcast i8* %dst to i32* -; CHECK-NEXT: store i32 42, i32* %[[dstcast1]], {{.*}}, !tbaa [[TAG_9]] -; CHECK-NEXT: %[[dstgep1:.*]] = getelementptr inbounds i8, i8* %dst, i64 4 -; CHECK-NEXT: %[[dstcast2:.*]] = bitcast i8* %[[dstgep1]] to i32* -; CHECK-NEXT: store i32 %[[srcload]], i32* %[[dstcast2]], {{.*}}, !tbaa [[TAG_9]] -; CHECK-NEXT: %[[agep3:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* align 1 %[[agep3]], i32 %size, {{.*}}), !tbaa [[TAG_11]] -; CHECK-NEXT: ret void - -entry: - %a = alloca [42 x i8] - %ptr = getelementptr [42 x i8], [42 x i8]* %a, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i1 false), !tbaa !0 - %ptr2 = getelementptr [42 x i8], [42 x i8]* %a, i32 0, i32 8 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr2, i8* %src, i32 %size, i1 false), !tbaa !3 - call void @llvm.memset.p0i8.i32(i8* %ptr2, i8 42, i32 %size, i1 false), !tbaa !5 - %cast = bitcast i8* %ptr to i32* - store i32 42, i32* %cast, !tbaa !7 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i1 false), !tbaa !9 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr2, i32 %size, i1 false), !tbaa !11 - ret 
void -} - -%opaque = type opaque - -define i32 @test19(%opaque* %x) { -; This input will cause us to try to compute a natural GEP when rewriting -; pointers in such a way that we try to GEP through the opaque type. Previously, -; a check for an unsized type was missing and this crashed. Ensure it behaves -; reasonably now. -; CHECK-LABEL: @test19( -; CHECK-NOT: alloca -; CHECK: ret i32 undef - -entry: - %a = alloca { i64, i8* } - %cast1 = bitcast %opaque* %x to i8* - %cast2 = bitcast { i64, i8* }* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast2, i8* %cast1, i32 16, i1 false) - %gep = getelementptr inbounds { i64, i8* }, { i64, i8* }* %a, i32 0, i32 0 - %val = load i64, i64* %gep - ret i32 undef -} - -define i32 @test20() { -; Ensure we can track negative offsets (before the beginning of the alloca) and -; negative relative offsets from offsets starting past the end of the alloca. -; CHECK-LABEL: @test20( -; CHECK-NOT: alloca -; CHECK: %[[sum1:.*]] = add i32 1, 2 -; CHECK: %[[sum2:.*]] = add i32 %[[sum1]], 3 -; CHECK: ret i32 %[[sum2]] - -entry: - %a = alloca [3 x i32] - %gep1 = getelementptr [3 x i32], [3 x i32]* %a, i32 0, i32 0 - store i32 1, i32* %gep1 - %gep2.1 = getelementptr [3 x i32], [3 x i32]* %a, i32 0, i32 -2 - %gep2.2 = getelementptr i32, i32* %gep2.1, i32 3 - store i32 2, i32* %gep2.2 - %gep3.1 = getelementptr [3 x i32], [3 x i32]* %a, i32 0, i32 14 - %gep3.2 = getelementptr i32, i32* %gep3.1, i32 -12 - store i32 3, i32* %gep3.2 - - %load1 = load i32, i32* %gep1 - %load2 = load i32, i32* %gep2.2 - %load3 = load i32, i32* %gep3.2 - %sum1 = add i32 %load1, %load2 - %sum2 = add i32 %sum1, %load3 - ret i32 %sum2 -} - -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind - -define i8 @test21() { -; Test allocations and offsets which border on overflow of the int64_t used -; internally. This is really awkward to really test as LLVM doesn't really -; support such extreme constructs cleanly. 
-; CHECK-LABEL: @test21( -; CHECK-NOT: alloca -; CHECK: or i8 -1, -1 - -entry: - %a = alloca [2305843009213693951 x i8] - %gep0 = getelementptr [2305843009213693951 x i8], [2305843009213693951 x i8]* %a, i64 0, i64 2305843009213693949 - store i8 255, i8* %gep0 - %gep1 = getelementptr [2305843009213693951 x i8], [2305843009213693951 x i8]* %a, i64 0, i64 -9223372036854775807 - %gep2 = getelementptr i8, i8* %gep1, i64 -1 - call void @llvm.memset.p0i8.i64(i8* %gep2, i8 0, i64 18446744073709551615, i1 false) - %gep3 = getelementptr i8, i8* %gep1, i64 9223372036854775807 - %gep4 = getelementptr i8, i8* %gep3, i64 9223372036854775807 - %gep5 = getelementptr i8, i8* %gep4, i64 -6917529027641081857 - store i8 255, i8* %gep5 - %cast1 = bitcast i8* %gep4 to i32* - store i32 0, i32* %cast1 - %load = load i8, i8* %gep0 - %gep6 = getelementptr i8, i8* %gep0, i32 1 - %load2 = load i8, i8* %gep6 - %result = or i8 %load, %load2 - ret i8 %result -} - -%PR13916.struct = type { i8 } - -define void @PR13916.1() { -; Ensure that we handle overlapping memcpy intrinsics correctly, especially in -; the case where there is a directly identical value for both source and dest. -; CHECK: @PR13916.1 -; CHECK-NOT: alloca -; CHECK: ret void - -entry: - %a = alloca i8 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i1 false) - %tmp2 = load i8, i8* %a - ret void -} - -define void @PR13916.2() { -; Check whether we continue to handle them correctly when they start off with -; different pointer value chains, but during rewriting we coalesce them into the -; same value. 
-; CHECK: @PR13916.2 -; CHECK-NOT: alloca -; CHECK: ret void - -entry: - %a = alloca %PR13916.struct, align 1 - br i1 undef, label %if.then, label %if.end - -if.then: - %tmp0 = bitcast %PR13916.struct* %a to i8* - %tmp1 = bitcast %PR13916.struct* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp0, i8* %tmp1, i32 1, i1 false) - br label %if.end - -if.end: - %gep = getelementptr %PR13916.struct, %PR13916.struct* %a, i32 0, i32 0 - %tmp2 = load i8, i8* %gep - ret void -} - -define void @PR13990() { -; Ensure we can handle cases where processing one alloca causes the other -; alloca to become dead and get deleted. This might crash or fail under -; Valgrind if we regress. -; CHECK-LABEL: @PR13990( -; CHECK-NOT: alloca -; CHECK: unreachable -; CHECK: unreachable - -entry: - %tmp1 = alloca i8* - %tmp2 = alloca i8* - br i1 undef, label %bb1, label %bb2 - -bb1: - store i8* undef, i8** %tmp2 - br i1 undef, label %bb2, label %bb3 - -bb2: - %tmp50 = select i1 undef, i8** %tmp2, i8** %tmp1 - br i1 undef, label %bb3, label %bb4 - -bb3: - unreachable - -bb4: - unreachable -} - -define double @PR13969(double %x) { -; Check that we detect when promotion will un-escape an alloca and iterate to -; re-try running SROA over that alloca. Without that, the two allocas that are -; stored into a dead alloca don't get rewritten and promoted. -; CHECK-LABEL: @PR13969( - -entry: - %a = alloca double - %b = alloca double* - %c = alloca double -; CHECK-NOT: alloca - - store double %x, double* %a - store double* %c, double** %b - store double* %a, double** %b - store double %x, double* %c - %ret = load double, double* %a -; CHECK-NOT: store -; CHECK-NOT: load - - ret double %ret -; CHECK: ret double %x -} - -%PR14034.struct = type { { {} }, i32, %PR14034.list } -%PR14034.list = type { %PR14034.list*, %PR14034.list* } - -define void @PR14034() { -; This test case tries to form GEPs into the empty leading struct members, and -; subsequently crashed (under valgrind) before we fixed the PR. 
The important -; thing is to handle empty structs gracefully. -; CHECK-LABEL: @PR14034( - -entry: - %a = alloca %PR14034.struct - %list = getelementptr %PR14034.struct, %PR14034.struct* %a, i32 0, i32 2 - %prev = getelementptr %PR14034.list, %PR14034.list* %list, i32 0, i32 1 - store %PR14034.list* undef, %PR14034.list** %prev - %cast0 = bitcast %PR14034.struct* undef to i8* - %cast1 = bitcast %PR14034.struct* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast0, i8* %cast1, i32 12, i1 false) - ret void -} - -define i32 @test22(i32 %x) { -; Test that SROA and promotion is not confused by a grab bax mixture of pointer -; types involving wrapper aggregates and zero-length aggregate members. -; CHECK-LABEL: @test22( - -entry: - %a1 = alloca { { [1 x { i32 }] } } - %a2 = alloca { {}, { float }, [0 x i8] } - %a3 = alloca { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } } -; CHECK-NOT: alloca - - %wrap1 = insertvalue [1 x { i32 }] undef, i32 %x, 0, 0 - %gep1 = getelementptr { { [1 x { i32 }] } }, { { [1 x { i32 }] } }* %a1, i32 0, i32 0, i32 0 - store [1 x { i32 }] %wrap1, [1 x { i32 }]* %gep1 - - %gep2 = getelementptr { { [1 x { i32 }] } }, { { [1 x { i32 }] } }* %a1, i32 0, i32 0 - %ptrcast1 = bitcast { [1 x { i32 }] }* %gep2 to { [1 x { float }] }* - %load1 = load { [1 x { float }] }, { [1 x { float }] }* %ptrcast1 - %unwrap1 = extractvalue { [1 x { float }] } %load1, 0, 0 - - %wrap2 = insertvalue { {}, { float }, [0 x i8] } undef, { float } %unwrap1, 1 - store { {}, { float }, [0 x i8] } %wrap2, { {}, { float }, [0 x i8] }* %a2 - - %gep3 = getelementptr { {}, { float }, [0 x i8] }, { {}, { float }, [0 x i8] }* %a2, i32 0, i32 1, i32 0 - %ptrcast2 = bitcast float* %gep3 to <4 x i8>* - %load3 = load <4 x i8>, <4 x i8>* %ptrcast2 - %valcast1 = bitcast <4 x i8> %load3 to i32 - - %wrap3 = insertvalue [1 x [1 x i32]] undef, i32 %valcast1, 0, 0 - %wrap4 = insertvalue { [1 x [1 x i32]], {} } undef, [1 x [1 x i32]] %wrap3, 0 - %gep4 = getelementptr { 
[0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }, { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1 - %ptrcast3 = bitcast { [0 x double], [1 x [1 x <4 x i8>]], {} }* %gep4 to { [1 x [1 x i32]], {} }* - store { [1 x [1 x i32]], {} } %wrap4, { [1 x [1 x i32]], {} }* %ptrcast3 - - %gep5 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }, { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1, i32 1, i32 0 - %ptrcast4 = bitcast [1 x <4 x i8>]* %gep5 to { {}, float, {} }* - %load4 = load { {}, float, {} }, { {}, float, {} }* %ptrcast4 - %unwrap2 = extractvalue { {}, float, {} } %load4, 1 - %valcast2 = bitcast float %unwrap2 to i32 - - ret i32 %valcast2 -; CHECK: ret i32 -} - -define void @PR14059.1(double* %d) { -; In PR14059 a peculiar construct was identified as something that is used -; pervasively in ARM's ABI-calling-convention lowering: the passing of a struct -; of doubles via an array of i32 in order to place the data into integer -; registers. This in turn was missed as an optimization by SROA due to the -; partial loads and stores of integers to the double alloca we were trying to -; form and promote. The solution is to widen the integer operations to be -; whole-alloca operations, and perform the appropriate bitcasting on the -; *values* rather than the pointers. When this works, partial reads and writes -; via integers can be promoted away. -; CHECK: @PR14059.1 -; CHECK-NOT: alloca -; CHECK: ret void - -entry: - %X.sroa.0.i = alloca double, align 8 - %0 = bitcast double* %X.sroa.0.i to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0) - - ; Store to the low 32-bits... - %X.sroa.0.0.cast2.i = bitcast double* %X.sroa.0.i to i32* - store i32 0, i32* %X.sroa.0.0.cast2.i, align 8 - - ; Also use a memset to the middle 32-bits for fun. 
- %X.sroa.0.2.raw_idx2.i = getelementptr inbounds i8, i8* %0, i32 2 - call void @llvm.memset.p0i8.i64(i8* %X.sroa.0.2.raw_idx2.i, i8 0, i64 4, i1 false) - - ; Or a memset of the whole thing. - call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i1 false) - - ; Write to the high 32-bits with a memcpy. - %X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8, i8* %0, i32 4 - %d.raw = bitcast double* %d to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i1 false) - - ; Store to the high 32-bits... - %X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32* - store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4 - - ; Do the actual math... - %X.sroa.0.0.load1.i = load double, double* %X.sroa.0.i, align 8 - %accum.real.i = load double, double* %d, align 8 - %add.r.i = fadd double %accum.real.i, %X.sroa.0.0.load1.i - store double %add.r.i, double* %d, align 8 - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0) - ret void -} - -define i64 @PR14059.2({ float, float }* %phi) { -; Check that SROA can split up alloca-wide integer loads and stores where the -; underlying alloca has smaller components that are accessed independently. This -; shows up particularly with ABI lowering patterns coming out of Clang that rely -; on the particular register placement of a single large integer return value. 
-; CHECK: @PR14059.2 - -entry: - %retval = alloca { float, float }, align 4 - ; CHECK-NOT: alloca - - %0 = bitcast { float, float }* %retval to i64* - store i64 0, i64* %0 - ; CHECK-NOT: store - - %phi.realp = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 0 - %phi.real = load float, float* %phi.realp - %phi.imagp = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 1 - %phi.imag = load float, float* %phi.imagp - ; CHECK: %[[realp:.*]] = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 0 - ; CHECK-NEXT: %[[real:.*]] = load float, float* %[[realp]] - ; CHECK-NEXT: %[[imagp:.*]] = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 1 - ; CHECK-NEXT: %[[imag:.*]] = load float, float* %[[imagp]] - - %real = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 0 - %imag = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 1 - store float %phi.real, float* %real - store float %phi.imag, float* %imag - ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32 - ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32 - ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64 - ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32 - ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295 - ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]] - ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64 - ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296 - ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]] - - %1 = load i64, i64* %0, align 1 - ret i64 %1 - ; CHECK-NEXT: ret i64 %[[real_insert]] -} - -define void @PR14105({ [16 x i8] }* %ptr) { -; Ensure that when rewriting the GEP index '-1' for this alloca we preserve is -; sign as negative. 
We use a volatile memcpy to ensure promotion never actually -; occurs. -; CHECK-LABEL: @PR14105( - -entry: - %a = alloca { [16 x i8] }, align 8 -; CHECK: alloca [16 x i8], align 8 - - %gep = getelementptr inbounds { [16 x i8] }, { [16 x i8] }* %ptr, i64 -1 -; CHECK-NEXT: getelementptr inbounds { [16 x i8] }, { [16 x i8] }* %ptr, i64 -1, i32 0, i64 0 - - %cast1 = bitcast { [16 x i8 ] }* %gep to i8* - %cast2 = bitcast { [16 x i8 ] }* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true) - ret void -; CHECK: ret -} - -define void @PR14105_as1({ [16 x i8] } addrspace(1)* %ptr) { -; Make sure this the right address space pointer is used for type check. -; CHECK-LABEL: @PR14105_as1( - -entry: - %a = alloca { [16 x i8] }, align 8 -; CHECK: alloca [16 x i8], align 8 - - %gep = getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i64 -1 -; CHECK-NEXT: getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i16 -1, i32 0, i16 0 - - %cast1 = bitcast { [16 x i8 ] } addrspace(1)* %gep to i8 addrspace(1)* - %cast2 = bitcast { [16 x i8 ] }* %a to i8* - call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true) - ret void -; CHECK: ret -} - -define void @PR14465() { -; Ensure that we don't crash when analyzing a alloca larger than the maximum -; integer type width (MAX_INT_BITS) supported by llvm (1048576*32 > (1<<23)-1). -; CHECK-LABEL: @PR14465( - - %stack = alloca [1048576 x i32], align 16 -; CHECK: alloca [1048576 x i32] - %cast = bitcast [1048576 x i32]* %stack to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %cast, i8 -2, i64 4194304, i1 false) - ret void -; CHECK: ret -} - -define void @PR14548(i1 %x) { -; Handle a mixture of i1 and i8 loads and stores to allocas. 
This particular -; pattern caused crashes and invalid output in the PR, and its nature will -; trigger a mixture in several permutations as we resolve each alloca -; iteratively. -; Note that we don't do a particularly good *job* of handling these mixtures, -; but the hope is that this is very rare. -; CHECK-LABEL: @PR14548( - -entry: - %a = alloca <{ i1 }>, align 8 - %b = alloca <{ i1 }>, align 8 -; CHECK: %[[a:.*]] = alloca i8, align 8 -; CHECK-NEXT: %[[b:.*]] = alloca i8, align 8 - - %b.i1 = bitcast <{ i1 }>* %b to i1* - store i1 %x, i1* %b.i1, align 8 - %b.i8 = bitcast <{ i1 }>* %b to i8* - %foo = load i8, i8* %b.i8, align 1 -; CHECK-NEXT: %[[b_cast:.*]] = bitcast i8* %[[b]] to i1* -; CHECK-NEXT: store i1 %x, i1* %[[b_cast]], align 8 -; CHECK-NEXT: {{.*}} = load i8, i8* %[[b]], align 8 - - %a.i8 = bitcast <{ i1 }>* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i1 false) nounwind - %bar = load i8, i8* %a.i8, align 1 - %a.i1 = getelementptr inbounds <{ i1 }>, <{ i1 }>* %a, i32 0, i32 0 - %baz = load i1, i1* %a.i1, align 1 -; CHECK-NEXT: %[[copy:.*]] = load i8, i8* %[[b]], align 8 -; CHECK-NEXT: store i8 %[[copy]], i8* %[[a]], align 8 -; CHECK-NEXT: {{.*}} = load i8, i8* %[[a]], align 8 -; CHECK-NEXT: %[[a_cast:.*]] = bitcast i8* %[[a]] to i1* -; CHECK-NEXT: {{.*}} = load i1, i1* %[[a_cast]], align 8 - - ret void -} - -define <3 x i8> @PR14572.1(i32 %x) { -; Ensure that a split integer store which is wider than the type size of the -; alloca (relying on the alloc size padding) doesn't trigger an assert. 
-; CHECK: @PR14572.1 - -entry: - %a = alloca <3 x i8>, align 4 -; CHECK-NOT: alloca - - %cast = bitcast <3 x i8>* %a to i32* - store i32 %x, i32* %cast, align 1 - %y = load <3 x i8>, <3 x i8>* %a, align 4 - ret <3 x i8> %y -; CHECK: ret <3 x i8> -} - -define i32 @PR14572.2(<3 x i8> %x) { -; Ensure that a split integer load which is wider than the type size of the -; alloca (relying on the alloc size padding) doesn't trigger an assert. -; CHECK: @PR14572.2 - -entry: - %a = alloca <3 x i8>, align 4 -; CHECK-NOT: alloca - - store <3 x i8> %x, <3 x i8>* %a, align 1 - %cast = bitcast <3 x i8>* %a to i32* - %y = load i32, i32* %cast, align 4 - ret i32 %y -; CHECK: ret i32 -} - -define i32 @PR14601(i32 %x) { -; Don't try to form a promotable integer alloca when there is a variable length -; memory intrinsic. -; CHECK-LABEL: @PR14601( - -entry: - %a = alloca i32 -; CHECK: alloca - - %a.i8 = bitcast i32* %a to i8* - call void @llvm.memset.p0i8.i32(i8* %a.i8, i8 0, i32 %x, i1 false) - %v = load i32, i32* %a - ret i32 %v -} - -define void @PR15674(i8* %data, i8* %src, i32 %size) { -; Arrange (via control flow) to have unmerged stores of a particular width to -; an alloca where we incrementally store from the end of the array toward the -; beginning of the array. Ensure that the final integer store, despite being -; convertable to the integer type that we end up promoting this alloca toward, -; doesn't get widened to a full alloca store. 
-; CHECK-LABEL: @PR15674( - -entry: - %tmp = alloca [4 x i8], align 1 -; CHECK: alloca i32 - - switch i32 %size, label %end [ - i32 4, label %bb4 - i32 3, label %bb3 - i32 2, label %bb2 - i32 1, label %bb1 - ] - -bb4: - %src.gep3 = getelementptr inbounds i8, i8* %src, i32 3 - %src.3 = load i8, i8* %src.gep3 - %tmp.gep3 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 3 - store i8 %src.3, i8* %tmp.gep3 -; CHECK: store i8 - - br label %bb3 - -bb3: - %src.gep2 = getelementptr inbounds i8, i8* %src, i32 2 - %src.2 = load i8, i8* %src.gep2 - %tmp.gep2 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 2 - store i8 %src.2, i8* %tmp.gep2 -; CHECK: store i8 - - br label %bb2 - -bb2: - %src.gep1 = getelementptr inbounds i8, i8* %src, i32 1 - %src.1 = load i8, i8* %src.gep1 - %tmp.gep1 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 1 - store i8 %src.1, i8* %tmp.gep1 -; CHECK: store i8 - - br label %bb1 - -bb1: - %src.gep0 = getelementptr inbounds i8, i8* %src, i32 0 - %src.0 = load i8, i8* %src.gep0 - %tmp.gep0 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 0 - store i8 %src.0, i8* %tmp.gep0 -; CHECK: store i8 - - br label %end - -end: - %tmp.raw = bitcast [4 x i8]* %tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %data, i8* %tmp.raw, i32 %size, i1 false) - ret void -; CHECK: ret void -} - -define void @PR15805(i1 %a, i1 %b) { -; CHECK-LABEL: @PR15805( -; CHECK-NOT: alloca -; CHECK: ret void - - %c = alloca i64, align 8 - %p.0.c = select i1 undef, i64* %c, i64* %c - %cond.in = select i1 undef, i64* %p.0.c, i64* %c - %cond = load i64, i64* %cond.in, align 8 - ret void -} - -define void @PR15805.1(i1 %a, i1 %b) { -; Same as the normal PR15805, but rigged to place the use before the def inside -; of looping unreachable code. This helps ensure that we aren't sensitive to the -; order in which the uses of the alloca are visited. 
-; -; CHECK-LABEL: @PR15805.1( -; CHECK-NOT: alloca -; CHECK: ret void - - %c = alloca i64, align 8 - br label %exit - -loop: - %cond.in = select i1 undef, i64* %c, i64* %p.0.c - %p.0.c = select i1 undef, i64* %c, i64* %c - %cond = load i64, i64* %cond.in, align 8 - br i1 undef, label %loop, label %exit - -exit: - ret void -} - -define void @PR16651.1(i8* %a) { -; This test case caused a crash due to the volatile memcpy in combination with -; lowering to integer loads and stores of a width other than that of the original -; memcpy. -; -; CHECK-LABEL: @PR16651.1( -; CHECK: alloca i16 -; CHECK: alloca i8 -; CHECK: alloca i8 -; CHECK: unreachable - -entry: - %b = alloca i32, align 4 - %b.cast = bitcast i32* %b to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %b.cast, i8* align 4 %a, i32 4, i1 true) - %b.gep = getelementptr inbounds i8, i8* %b.cast, i32 2 - load i8, i8* %b.gep, align 2 - unreachable -} - -define void @PR16651.2() { -; This test case caused a crash due to failing to promote given a select that -; can't be speculated. It shouldn't be promoted, but we missed that fact when -; analyzing whether we could form a vector promotion because that code didn't -; bail on select instructions. 
-; -; CHECK-LABEL: @PR16651.2( -; CHECK: alloca <2 x float> -; CHECK: ret void - -entry: - %tv1 = alloca { <2 x float>, <2 x float> }, align 8 - %0 = getelementptr { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* %tv1, i64 0, i32 1 - store <2 x float> undef, <2 x float>* %0, align 8 - %1 = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* %tv1, i64 0, i32 1, i64 0 - %cond105.in.i.i = select i1 undef, float* null, float* %1 - %cond105.i.i = load float, float* %cond105.in.i.i, align 8 - ret void -} - -define void @test23(i32 %x) { -; CHECK-LABEL: @test23( -; CHECK-NOT: alloca -; CHECK: ret void -entry: - %a = alloca i32, align 4 - store i32 %x, i32* %a, align 4 - %gep1 = getelementptr inbounds i32, i32* %a, i32 1 - %gep0 = getelementptr inbounds i32, i32* %a, i32 0 - %cast1 = bitcast i32* %gep1 to i8* - %cast0 = bitcast i32* %gep0 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast0, i32 4, i1 false) - ret void -} - -define void @PR18615() { -; CHECK-LABEL: @PR18615( -; CHECK-NOT: alloca -; CHECK: ret void -entry: - %f = alloca i8 - %gep = getelementptr i8, i8* %f, i64 -1 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %gep, i32 1, i1 false) - ret void -} - -define void @test24(i8* %src, i8* %dst) { -; CHECK-LABEL: @test24( -; CHECK: alloca i64, align 16 -; CHECK: load volatile i64, i64* %{{[^,]*}}, align 1, !tbaa [[TAG_0]] -; CHECK: store volatile i64 %{{[^,]*}}, i64* %{{[^,]*}}, align 16, !tbaa [[TAG_0]] -; CHECK: load volatile i64, i64* %{{[^,]*}}, align 16, !tbaa [[TAG_3]] -; CHECK: store volatile i64 %{{[^,]*}}, i64* %{{[^,]*}}, align 1, !tbaa [[TAG_3]] - -entry: - %a = alloca i64, align 16 - %ptr = bitcast i64* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i1 true), !tbaa !0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i1 true), !tbaa !3 - ret void -} - -define float @test25() { -; Check that we split up stores in order to promote the smaller SSA 
values.. These types -; of patterns can arise because LLVM maps small memcpy's to integer load and -; stores. If we get a memcpy of an aggregate (such as C and C++ frontends would -; produce, but so might any language frontend), this will in many cases turn into -; an integer load and store. SROA needs to be extremely powerful to correctly -; handle these cases and form splitable and promotable SSA values. -; -; CHECK-LABEL: @test25( -; CHECK-NOT: alloca -; CHECK: %[[F1:.*]] = bitcast i32 0 to float -; CHECK: %[[F2:.*]] = bitcast i32 1065353216 to float -; CHECK: %[[SUM:.*]] = fadd float %[[F1]], %[[F2]] -; CHECK: ret float %[[SUM]] - -entry: - %a = alloca i64 - %b = alloca i64 - %a.cast = bitcast i64* %a to [2 x float]* - %a.gep1 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 0 - %a.gep2 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 1 - %b.cast = bitcast i64* %b to [2 x float]* - %b.gep1 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 0 - %b.gep2 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 1 - store float 0.0, float* %a.gep1 - store float 1.0, float* %a.gep2 - %v = load i64, i64* %a - store i64 %v, i64* %b - %f1 = load float, float* %b.gep1 - %f2 = load float, float* %b.gep2 - %ret = fadd float %f1, %f2 - ret float %ret -} - -@complex1 = external global [2 x float] -@complex2 = external global [2 x float] - -define void @test26() { -; Test a case of splitting up loads and stores against a globals. 
-; -; CHECK-LABEL: @test26( -; CHECK-NOT: alloca -; CHECK: %[[L1:.*]] = load i32, i32* bitcast -; CHECK: %[[L2:.*]] = load i32, i32* bitcast -; CHECK: %[[F1:.*]] = bitcast i32 %[[L1]] to float -; CHECK: %[[F2:.*]] = bitcast i32 %[[L2]] to float -; CHECK: %[[SUM:.*]] = fadd float %[[F1]], %[[F2]] -; CHECK: %[[C1:.*]] = bitcast float %[[SUM]] to i32 -; CHECK: %[[C2:.*]] = bitcast float %[[SUM]] to i32 -; CHECK: store i32 %[[C1]], i32* bitcast -; CHECK: store i32 %[[C2]], i32* bitcast -; CHECK: ret void - -entry: - %a = alloca i64 - %a.cast = bitcast i64* %a to [2 x float]* - %a.gep1 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 0 - %a.gep2 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 1 - %v1 = load i64, i64* bitcast ([2 x float]* @complex1 to i64*) - store i64 %v1, i64* %a - %f1 = load float, float* %a.gep1 - %f2 = load float, float* %a.gep2 - %sum = fadd float %f1, %f2 - store float %sum, float* %a.gep1 - store float %sum, float* %a.gep2 - %v2 = load i64, i64* %a - store i64 %v2, i64* bitcast ([2 x float]* @complex2 to i64*) - ret void -} - -define float @test27() { -; Another, more complex case of splittable i64 loads and stores. This example -; is a particularly challenging one because the load and store both point into -; the alloca SROA is processing, and they overlap but at an offset. 
-; -; CHECK-LABEL: @test27( -; CHECK-NOT: alloca -; CHECK: %[[F1:.*]] = bitcast i32 0 to float -; CHECK: %[[F2:.*]] = bitcast i32 1065353216 to float -; CHECK: %[[SUM:.*]] = fadd float %[[F1]], %[[F2]] -; CHECK: ret float %[[SUM]] - -entry: - %a = alloca [12 x i8] - %gep1 = getelementptr [12 x i8], [12 x i8]* %a, i32 0, i32 0 - %gep2 = getelementptr [12 x i8], [12 x i8]* %a, i32 0, i32 4 - %gep3 = getelementptr [12 x i8], [12 x i8]* %a, i32 0, i32 8 - %iptr1 = bitcast i8* %gep1 to i64* - %iptr2 = bitcast i8* %gep2 to i64* - %fptr1 = bitcast i8* %gep1 to float* - %fptr2 = bitcast i8* %gep2 to float* - %fptr3 = bitcast i8* %gep3 to float* - store float 0.0, float* %fptr1 - store float 1.0, float* %fptr2 - %v = load i64, i64* %iptr1 - store i64 %v, i64* %iptr2 - %f1 = load float, float* %fptr2 - %f2 = load float, float* %fptr3 - %ret = fadd float %f1, %f2 - ret float %ret -} - -define i32 @PR22093() { -; Test that we don't try to pre-split a splittable store of a splittable but -; not pre-splittable load over the same alloca. We "handle" this case when the -; load is unsplittable but unrelated to this alloca by just generating extra -; loads without touching the original, but when the original load was out of -; this alloca we need to handle it specially to ensure the splits line up -; properly for rewriting. -; -; CHECK-LABEL: @PR22093( -; CHECK-NOT: alloca -; CHECK: alloca i16 -; CHECK-NOT: alloca -; CHECK: store volatile i16 - -entry: - %a = alloca i32 - %a.cast = bitcast i32* %a to i16* - store volatile i16 42, i16* %a.cast - %load = load i32, i32* %a - store i32 %load, i32* %a - ret i32 %load -} - -define void @PR22093.2() { -; Another way that we end up being unable to split a particular set of loads -; and stores can even have ordering importance. Here we have a load which is -; pre-splittable by itself, and the first store is also compatible. But the -; second store of the load makes the load unsplittable because of a mismatch of -; splits. 
Because this makes the load unsplittable, we also have to go back and -; remove the first store from the presplit candidates as its load won't be -; presplit. -; -; CHECK-LABEL: @PR22093.2( -; CHECK-NOT: alloca -; CHECK: alloca i16 -; CHECK-NEXT: alloca i8 -; CHECK-NOT: alloca -; CHECK: store volatile i16 -; CHECK: store volatile i8 - -entry: - %a = alloca i64 - %a.cast1 = bitcast i64* %a to i32* - %a.cast2 = bitcast i64* %a to i16* - store volatile i16 42, i16* %a.cast2 - %load = load i32, i32* %a.cast1 - store i32 %load, i32* %a.cast1 - %a.gep1 = getelementptr i32, i32* %a.cast1, i32 1 - %a.cast3 = bitcast i32* %a.gep1 to i8* - store volatile i8 13, i8* %a.cast3 - store i32 %load, i32* %a.gep1 - ret void -} - -define void @PR23737() { -; CHECK-LABEL: @PR23737( -; CHECK: store atomic volatile {{.*}} seq_cst -; CHECK: load atomic volatile {{.*}} seq_cst -entry: - %ptr = alloca i64, align 8 - store atomic volatile i64 0, i64* %ptr seq_cst, align 8 - %load = load atomic volatile i64, i64* %ptr seq_cst, align 8 - ret void -} - -define i16 @PR24463() { -; Ensure we can handle a very interesting case where there is an integer-based -; rewrite of the uses of the alloca, but where one of the integers in that is -; a sub-integer that requires extraction *and* extends past the end of the -; alloca. SROA can split the alloca to avoid shift or trunc. 
-; -; CHECK-LABEL: @PR24463( -; CHECK-NOT: alloca -; CHECK-NOT: trunc -; CHECK-NOT: lshr -; CHECK: %[[ZEXT:.*]] = zext i8 {{.*}} to i16 -; CHECK: ret i16 %[[ZEXT]] -entry: - %alloca = alloca [3 x i8] - %gep1 = getelementptr inbounds [3 x i8], [3 x i8]* %alloca, i64 0, i64 1 - %bc1 = bitcast i8* %gep1 to i16* - store i16 0, i16* %bc1 - %gep2 = getelementptr inbounds [3 x i8], [3 x i8]* %alloca, i64 0, i64 2 - %bc2 = bitcast i8* %gep2 to i16* - %load = load i16, i16* %bc2 - ret i16 %load -} - -%struct.STest = type { %struct.SPos, %struct.SPos } -%struct.SPos = type { float, float } - -define void @PR25873(%struct.STest* %outData) { -; CHECK-LABEL: @PR25873( -; CHECK: store i32 1123418112 -; CHECK: store i32 1139015680 -; CHECK: %[[HIZEXT:.*]] = zext i32 1139015680 to i64 -; CHECK: %[[HISHL:.*]] = shl i64 %[[HIZEXT]], 32 -; CHECK: %[[HIMASK:.*]] = and i64 undef, 4294967295 -; CHECK: %[[HIINSERT:.*]] = or i64 %[[HIMASK]], %[[HISHL]] -; CHECK: %[[LOZEXT:.*]] = zext i32 1123418112 to i64 -; CHECK: %[[LOMASK:.*]] = and i64 %[[HIINSERT]], -4294967296 -; CHECK: %[[LOINSERT:.*]] = or i64 %[[LOMASK]], %[[LOZEXT]] -; CHECK: store i64 %[[LOINSERT]] -entry: - %tmpData = alloca %struct.STest, align 8 - %0 = bitcast %struct.STest* %tmpData to i8* - call void @llvm.lifetime.start.p0i8(i64 16, i8* %0) - %x = getelementptr inbounds %struct.STest, %struct.STest* %tmpData, i64 0, i32 0, i32 0 - store float 1.230000e+02, float* %x, align 8 - %y = getelementptr inbounds %struct.STest, %struct.STest* %tmpData, i64 0, i32 0, i32 1 - store float 4.560000e+02, float* %y, align 4 - %m_posB = getelementptr inbounds %struct.STest, %struct.STest* %tmpData, i64 0, i32 1 - %1 = bitcast %struct.STest* %tmpData to i64* - %2 = bitcast %struct.SPos* %m_posB to i64* - %3 = load i64, i64* %1, align 8 - store i64 %3, i64* %2, align 8 - %4 = bitcast %struct.STest* %outData to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %4, i8* align 4 %0, i64 16, i1 false) - call void 
@llvm.lifetime.end.p0i8(i64 16, i8* %0) - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind - -define void @PR27999() unnamed_addr { -; CHECK-LABEL: @PR27999( -; CHECK: entry-block: -; CHECK-NEXT: ret void -entry-block: - %0 = alloca [2 x i64], align 8 - %1 = bitcast [2 x i64]* %0 to i8* - call void @llvm.lifetime.start.p0i8(i64 16, i8* %1) - %2 = getelementptr inbounds [2 x i64], [2 x i64]* %0, i32 0, i32 1 - %3 = bitcast i64* %2 to i8* - call void @llvm.lifetime.end.p0i8(i64 8, i8* %3) - ret void -} - -define void @PR29139() { -; CHECK-LABEL: @PR29139( -; CHECK: bb1: -; CHECK-NEXT: ret void -bb1: - %e.7.sroa.6.i = alloca i32, align 1 - %e.7.sroa.6.0.load81.i = load i32, i32* %e.7.sroa.6.i, align 1 - %0 = bitcast i32* %e.7.sroa.6.i to i8* - call void @llvm.lifetime.end.p0i8(i64 2, i8* %0) - ret void -} - -; PR35657 reports assertion failure with this code -define void @PR35657(i64 %v) { -; CHECK-LABEL: @PR35657 -; CHECK: call void @callee16(i16 %{{.*}}) -; CHECK: call void @callee48(i48 %{{.*}}) -; CHECK: ret void -entry: - %a48 = alloca i48 - %a48.cast64 = bitcast i48* %a48 to i64* - store i64 %v, i64* %a48.cast64 - %a48.cast16 = bitcast i48* %a48 to i16* - %b0_15 = load i16, i16* %a48.cast16 - %a48.cast8 = bitcast i48* %a48 to i8* - %a48_offset2 = getelementptr inbounds i8, i8* %a48.cast8, i64 2 - %a48_offset2.cast48 = bitcast i8* %a48_offset2 to i48* - %b16_63 = load i48, i48* %a48_offset2.cast48, align 2 - call void @callee16(i16 %b0_15) - call void @callee48(i48 %b16_63) - ret void -} - -declare void @callee16(i16 %a) -declare void @callee48(i48 %a) - -define void @test28(i64 %v) #0 { -; SROA should split the first i64 store to avoid additional and/or instructions -; when storing into i32 fields - -; CHECK-LABEL: @test28( -; CHECK-NOT: alloca -; CHECK-NOT: and -; CHECK-NOT: or -; CHECK: %[[shift:.*]] = lshr i64 %v, 32 -; CHECK-NEXT: %{{.*}} = trunc i64 %[[shift]] to i32 -; CHECK-NEXT: ret void - -entry: - 
%t = alloca { i64, i32, i32 } - - %b = getelementptr { i64, i32, i32 }, { i64, i32, i32 }* %t, i32 0, i32 1 - %0 = bitcast i32* %b to i64* - store i64 %v, i64* %0 - - %1 = load i32, i32* %b - %c = getelementptr { i64, i32, i32 }, { i64, i32, i32 }* %t, i32 0, i32 2 - store i32 %1, i32* %c - ret void -} - -declare void @llvm.lifetime.start.isVoid.i64.p0i8(i64, [10 x float]* nocapture) -declare void @llvm.lifetime.end.isVoid.i64.p0i8(i64, [10 x float]* nocapture) -@array = dso_local global [10 x float] undef, align 4 - -define void @test29(i32 %num, i32 %tid) { -; CHECK-LABEL: @test29( -; CHECK-NOT: alloca [10 x float] -; CHECK: ret void - -entry: - %ra = alloca [10 x float], align 4 - call void @llvm.lifetime.start.isVoid.i64.p0i8(i64 40, [10 x float]* nonnull %ra) - - %cmp1 = icmp sgt i32 %num, 0 - br i1 %cmp1, label %bb1, label %bb7 - -bb1: - %tobool = icmp eq i32 %tid, 0 - %conv.i = zext i32 %tid to i64 - %0 = bitcast [10 x float]* %ra to i32* - %1 = load i32, i32* %0, align 4 - %arrayidx5 = getelementptr inbounds [10 x float], [10 x float]* @array, i64 0, i64 %conv.i - %2 = bitcast float* %arrayidx5 to i32* - br label %bb2 - -bb2: - %i.02 = phi i32 [ %num, %bb1 ], [ %sub, %bb5 ] - br i1 %tobool, label %bb3, label %bb4 - -bb3: - br label %bb5 - -bb4: - store i32 %1, i32* %2, align 4 - br label %bb5 - -bb5: - %sub = add i32 %i.02, -1 - %cmp = icmp sgt i32 %sub, 0 - br i1 %cmp, label %bb2, label %bb6 - -bb6: - br label %bb7 - -bb7: - call void @llvm.lifetime.end.isVoid.i64.p0i8(i64 40, [10 x float]* nonnull %ra) - ret void -} - -!0 = !{!1, !1, i64 0, i64 1} -!1 = !{!2, i64 1, !"type_0"} -!2 = !{!"root"} -!3 = !{!4, !4, i64 0, i64 1} -!4 = !{!2, i64 1, !"type_3"} -!5 = !{!6, !6, i64 0, i64 1} -!6 = !{!2, i64 1, !"type_5"} -!7 = !{!8, !8, i64 0, i64 1} -!8 = !{!2, i64 1, !"type_7"} -!9 = !{!10, !10, i64 0, i64 1} -!10 = !{!2, i64 1, !"type_9"} -!11 = !{!12, !12, i64 0, i64 1} -!12 = !{!2, i64 1, !"type_11"} -!13 = !{!14, !14, i64 0, i64 1} -!14 = !{!2, i64 1, 
!"type_13"} -!15 = !{!16, !16, i64 0, i64 1} -!16 = !{!2, i64 1, !"type_15"} -!17 = !{!18, !18, i64 0, i64 1} -!18 = !{!2, i64 1, !"type_17"} -!19 = !{!20, !20, i64 0, i64 1} -!20 = !{!2, i64 1, !"type_19"} -!21 = !{!22, !22, i64 0, i64 1} -!22 = !{!2, i64 1, !"type_21"} -!23 = !{!24, !24, i64 0, i64 1} -!24 = !{!2, i64 1, !"type_23"} -!25 = !{!26, !26, i64 0, i64 1} -!26 = !{!2, i64 1, !"type_25"} -!27 = !{!28, !28, i64 0, i64 1} -!28 = !{!2, i64 1, !"type_27"} -!29 = !{!30, !30, i64 0, i64 1} -!30 = !{!2, i64 1, !"type_29"} -!31 = !{!32, !32, i64 0, i64 1} -!32 = !{!2, i64 1, !"type_31"} -!33 = !{!34, !34, i64 0, i64 1} -!34 = !{!2, i64 1, !"type_33"} -!35 = !{!36, !36, i64 0, i64 1} -!36 = !{!2, i64 1, !"type_35"} -!37 = !{!38, !38, i64 0, i64 1} -!38 = !{!2, i64 1, !"type_37"} -!39 = !{!40, !40, i64 0, i64 1} -!40 = !{!2, i64 1, !"type_39"} -!41 = !{!42, !42, i64 0, i64 1} -!42 = !{!2, i64 1, !"type_41"} -!43 = !{!44, !44, i64 0, i64 1} -!44 = !{!2, i64 1, !"type_43"} -!45 = !{!46, !46, i64 0, i64 1} -!46 = !{!2, i64 1, !"type_45"} -!47 = !{!48, !48, i64 0, i64 1} -!48 = !{!2, i64 1, !"type_47"} -!49 = !{!50, !50, i64 0, i64 1} -!50 = !{!2, i64 1, !"type_49"} -!51 = !{!52, !52, i64 0, i64 1} -!52 = !{!2, i64 1, !"type_51"} -!53 = !{!54, !54, i64 0, i64 1} -!54 = !{!2, i64 1, !"type_53"} -!55 = !{!56, !56, i64 0, i64 1} -!56 = !{!2, i64 1, !"type_55"} -!57 = !{!58, !58, i64 0, i64 1} -!58 = !{!2, i64 1, !"type_57"} -!59 = !{!60, !60, i64 0, i64 1} -!60 = !{!2, i64 1, !"type_59"} - -; CHECK-DAG: [[TYPE_0:!.*]] = !{{{.*}}, !"type_0"} -; CHECK-DAG: [[TAG_0]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_3:!.*]] = !{{{.*}}, !"type_3"} -; CHECK-DAG: [[TAG_3]] = !{[[TYPE_3]], [[TYPE_3]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_5:!.*]] = !{{{.*}}, !"type_5"} -; CHECK-DAG: [[TAG_5]] = !{[[TYPE_5]], [[TYPE_5]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_7:!.*]] = !{{{.*}}, !"type_7"} -; CHECK-DAG: [[TAG_7]] = !{[[TYPE_7]], [[TYPE_7]], i64 0, i64 1} -; CHECK-DAG: 
[[TYPE_9:!.*]] = !{{{.*}}, !"type_9"} -; CHECK-DAG: [[TAG_9]] = !{[[TYPE_9]], [[TYPE_9]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_11:!.*]] = !{{{.*}}, !"type_11"} -; CHECK-DAG: [[TAG_11]] = !{[[TYPE_11]], [[TYPE_11]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_13:!.*]] = !{{{.*}}, !"type_13"} -; CHECK-DAG: [[TAG_13]] = !{[[TYPE_13]], [[TYPE_13]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_15:!.*]] = !{{{.*}}, !"type_15"} -; CHECK-DAG: [[TAG_15]] = !{[[TYPE_15]], [[TYPE_15]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_17:!.*]] = !{{{.*}}, !"type_17"} -; CHECK-DAG: [[TAG_17]] = !{[[TYPE_17]], [[TYPE_17]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_19:!.*]] = !{{{.*}}, !"type_19"} -; CHECK-DAG: [[TAG_19]] = !{[[TYPE_19]], [[TYPE_19]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_21:!.*]] = !{{{.*}}, !"type_21"} -; CHECK-DAG: [[TAG_21]] = !{[[TYPE_21]], [[TYPE_21]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_23:!.*]] = !{{{.*}}, !"type_23"} -; CHECK-DAG: [[TAG_23]] = !{[[TYPE_23]], [[TYPE_23]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_25:!.*]] = !{{{.*}}, !"type_25"} -; CHECK-DAG: [[TAG_25]] = !{[[TYPE_25]], [[TYPE_25]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_27:!.*]] = !{{{.*}}, !"type_27"} -; CHECK-DAG: [[TAG_27]] = !{[[TYPE_27]], [[TYPE_27]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_29:!.*]] = !{{{.*}}, !"type_29"} -; CHECK-DAG: [[TAG_29]] = !{[[TYPE_29]], [[TYPE_29]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_31:!.*]] = !{{{.*}}, !"type_31"} -; CHECK-DAG: [[TAG_31]] = !{[[TYPE_31]], [[TYPE_31]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_33:!.*]] = !{{{.*}}, !"type_33"} -; CHECK-DAG: [[TAG_33]] = !{[[TYPE_33]], [[TYPE_33]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_35:!.*]] = !{{{.*}}, !"type_35"} -; CHECK-DAG: [[TAG_35]] = !{[[TYPE_35]], [[TYPE_35]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_37:!.*]] = !{{{.*}}, !"type_37"} -; CHECK-DAG: [[TAG_37]] = !{[[TYPE_37]], [[TYPE_37]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_39:!.*]] = !{{{.*}}, !"type_39"} -; CHECK-DAG: [[TAG_39]] = !{[[TYPE_39]], [[TYPE_39]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_41:!.*]] = !{{{.*}}, 
!"type_41"} -; CHECK-DAG: [[TAG_41]] = !{[[TYPE_41]], [[TYPE_41]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_43:!.*]] = !{{{.*}}, !"type_43"} -; CHECK-DAG: [[TAG_43]] = !{[[TYPE_43]], [[TYPE_43]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_45:!.*]] = !{{{.*}}, !"type_45"} -; CHECK-DAG: [[TAG_45]] = !{[[TYPE_45]], [[TYPE_45]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_47:!.*]] = !{{{.*}}, !"type_47"} -; CHECK-DAG: [[TAG_47]] = !{[[TYPE_47]], [[TYPE_47]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_49:!.*]] = !{{{.*}}, !"type_49"} -; CHECK-DAG: [[TAG_49]] = !{[[TYPE_49]], [[TYPE_49]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_51:!.*]] = !{{{.*}}, !"type_51"} -; CHECK-DAG: [[TAG_51]] = !{[[TYPE_51]], [[TYPE_51]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_53:!.*]] = !{{{.*}}, !"type_53"} -; CHECK-DAG: [[TAG_53]] = !{[[TYPE_53]], [[TYPE_53]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_55:!.*]] = !{{{.*}}, !"type_55"} -; CHECK-DAG: [[TAG_55]] = !{[[TYPE_55]], [[TYPE_55]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_57:!.*]] = !{{{.*}}, !"type_57"} -; CHECK-DAG: [[TAG_57]] = !{[[TYPE_57]], [[TYPE_57]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_59:!.*]] = !{{{.*}}, !"type_59"} -; CHECK-DAG: [[TAG_59]] = !{[[TYPE_59]], [[TYPE_59]], i64 0, i64 1} diff --git a/llvm/test/Transforms/SROA/big-endian.ll b/llvm/test/Transforms/SROA/big-endian.ll deleted file mode 100644 index 91165456038..00000000000 --- a/llvm/test/Transforms/SROA/big-endian.ll +++ /dev/null @@ -1,252 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s - -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" - -define i8 @test1() { -; We fully promote these to the i24 load or store size, resulting in just masks -; and other operations that instcombine will fold, but no alloca. Note this is -; the same as test12 in basictest.ll, but here we assert big-endian byte -; ordering. 
-; -; CHECK-LABEL: @test1( - -entry: - %a = alloca [3 x i8] - %b = alloca [3 x i8] -; CHECK-NOT: alloca - - %a0ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 0 - store i8 0, i8* %a0ptr - %a1ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 1 - store i8 0, i8* %a1ptr - %a2ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 2 - store i8 0, i8* %a2ptr - %aiptr = bitcast [3 x i8]* %a to i24* - %ai = load i24, i24* %aiptr -; CHECK-NOT: store -; CHECK-NOT: load -; CHECK: %[[ext2:.*]] = zext i8 0 to i24 -; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, -256 -; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[ext2]] -; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24 -; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8 -; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281 -; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]] -; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24 -; CHECK-NEXT: %[[shift0:.*]] = shl i24 %[[ext0]], 16 -; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], 65535 -; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[shift0]] - - %biptr = bitcast [3 x i8]* %b to i24* - store i24 %ai, i24* %biptr - %b0ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 0 - %b0 = load i8, i8* %b0ptr - %b1ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 1 - %b1 = load i8, i8* %b1ptr - %b2ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 2 - %b2 = load i8, i8* %b2ptr -; CHECK-NOT: store -; CHECK-NOT: load -; CHECK: %[[shift0:.*]] = lshr i24 %[[insert0]], 16 -; CHECK-NEXT: %[[trunc0:.*]] = trunc i24 %[[shift0]] to i8 -; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8 -; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8 -; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[insert0]] to i8 - - %bsum0 = add i8 %b0, %b1 - %bsum1 = add i8 %bsum0, %b2 - ret i8 %bsum1 -; CHECK: %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]] -; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]] -; CHECK-NEXT: ret i8 %[[sum1]] -} - -define i64 @test2() 
{ -; Test for various mixed sizes of integer loads and stores all getting -; promoted. -; -; CHECK-LABEL: @test2( - -entry: - %a = alloca [7 x i8] -; CHECK-NOT: alloca - - %a0ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 0 - %a1ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 1 - %a2ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 2 - %a3ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 3 - -; CHECK-NOT: store -; CHECK-NOT: load - - %a0i16ptr = bitcast i8* %a0ptr to i16* - store i16 1, i16* %a0i16ptr - - store i8 1, i8* %a2ptr - - %a3i24ptr = bitcast i8* %a3ptr to i24* - store i24 1, i24* %a3i24ptr - - %a2i40ptr = bitcast i8* %a2ptr to i40* - store i40 1, i40* %a2i40ptr - -; the alloca is splitted into multiple slices -; Here, i8 1 is for %a[6] -; CHECK: %[[ext1:.*]] = zext i8 1 to i40 -; CHECK-NEXT: %[[mask1:.*]] = and i40 undef, -256 -; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], %[[ext1]] - -; Here, i24 0 is for %a[3] to %a[5] -; CHECK-NEXT: %[[ext2:.*]] = zext i24 0 to i40 -; CHECK-NEXT: %[[shift2:.*]] = shl i40 %[[ext2]], 8 -; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041 -; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], %[[shift2]] - -; Here, i8 0 is for %a[2] -; CHECK-NEXT: %[[ext3:.*]] = zext i8 0 to i40 -; CHECK-NEXT: %[[shift3:.*]] = shl i40 %[[ext3]], 32 -; CHECK-NEXT: %[[mask3:.*]] = and i40 %[[insert2]], 4294967295 -; CHECK-NEXT: %[[insert3:.*]] = or i40 %[[mask3]], %[[shift3]] - -; CHECK-NEXT: %[[ext4:.*]] = zext i40 %[[insert3]] to i56 -; CHECK-NEXT: %[[mask4:.*]] = and i56 undef, -1099511627776 -; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[ext4]] - -; CHECK-NOT: store -; CHECK-NOT: load - - %aiptr = bitcast [7 x i8]* %a to i56* - %ai = load i56, i56* %aiptr - %ret = zext i56 %ai to i64 - ret i64 %ret -; Here, i16 1 is for %a[0] to %a[1] -; CHECK-NEXT: %[[ext5:.*]] = zext i16 1 to i56 -; CHECK-NEXT: %[[shift5:.*]] = shl i56 %[[ext5]], 40 -; CHECK-NEXT: %[[mask5:.*]] = and i56 %[[insert4]], 
1099511627775 -; CHECK-NEXT: %[[insert5:.*]] = or i56 %[[mask5]], %[[shift5]] -; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert5]] to i64 -; CHECK-NEXT: ret i64 %[[ret]] -} - -define i64 @PR14132(i1 %flag) { -; CHECK-LABEL: @PR14132( -; Here we form a PHI-node by promoting the pointer alloca first, and then in -; order to promote the other two allocas, we speculate the load of the -; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8 -; alloca. While this is a bit dubious, we were asserting on trying to -; rewrite it. The trick is that the code using the value may carefully take -; steps to only use the not-undef bits, and so we need to at least loosely -; support this. This test is particularly interesting because how we handle -; a load of an i64 from an i8 alloca is dependent on endianness. -entry: - %a = alloca i64, align 8 - %b = alloca i8, align 8 - %ptr = alloca i64*, align 8 -; CHECK-NOT: alloca - - %ptr.cast = bitcast i64** %ptr to i8** - store i64 0, i64* %a - store i8 1, i8* %b - store i64* %a, i64** %ptr - br i1 %flag, label %if.then, label %if.end - -if.then: - store i8* %b, i8** %ptr.cast - br label %if.end -; CHECK-NOT: store -; CHECK: %[[ext:.*]] = zext i8 1 to i64 -; CHECK: %[[shift:.*]] = shl i64 %[[ext]], 56 - -if.end: - %tmp = load i64*, i64** %ptr - %result = load i64, i64* %tmp -; CHECK-NOT: load -; CHECK: %[[result:.*]] = phi i64 [ %[[shift]], %if.then ], [ 0, %entry ] - - ret i64 %result -; CHECK-NEXT: ret i64 %[[result]] -} - -declare void @f(i64 %x, i32 %y) - -define void @test3() { -; CHECK-LABEL: @test3( -; -; This is a test that specifically exercises the big-endian lowering because it -; ends up splitting a 64-bit integer into two smaller integers and has a number -; of tricky aspects (the i24 type) that make that hard. 
Historically, SROA -; would miscompile this by either dropping a most significant byte or least -; significant byte due to shrinking the [4,8) slice to an i24, or by failing to -; move the bytes around correctly. -; -; The magical number 34494054408 is used because it has bits set in various -; bytes so that it is clear if those bytes fail to be propagated. -; -; If you're debugging this, rather than using the direct magical numbers, run -; the IR through '-sroa -instcombine'. With '-instcombine' these will be -; constant folded, and if the i64 doesn't round-trip correctly, you've found -; a bug! -; -entry: - %a = alloca { i32, i24 }, align 4 -; CHECK-NOT: alloca - - %tmp0 = bitcast { i32, i24 }* %a to i64* - store i64 34494054408, i64* %tmp0 - %tmp1 = load i64, i64* %tmp0, align 4 - %tmp2 = bitcast { i32, i24 }* %a to i32* - %tmp3 = load i32, i32* %tmp2, align 4 -; CHECK: %[[HI_EXT:.*]] = zext i32 134316040 to i64 -; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296 -; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]] -; CHECK: %[[LO_EXT:.*]] = zext i32 8 to i64 -; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32 -; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295 -; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]] - - call void @f(i64 %tmp1, i32 %tmp3) -; CHECK: call void @f(i64 %[[LO_MERGE]], i32 8) - ret void -; CHECK: ret void -} - -define void @test4() { -; CHECK-LABEL: @test4 -; -; Much like @test3, this is specifically testing big-endian management of data. -; Also similarly, it uses constants with particular bits set to help track -; whether values are corrupted, and can be easily evaluated by running through -; -instcombine to see that the i64 round-trips. 
-; -entry: - %a = alloca { i32, i24 }, align 4 - %a2 = alloca i64, align 4 -; CHECK-NOT: alloca - - store i64 34494054408, i64* %a2 - %tmp0 = bitcast { i32, i24 }* %a to i8* - %tmp1 = bitcast i64* %a2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %tmp0, i8* align 4 %tmp1, i64 8, i1 false) -; CHECK: %[[LO_SHR:.*]] = lshr i64 34494054408, 32 -; CHECK: %[[LO_START:.*]] = trunc i64 %[[LO_SHR]] to i32 -; CHECK: %[[HI_START:.*]] = trunc i64 34494054408 to i32 - - %tmp2 = bitcast { i32, i24 }* %a to i64* - %tmp3 = load i64, i64* %tmp2, align 4 - %tmp4 = bitcast { i32, i24 }* %a to i32* - %tmp5 = load i32, i32* %tmp4, align 4 -; CHECK: %[[HI_EXT:.*]] = zext i32 %[[HI_START]] to i64 -; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296 -; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]] -; CHECK: %[[LO_EXT:.*]] = zext i32 %[[LO_START]] to i64 -; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32 -; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295 -; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]] - - call void @f(i64 %tmp3, i32 %tmp5) -; CHECK: call void @f(i64 %[[LO_MERGE]], i32 %[[LO_START]]) - ret void -; CHECK: ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) diff --git a/llvm/test/Transforms/SROA/dbg-addr-diamond.ll b/llvm/test/Transforms/SROA/dbg-addr-diamond.ll deleted file mode 100644 index b3ca07e642f..00000000000 --- a/llvm/test/Transforms/SROA/dbg-addr-diamond.ll +++ /dev/null @@ -1,127 +0,0 @@ -; RUN: opt -use-dbg-addr -sroa -S < %s | FileCheck %s - -; ModuleID = '<stdin>' -source_filename = "newvars.c" -target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-windows-msvc19.0.24215" - -%struct.Pair = type { i32, i32 } - -@pair = internal global %struct.Pair zeroinitializer - -; Function Attrs: nounwind uwtable -define void @if_else(i32 %cond, i32 %a, i32 %b) !dbg !8 { -entry: - %p = alloca %struct.Pair, align 4 - %0 = bitcast %struct.Pair* %p to i8*, !dbg !25 - 
call void @llvm.dbg.addr(metadata %struct.Pair* %p, metadata !20, metadata !DIExpression()), !dbg !26 - %x = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 0, !dbg !27 - store i32 %a, i32* %x, align 4, !dbg !28 - %y = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 1, !dbg !34 - store i32 %b, i32* %y, align 4, !dbg !35 - %tobool = icmp ne i32 %cond, 0, !dbg !37 - br i1 %tobool, label %if.then, label %if.else, !dbg !39 - -if.then: ; preds = %entry - %x1 = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 0, !dbg !40 - store i32 0, i32* %x1, align 4, !dbg !42 - %y2 = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 1, !dbg !43 - store i32 %a, i32* %y2, align 4, !dbg !44 - br label %if.end, !dbg !45 - -if.else: ; preds = %entry - %x3 = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 0, !dbg !46 - store i32 %b, i32* %x3, align 4, !dbg !48 - %y4 = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 1, !dbg !49 - store i32 0, i32* %y4, align 4, !dbg !50 - br label %if.end - -if.end: ; preds = %if.else, %if.then - %1 = bitcast %struct.Pair* %p to i8*, !dbg !51 - %2 = bitcast %struct.Pair* @pair to i8*, !dbg !51 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %2, i8* align 4 %1, i64 8, i1 false), !dbg !51 - ret void -} - -; CHECK-LABEL: define void @if_else(i32 %cond, i32 %a, i32 %b) -; CHECK: entry: -; CHECK: call void @llvm.dbg.value(metadata i32 %a, metadata ![[PVAR:[0-9]+]], metadata ![[XFRAG:DIExpression\(DW_OP_LLVM_fragment, 0, 32\)]]) -; CHECK: call void @llvm.dbg.value(metadata i32 %b, metadata ![[PVAR]], metadata ![[YFRAG:DIExpression\(DW_OP_LLVM_fragment, 32, 32\)]]) -; CHECK: if.then: -; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata ![[PVAR]], metadata ![[XFRAG]]) -; CHECK: call void @llvm.dbg.value(metadata i32 %a, metadata ![[PVAR]], metadata ![[YFRAG]]) -; CHECK: if.else: -; CHECK: call void @llvm.dbg.value(metadata i32 %b, metadata ![[PVAR]], 
metadata ![[XFRAG]]) -; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata ![[PVAR]], metadata ![[YFRAG]]) -; CHECK: if.end: -; CHECK: %p.sroa.4.0 = phi i32 [ %a, %if.then ], [ 0, %if.else ] -; CHECK: %p.sroa.0.0 = phi i32 [ 0, %if.then ], [ %b, %if.else ] -; CHECK: call void @llvm.dbg.value(metadata i32 %p.sroa.0.0, metadata ![[PVAR]], metadata ![[XFRAG]]) -; CHECK: call void @llvm.dbg.value(metadata i32 %p.sroa.4.0, metadata ![[PVAR]], metadata ![[YFRAG]]) - -; CHECK: ![[PVAR]] = !DILocalVariable(name: "p", {{.*}}) - -; Function Attrs: argmemonly nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #2 - -; Function Attrs: nounwind readnone speculatable -declare void @llvm.dbg.addr(metadata, metadata, metadata) - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5, !6} -!llvm.ident = !{!7} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "newvars.c", directory: "C:\5Csrc\5Cllvm-project\5Cbuild") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 2} -!6 = !{i32 7, !"PIC Level", i32 2} -!7 = !{!"clang version 6.0.0 "} -!8 = distinct !DISubprogram(name: "if_else", scope: !1, file: !1, line: 2, type: !9, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !16) -!9 = !DISubroutineType(types: !10) -!10 = !{!11, !14, !14, !14} -!11 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Pair", file: !1, line: 1, size: 64, elements: !12) -!12 = !{!13, !15} -!13 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !11, file: !1, line: 1, baseType: !14, size: 32) -!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!15 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !11, file: !1, line: 1, 
baseType: !14, size: 32, offset: 32) -!16 = !{!17, !18, !19, !20} -!17 = !DILocalVariable(name: "b", arg: 3, scope: !8, file: !1, line: 2, type: !14) -!18 = !DILocalVariable(name: "a", arg: 2, scope: !8, file: !1, line: 2, type: !14) -!19 = !DILocalVariable(name: "cond", arg: 1, scope: !8, file: !1, line: 2, type: !14) -!20 = !DILocalVariable(name: "p", scope: !8, file: !1, line: 3, type: !11) -!22 = !DILocation(line: 2, column: 42, scope: !8) -!23 = !DILocation(line: 2, column: 35, scope: !8) -!24 = !DILocation(line: 2, column: 25, scope: !8) -!25 = !DILocation(line: 3, column: 3, scope: !8) -!26 = !DILocation(line: 3, column: 15, scope: !8) -!27 = !DILocation(line: 4, column: 5, scope: !8) -!28 = !DILocation(line: 4, column: 7, scope: !8) -!29 = !{!30, !31, i64 0} -!30 = !{!"Pair", !31, i64 0, !31, i64 4} -!31 = !{!"int", !32, i64 0} -!32 = !{!"omnipotent char", !33, i64 0} -!33 = !{!"Simple C/C++ TBAA"} -!34 = !DILocation(line: 5, column: 5, scope: !8) -!35 = !DILocation(line: 5, column: 7, scope: !8) -!36 = !{!30, !31, i64 4} -!37 = !DILocation(line: 6, column: 7, scope: !38) -!38 = distinct !DILexicalBlock(scope: !8, file: !1, line: 6, column: 7) -!39 = !DILocation(line: 6, column: 7, scope: !8) -!40 = !DILocation(line: 7, column: 7, scope: !41) -!41 = distinct !DILexicalBlock(scope: !38, file: !1, line: 6, column: 13) -!42 = !DILocation(line: 7, column: 9, scope: !41) -!43 = !DILocation(line: 8, column: 7, scope: !41) -!44 = !DILocation(line: 8, column: 9, scope: !41) -!45 = !DILocation(line: 9, column: 3, scope: !41) -!46 = !DILocation(line: 10, column: 7, scope: !47) -!47 = distinct !DILexicalBlock(scope: !38, file: !1, line: 9, column: 10) -!48 = !DILocation(line: 10, column: 9, scope: !47) -!49 = !DILocation(line: 11, column: 7, scope: !47) -!50 = !DILocation(line: 11, column: 9, scope: !47) -!51 = !DILocation(line: 13, column: 10, scope: !8) -!52 = !{i64 0, i64 4, !53, i64 4, i64 4, !53} -!53 = !{!31, !31, i64 0} -!54 = !DILocation(line: 14, column: 1, 
scope: !8) diff --git a/llvm/test/Transforms/SROA/dbg-single-piece.ll b/llvm/test/Transforms/SROA/dbg-single-piece.ll deleted file mode 100644 index b174e5dc764..00000000000 --- a/llvm/test/Transforms/SROA/dbg-single-piece.ll +++ /dev/null @@ -1,37 +0,0 @@ -; RUN: opt -sroa %s -S | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -%foo = type { [8 x i8], [8 x i8] } - -declare void @llvm.dbg.declare(metadata, metadata, metadata) #0 -define void @_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE() { -entry: - %retval = alloca %foo, align 8 - call void @llvm.dbg.declare(metadata %foo* %retval, metadata !1, metadata !7), !dbg !8 -; Checks that SROA still inserts a bit_piece expression, even if it produces only one piece -; (as long as that piece is smaller than the whole thing) -; CHECK-NOT: call void @llvm.dbg.value -; CHECK: call void @llvm.dbg.value(metadata %foo* undef, {{.*}}, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg -; CHECK-NOT: call void @llvm.dbg.value - %0 = bitcast %foo* %retval to i8* - %1 = getelementptr inbounds i8, i8* %0, i64 8 - %2 = bitcast i8* %1 to %foo** - store %foo* undef, %foo** %2, align 8 - ret void -} - -attributes #0 = { nounwind readnone } - -!llvm.dbg.cu = !{!9} -!llvm.module.flags = !{!0} - -!0 = !{i32 2, !"Debug Info Version", i32 3} -!1 = !DILocalVariable(name: "I", scope: !2, file: !3, line: 947, type: !4) -!2 = distinct !DISubprogram(name: "findInsertLocation", linkageName: "_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE", scope: !3, file: !3, line: 937, isLocal: true, isDefinition: true, scopeLine: 938, flags: DIFlagPrototyped, isOptimized: true, unit: !9) -!3 = !DIFile(filename: "none", directory: ".") -!4 = !DICompositeType(tag: DW_TAG_class_type, name: "bundle_iterator<llvm::MachineInstr, llvm::ilist_iterator<llvm::MachineInstr> >", scope: !5, file: !3, line: 163, size: 128, align: 64, elements: !6, templateParams: 
!6, identifier: "_ZTSN4llvm17MachineBasicBlock15bundle_iteratorINS_12MachineInstrENS_14ilist_iteratorIS2_EEEE") -!5 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "MachineBasicBlock", file: !3, line: 68, size: 1408, align: 64, identifier: "_ZTSN4llvm17MachineBasicBlockE") -!6 = !{} -!7 = !DIExpression() -!8 = !DILocation(line: 947, column: 35, scope: !2) -!9 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3) diff --git a/llvm/test/Transforms/SROA/dead-inst.ll b/llvm/test/Transforms/SROA/dead-inst.ll deleted file mode 100644 index b1d106df3e6..00000000000 --- a/llvm/test/Transforms/SROA/dead-inst.ll +++ /dev/null @@ -1,97 +0,0 @@ -; SROA fails to rewrite allocs but does rewrite some phis and delete -; dead instructions. Ensure that this invalidates analyses required -; for other passes. -; RUN: opt < %s -passes=bdce,sroa,bdce -o %t -debug-pass-manager 2>&1 | FileCheck %s -; CHECK: Running pass: BDCEPass on H -; CHECK: Running analysis: DemandedBitsAnalysis on H -; CHECK: Running pass: SROA on H -; CHECK: Invalidating all non-preserved analyses for: H -; CHECK: Invalidating analysis: DemandedBitsAnalysis on H -; CHECK: Running pass: BDCEPass on H -; CHECK: Running analysis: DemandedBitsAnalysis on H -; CHECK: Finished llvm::Function pass manager run. 
- -target datalayout = "e-m:e-i64:64-n32:64" -target triple = "powerpc64le-grtev4-linux-gnu" - -%class.b = type { i64 } - -declare void @D(%class.b* sret, %class.b* dereferenceable(32)) local_unnamed_addr - -; Function Attrs: nounwind -define hidden fastcc void @H(%class.b* noalias nocapture readnone, [2 x i64]) unnamed_addr { - %3 = alloca %class.b, align 8 - %.sroa.0 = alloca i64, align 8 - store i64 0, i64* %.sroa.0, align 8 - %4 = extractvalue [2 x i64] %1, 1 - switch i64 %4, label %6 [ - i64 4, label %foo - i64 5, label %5 - ] - -; <label>:5: - %.sroa.0.0..sroa_cast3 = bitcast i64* %.sroa.0 to i8** - br label %12 - -; <label>:6: - %7 = icmp ugt i64 %4, 5 - %.sroa.0.0..sroa_cast5 = bitcast i64* %.sroa.0 to i8** - br i1 %7, label %8, label %12 - -; <label>:8: - %9 = load i8, i8* inttoptr (i64 4 to i8*), align 4 - %10 = icmp eq i8 %9, 47 - %11 = select i1 %10, i64 5, i64 4 - br label %12 - -; <label>:12: - %13 = phi i8** [ %.sroa.0.0..sroa_cast3, %5 ], [ %.sroa.0.0..sroa_cast5, %8 ], [ %.sroa.0.0..sroa_cast5, %6 ] - %14 = phi i64 [ 4, %5 ], [ %11, %8 ], [ 4, %6 ] - %15 = icmp ne i64 %4, 0 - %16 = icmp ugt i64 %4, %14 - %17 = and i1 %15, %16 - br i1 %17, label %18, label %a.exit - -; <label>:18: - %19 = tail call i8* @memchr(i8* undef, i32 signext undef, i64 undef) - %20 = icmp eq i8* %19, null - %21 = sext i1 %20 to i64 - br label %a.exit - -a.exit: - %22 = phi i64 [ -1, %12 ], [ %21, %18 ] - %23 = load i8*, i8** %13, align 8 - %24 = sub nsw i64 %22, %14 - %25 = bitcast %class.b* %3 to i8* - call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %25) - %26 = icmp ult i64 %24, 2 - br i1 %26, label %G.exit, label %27 - -; <label>:27: - %28 = getelementptr inbounds i8, i8* %23, i64 undef - %29 = icmp eq i8* %28, null - br i1 %29, label %30, label %31 - -; <label>:30: - unreachable - -; <label>:31: - call void @D(%class.b* nonnull sret %3, %class.b* nonnull dereferenceable(32) undef) - br label %G.exit - -G.exit: - call void @llvm.lifetime.end.p0i8(i64 32, i8* 
nonnull %25) - br label %foo - -foo: - ret void -} - -; Function Attrs: nounwind readonly -declare i8* @memchr(i8*, i32 signext, i64) local_unnamed_addr - -; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) - -; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) diff --git a/llvm/test/Transforms/SROA/fca.ll b/llvm/test/Transforms/SROA/fca.ll deleted file mode 100644 index 707f680e64e..00000000000 --- a/llvm/test/Transforms/SROA/fca.ll +++ /dev/null @@ -1,48 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" - -define { i32, i32 } @test0(i32 %x, i32 %y) { -; CHECK-LABEL: @test0( -; CHECK-NOT: alloca -; CHECK: insertvalue { i32, i32 } -; CHECK: insertvalue { i32, i32 } -; CHECK: ret { i32, i32 } - -entry: - %a = alloca { i32, i32 } - - store { i32, i32 } undef, { i32, i32 }* %a - - %gep1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 0 - store i32 %x, i32* %gep1 - %gep2 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 1 - store i32 %y, i32* %gep2 - - %result = load { i32, i32 }, { i32, i32 }* %a - ret { i32, i32 } %result -} - -define { i32, i32 } @test1(i32 %x, i32 %y) { -; FIXME: This may be too conservative. Duncan argues that we are allowed to -; split the volatile load and store here but must produce volatile scalar loads -; and stores from them. 
-; CHECK-LABEL: @test1( -; CHECK: alloca -; CHECK: alloca -; CHECK: load volatile { i32, i32 }, { i32, i32 }* -; CHECK: store volatile { i32, i32 } -; CHECK: ret { i32, i32 } - -entry: - %a = alloca { i32, i32 } - %b = alloca { i32, i32 } - - %gep1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 0 - store i32 %x, i32* %gep1 - %gep2 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 1 - store i32 %y, i32* %gep2 - - %result = load volatile { i32, i32 }, { i32, i32 }* %a - store volatile { i32, i32 } %result, { i32, i32 }* %b - ret { i32, i32 } %result -} diff --git a/llvm/test/Transforms/SROA/mem-par-metadata-sroa.ll b/llvm/test/Transforms/SROA/mem-par-metadata-sroa.ll deleted file mode 100644 index 577245cda60..00000000000 --- a/llvm/test/Transforms/SROA/mem-par-metadata-sroa.ll +++ /dev/null @@ -1,111 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -; -; Make sure the llvm.access.group meta-data is preserved -; when a load/store is replaced with another load/store by sroa -; -; class Complex { -; private: -; float real_; -; float imaginary_; -; -; public: -; Complex() : real_(0), imaginary_(0) { } -; Complex(float real, float imaginary) : real_(real), imaginary_(imaginary) { } -; Complex(const Complex &rhs) : real_(rhs.real()), imaginary_(rhs.imaginary()) { } -; -; inline float real() const { return real_; } -; inline float imaginary() const { return imaginary_; } -; -; Complex operator+(const Complex& rhs) const -; { -; return Complex(real_ + rhs.real_, imaginary_ + rhs.imaginary_); -; } -; }; -; -; void test(Complex *out, long size) -; { -; #pragma clang loop vectorize(assume_safety) -; for (long offset = 0; offset < size; ++offset) { -; Complex t0 = out[offset]; -; out[offset] = t0 + t0; -; } -; } - -; CHECK: for.body: -; CHECK-NOT: store i32 %{{.*}}, i32* %{{.*}}, align 4 -; CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 4, !llvm.access.group !1 -; CHECK-NOT: store i32 %{{.*}}, i32* %{{.*}}, align 4 -; CHECK: store i32 
%{{.*}}, i32* %{{.*}}, align 4, !llvm.access.group !1 -; CHECK-NOT: store i32 %{{.*}}, i32* %{{.*}}, align 4 -; CHECK: br label - -; ModuleID = '<stdin>' -source_filename = "mem-par-metadata-sroa1.cpp" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%class.Complex = type { float, float } - -; Function Attrs: norecurse nounwind uwtable -define void @_Z4testP7Complexl(%class.Complex* nocapture %out, i64 %size) local_unnamed_addr #0 { -entry: - %t0 = alloca %class.Complex, align 4 - %ref.tmp = alloca i64, align 8 - %tmpcast = bitcast i64* %ref.tmp to %class.Complex* - br label %for.cond - -for.cond: ; preds = %for.body, %entry - %offset.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %cmp = icmp slt i64 %offset.0, %size - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %offset.0 - %real_.i = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 0 - %real_.i.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx, i64 0, i32 0 - %0 = load float, float* %real_.i.i, align 4, !llvm.access.group !11 - store float %0, float* %real_.i, align 4, !llvm.access.group !11 - %imaginary_.i = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 1 - %imaginary_.i.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx, i64 0, i32 1 - %1 = load float, float* %imaginary_.i.i, align 4, !llvm.access.group !11 - store float %1, float* %imaginary_.i, align 4, !llvm.access.group !11 - %arrayidx1 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %offset.0 - %real_.i1 = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 0 - %2 = load float, float* %real_.i1, align 4, !noalias !3, !llvm.access.group !11 - %real_2.i = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 0 - %3 = load float, float* %real_2.i, align 
4, !noalias !3, !llvm.access.group !11 - %add.i = fadd float %2, %3 - %imaginary_.i2 = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 1 - %4 = load float, float* %imaginary_.i2, align 4, !noalias !3, !llvm.access.group !11 - %imaginary_3.i = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 1 - %5 = load float, float* %imaginary_3.i, align 4, !noalias !3, !llvm.access.group !11 - %add4.i = fadd float %4, %5 - %real_.i.i3 = getelementptr inbounds %class.Complex, %class.Complex* %tmpcast, i64 0, i32 0 - store float %add.i, float* %real_.i.i3, align 4, !alias.scope !3, !llvm.access.group !11 - %imaginary_.i.i4 = getelementptr inbounds %class.Complex, %class.Complex* %tmpcast, i64 0, i32 1 - store float %add4.i, float* %imaginary_.i.i4, align 4, !alias.scope !3, !llvm.access.group !11 - %6 = bitcast %class.Complex* %arrayidx1 to i64* - %7 = load i64, i64* %ref.tmp, align 8, !llvm.access.group !11 - store i64 %7, i64* %6, align 4, !llvm.access.group !11 - %inc = add nsw i64 %offset.0, 1 - br label %for.cond, !llvm.loop !1 - -for.end: ; preds = %for.cond - ret void -} - -; Function Attrs: argmemonly nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1 - -attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind } - -!llvm.ident = !{!0} - -!0 = !{!"clang version 4.0.0 (cfe/trunk 277751)"} -!1 = distinct !{!1, !2, !{!"llvm.loop.parallel_accesses", !11}} -!2 = !{!"llvm.loop.vectorize.enable", i1 true} -!3 = !{!4} -!4 = distinct !{!4, !5, !"_ZNK7ComplexplERKS_: 
%agg.result"} -!5 = distinct !{!5, !"_ZNK7ComplexplERKS_"} -!11 = distinct !{} diff --git a/llvm/test/Transforms/SROA/non-integral-pointers.ll b/llvm/test/Transforms/SROA/non-integral-pointers.ll deleted file mode 100644 index 166f5dc7b42..00000000000 --- a/llvm/test/Transforms/SROA/non-integral-pointers.ll +++ /dev/null @@ -1,88 +0,0 @@ -; RUN: opt -sroa -S < %s | FileCheck %s - -; This test checks that SROA does not introduce ptrtoint and inttoptr -; casts from and to non-integral pointers. The "ni:4" bit in the -; datalayout states that pointers of address space 4 are to be -; considered "non-integral". - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4" -target triple = "x86_64-unknown-linux-gnu" - -define void @f0(i1 %alwaysFalse, i64 %val) { -; CHECK-LABEL: @f0( -; CHECK-NOT: inttoptr -; CHECK-NOT: ptrtoint -entry: - %loc = alloca i64 - store i64 %val, i64* %loc - br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken - -neverTaken: - %loc.bc = bitcast i64* %loc to i8 addrspace(4)** - %ptr = load i8 addrspace(4)*, i8 addrspace(4)** %loc.bc - store i8 5, i8 addrspace(4)* %ptr - ret void - -alwaysTaken: - ret void -} - -define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val) { -; CHECK-LABEL: @f1( -; CHECK-NOT: inttoptr -; CHECK-NOT: ptrtoint -entry: - %loc = alloca i8 addrspace(4)* - store i8 addrspace(4)* %val, i8 addrspace(4)** %loc - br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken - -neverTaken: - %loc.bc = bitcast i8 addrspace(4)** %loc to i64* - %int = load i64, i64* %loc.bc - ret i64 %int - -alwaysTaken: - ret i64 42 -} - -define i64 addrspace(4)* @memset(i1 %alwaysFalse) { -; CHECK-LABEL: @memset( -; CHECK-NOT: inttoptr -; CHECK-NOT: ptrtoint -entry: - %x = alloca i64 addrspace(4)* - %cast.0 = bitcast i64 addrspace(4)** %x to i8* - call void @llvm.memset.p0i8.i64(i8* align 8 %cast.0, i8 5, i64 16, i1 false) - br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken - -neverTaken: - %x.field.ld.0 = load i64 addrspace(4)*, i64 
addrspace(4)** %x - ret i64 addrspace(4)* %x.field.ld.0 - -alwaysTaken: - ret i64 addrspace(4)* null -} - -;; TODO: This one demonstrates a missed opportunity. The only known bit -;; pattern for a non-integral pointer is that null is zero. As such -;; we could do SROA and replace the memset w/a null store. This will -;; usually be gotten by instcombine. -define i64 addrspace(4)* @memset_null(i1 %alwaysFalse) { -; CHECK-LABEL: @memset_null( -; CHECK-NOT: inttoptr -; CHECK-NOT: ptrtoint -entry: - %x = alloca i64 addrspace(4)* - %cast.0 = bitcast i64 addrspace(4)** %x to i8* - call void @llvm.memset.p0i8.i64(i8* align 8 %cast.0, i8 0, i64 16, i1 false) - br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken - -neverTaken: - %x.field.ld.0 = load i64 addrspace(4)*, i64 addrspace(4)** %x - ret i64 addrspace(4)* %x.field.ld.0 - -alwaysTaken: - ret i64 addrspace(4)* null -} - -declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1) diff --git a/llvm/test/Transforms/SROA/phi-and-select.ll b/llvm/test/Transforms/SROA/phi-and-select.ll deleted file mode 100644 index d0904cecd9f..00000000000 --- a/llvm/test/Transforms/SROA/phi-and-select.ll +++ /dev/null @@ -1,646 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" - -define i32 @test1() { -; CHECK-LABEL: @test1( -entry: - %a = alloca [2 x i32] -; CHECK-NOT: alloca - - %a0 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0 - %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1 - store i32 0, i32* %a0 - store i32 1, i32* %a1 - %v0 = load i32, i32* %a0 - %v1 = load i32, i32* %a1 -; CHECK-NOT: store -; CHECK-NOT: load - - %cond = icmp sle i32 %v0, %v1 - br i1 %cond, label %then, label %exit - -then: - br label %exit - -exit: - %phi = phi i32* [ %a1, %then ], [ %a0, %entry ] -; CHECK: phi i32 [ 1, %{{.*}} ], [ 0, %{{.*}} ] - - %result = load i32, i32* %phi - ret i32 %result -} - 
-define i32 @test2() { -; CHECK-LABEL: @test2( -entry: - %a = alloca [2 x i32] -; CHECK-NOT: alloca - - %a0 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0 - %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1 - store i32 0, i32* %a0 - store i32 1, i32* %a1 - %v0 = load i32, i32* %a0 - %v1 = load i32, i32* %a1 -; CHECK-NOT: store -; CHECK-NOT: load - - %cond = icmp sle i32 %v0, %v1 - %select = select i1 %cond, i32* %a1, i32* %a0 -; CHECK: select i1 %{{.*}}, i32 1, i32 0 - - %result = load i32, i32* %select - ret i32 %result -} - -define i32 @test3(i32 %x) { -; CHECK-LABEL: @test3( -entry: - %a = alloca [2 x i32] -; CHECK-NOT: alloca - - ; Note that we build redundant GEPs here to ensure that having different GEPs - ; into the same alloca partition continues to work with PHI speculation. This - ; was the underlying cause of PR13926. - %a0 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0 - %a0b = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0 - %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1 - %a1b = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1 - store i32 0, i32* %a0 - store i32 1, i32* %a1 -; CHECK-NOT: store - - switch i32 %x, label %bb0 [ i32 1, label %bb1 - i32 2, label %bb2 - i32 3, label %bb3 - i32 4, label %bb4 - i32 5, label %bb5 - i32 6, label %bb6 - i32 7, label %bb7 ] - -bb0: - br label %exit -bb1: - br label %exit -bb2: - br label %exit -bb3: - br label %exit -bb4: - br label %exit -bb5: - br label %exit -bb6: - br label %exit -bb7: - br label %exit - -exit: - %phi = phi i32* [ %a1, %bb0 ], [ %a0, %bb1 ], [ %a0, %bb2 ], [ %a1, %bb3 ], - [ %a1b, %bb4 ], [ %a0b, %bb5 ], [ %a0b, %bb6 ], [ %a1b, %bb7 ] -; CHECK: phi i32 [ 1, %{{.*}} ], [ 0, %{{.*}} ], [ 0, %{{.*}} ], [ 1, %{{.*}} ], [ 1, %{{.*}} ], [ 0, %{{.*}} ], [ 0, %{{.*}} ], [ 1, %{{.*}} ] - - %result = load i32, i32* %phi - ret i32 %result -} - -define i32 @test4() { -; CHECK-LABEL: @test4( -entry: - %a = alloca [2 x i32] -; CHECK-NOT: alloca - - %a0 = 
getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0 - %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1 - store i32 0, i32* %a0 - store i32 1, i32* %a1 - %v0 = load i32, i32* %a0 - %v1 = load i32, i32* %a1 -; CHECK-NOT: store -; CHECK-NOT: load - - %cond = icmp sle i32 %v0, %v1 - %select = select i1 %cond, i32* %a0, i32* %a0 -; CHECK-NOT: select - - %result = load i32, i32* %select - ret i32 %result -; CHECK: ret i32 0 -} - -define i32 @test5(i32* %b) { -; CHECK-LABEL: @test5( -entry: - %a = alloca [2 x i32] -; CHECK-NOT: alloca - - %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1 - store i32 1, i32* %a1 -; CHECK-NOT: store - - %select = select i1 true, i32* %a1, i32* %b -; CHECK-NOT: select - - %result = load i32, i32* %select -; CHECK-NOT: load - - ret i32 %result -; CHECK: ret i32 1 -} - -declare void @f(i32*, i32*) - -define i32 @test6(i32* %b) { -; CHECK-LABEL: @test6( -entry: - %a = alloca [2 x i32] - %c = alloca i32 -; CHECK-NOT: alloca - - %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1 - store i32 1, i32* %a1 - - %select = select i1 true, i32* %a1, i32* %b - %select2 = select i1 false, i32* %a1, i32* %b - %select3 = select i1 false, i32* %c, i32* %b -; CHECK: %[[select2:.*]] = select i1 false, i32* undef, i32* %b -; CHECK: %[[select3:.*]] = select i1 false, i32* undef, i32* %b - - ; Note, this would potentially escape the alloca pointer except for the - ; constant folding of the select. 
- call void @f(i32* %select2, i32* %select3) -; CHECK: call void @f(i32* %[[select2]], i32* %[[select3]]) - - - %result = load i32, i32* %select -; CHECK-NOT: load - - %dead = load i32, i32* %c - - ret i32 %result -; CHECK: ret i32 1 -} - -define i32 @test7() { -; CHECK-LABEL: @test7( -; CHECK-NOT: alloca - -entry: - %X = alloca i32 - br i1 undef, label %good, label %bad - -good: - %Y1 = getelementptr i32, i32* %X, i64 0 - store i32 0, i32* %Y1 - br label %exit - -bad: - %Y2 = getelementptr i32, i32* %X, i64 1 - store i32 0, i32* %Y2 - br label %exit - -exit: - %P = phi i32* [ %Y1, %good ], [ %Y2, %bad ] -; CHECK: %[[phi:.*]] = phi i32 [ 0, %good ], - %Z2 = load i32, i32* %P - ret i32 %Z2 -; CHECK: ret i32 %[[phi]] -} - -define i32 @test8(i32 %b, i32* %ptr) { -; Ensure that we rewrite allocas to the used type when that use is hidden by -; a PHI that can be speculated. -; CHECK-LABEL: @test8( -; CHECK-NOT: alloca -; CHECK-NOT: load -; CHECK: %[[value:.*]] = load i32, i32* %ptr -; CHECK-NOT: load -; CHECK: %[[result:.*]] = phi i32 [ undef, %else ], [ %[[value]], %then ] -; CHECK-NEXT: ret i32 %[[result]] - -entry: - %f = alloca float - %test = icmp ne i32 %b, 0 - br i1 %test, label %then, label %else - -then: - br label %exit - -else: - %bitcast = bitcast float* %f to i32* - br label %exit - -exit: - %phi = phi i32* [ %bitcast, %else ], [ %ptr, %then ] - %loaded = load i32, i32* %phi, align 4 - ret i32 %loaded -} - -define i32 @test9(i32 %b, i32* %ptr) { -; Same as @test8 but for a select rather than a PHI node. 
-; CHECK-LABEL: @test9( -; CHECK-NOT: alloca -; CHECK-NOT: load -; CHECK: %[[value:.*]] = load i32, i32* %ptr -; CHECK-NOT: load -; CHECK: %[[result:.*]] = select i1 %{{.*}}, i32 undef, i32 %[[value]] -; CHECK-NEXT: ret i32 %[[result]] - -entry: - %f = alloca float - store i32 0, i32* %ptr - %test = icmp ne i32 %b, 0 - %bitcast = bitcast float* %f to i32* - %select = select i1 %test, i32* %bitcast, i32* %ptr - %loaded = load i32, i32* %select, align 4 - ret i32 %loaded -} - -define float @test10(i32 %b, float* %ptr) { -; Don't try to promote allocas which are not eligible for it even after -; rewriting due to the necessity of inserting bitcasts when speculating a PHI -; node. -; CHECK-LABEL: @test10( -; CHECK: %[[alloca:.*]] = alloca -; CHECK: %[[argvalue:.*]] = load float, float* %ptr -; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float* -; CHECK: %[[allocavalue:.*]] = load float, float* %[[cast]] -; CHECK: %[[result:.*]] = phi float [ %[[allocavalue]], %else ], [ %[[argvalue]], %then ] -; CHECK-NEXT: ret float %[[result]] - -entry: - %f = alloca double - store double 0.0, double* %f - %test = icmp ne i32 %b, 0 - br i1 %test, label %then, label %else - -then: - br label %exit - -else: - %bitcast = bitcast double* %f to float* - br label %exit - -exit: - %phi = phi float* [ %bitcast, %else ], [ %ptr, %then ] - %loaded = load float, float* %phi, align 4 - ret float %loaded -} - -define float @test11(i32 %b, float* %ptr) { -; Same as @test10 but for a select rather than a PHI node. 
-; CHECK-LABEL: @test11( -; CHECK: %[[alloca:.*]] = alloca -; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float* -; CHECK: %[[allocavalue:.*]] = load float, float* %[[cast]] -; CHECK: %[[argvalue:.*]] = load float, float* %ptr -; CHECK: %[[result:.*]] = select i1 %{{.*}}, float %[[allocavalue]], float %[[argvalue]] -; CHECK-NEXT: ret float %[[result]] - -entry: - %f = alloca double - store double 0.0, double* %f - store float 0.0, float* %ptr - %test = icmp ne i32 %b, 0 - %bitcast = bitcast double* %f to float* - %select = select i1 %test, float* %bitcast, float* %ptr - %loaded = load float, float* %select, align 4 - ret float %loaded -} - -define i32 @test12(i32 %x, i32* %p) { -; Ensure we don't crash or fail to nuke dead selects of allocas if no load is -; ever found. -; CHECK-LABEL: @test12( -; CHECK-NOT: alloca -; CHECK-NOT: select -; CHECK: ret i32 %x - -entry: - %a = alloca i32 - store i32 %x, i32* %a - %dead = select i1 undef, i32* %a, i32* %p - %load = load i32, i32* %a - ret i32 %load -} - -define i32 @test13(i32 %x, i32* %p) { -; Ensure we don't crash or fail to nuke dead phis of allocas if no load is ever -; found. -; CHECK-LABEL: @test13( -; CHECK-NOT: alloca -; CHECK-NOT: phi -; CHECK: ret i32 %x - -entry: - %a = alloca i32 - store i32 %x, i32* %a - br label %loop - -loop: - %phi = phi i32* [ %p, %entry ], [ %a, %loop ] - br i1 undef, label %loop, label %exit - -exit: - %load = load i32, i32* %a - ret i32 %load -} - -define i32 @test14(i1 %b1, i1 %b2, i32* %ptr) { -; Check for problems when there are both selects and phis and one is -; speculatable toward promotion but the other is not. That should block all of -; the speculation. 
-; CHECK-LABEL: @test14( -; CHECK: alloca -; CHECK: alloca -; CHECK: select -; CHECK: phi -; CHECK: phi -; CHECK: select -; CHECK: ret i32 - -entry: - %f = alloca i32 - %g = alloca i32 - store i32 0, i32* %f - store i32 0, i32* %g - %f.select = select i1 %b1, i32* %f, i32* %ptr - br i1 %b2, label %then, label %else - -then: - br label %exit - -else: - br label %exit - -exit: - %f.phi = phi i32* [ %f, %then ], [ %f.select, %else ] - %g.phi = phi i32* [ %g, %then ], [ %ptr, %else ] - %f.loaded = load i32, i32* %f.phi - %g.select = select i1 %b1, i32* %g, i32* %g.phi - %g.loaded = load i32, i32* %g.select - %result = add i32 %f.loaded, %g.loaded - ret i32 %result -} - -define i32 @PR13905() { -; Check a pattern where we have a chain of dead phi nodes to ensure they are -; deleted and promotion can proceed. -; CHECK-LABEL: @PR13905( -; CHECK-NOT: alloca i32 -; CHECK: ret i32 undef - -entry: - %h = alloca i32 - store i32 0, i32* %h - br i1 undef, label %loop1, label %exit - -loop1: - %phi1 = phi i32* [ null, %entry ], [ %h, %loop1 ], [ %h, %loop2 ] - br i1 undef, label %loop1, label %loop2 - -loop2: - br i1 undef, label %loop1, label %exit - -exit: - %phi2 = phi i32* [ %phi1, %loop2 ], [ null, %entry ] - ret i32 undef -} - -define i32 @PR13906() { -; Another pattern which can lead to crashes due to failing to clear out dead -; PHI nodes or select nodes. This triggers subtly differently from the above -; cases because the PHI node is (recursively) alive, but the select is dead. 
-; CHECK-LABEL: @PR13906( -; CHECK-NOT: alloca - -entry: - %c = alloca i32 - store i32 0, i32* %c - br label %for.cond - -for.cond: - %d.0 = phi i32* [ undef, %entry ], [ %c, %if.then ], [ %d.0, %for.cond ] - br i1 undef, label %if.then, label %for.cond - -if.then: - %tmpcast.d.0 = select i1 undef, i32* %c, i32* %d.0 - br label %for.cond -} - -define i64 @PR14132(i1 %flag) { -; CHECK-LABEL: @PR14132( -; Here we form a PHI-node by promoting the pointer alloca first, and then in -; order to promote the other two allocas, we speculate the load of the -; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8 -; alloca. While this is a bit dubious, we were asserting on trying to -; rewrite it. The trick is that the code using the value may carefully take -; steps to only use the not-undef bits, and so we need to at least loosely -; support this.. -entry: - %a = alloca i64, align 8 - %b = alloca i8, align 8 - %ptr = alloca i64*, align 8 -; CHECK-NOT: alloca - - %ptr.cast = bitcast i64** %ptr to i8** - store i64 0, i64* %a, align 8 - store i8 1, i8* %b, align 8 - store i64* %a, i64** %ptr, align 8 - br i1 %flag, label %if.then, label %if.end - -if.then: - store i8* %b, i8** %ptr.cast, align 8 - br label %if.end -; CHECK-NOT: store -; CHECK: %[[ext:.*]] = zext i8 1 to i64 - -if.end: - %tmp = load i64*, i64** %ptr, align 8 - %result = load i64, i64* %tmp, align 8 -; CHECK-NOT: load -; CHECK: %[[result:.*]] = phi i64 [ %[[ext]], %if.then ], [ 0, %entry ] - - ret i64 %result -; CHECK-NEXT: ret i64 %[[result]] -} - -define float @PR16687(i64 %x, i1 %flag) { -; CHECK-LABEL: @PR16687( -; Check that even when we try to speculate the same phi twice (in two slices) -; on an otherwise promotable construct, we don't get ahead of ourselves and try -; to promote one of the slices prior to speculating it. 
- -entry: - %a = alloca i64, align 8 - store i64 %x, i64* %a - br i1 %flag, label %then, label %else -; CHECK-NOT: alloca -; CHECK-NOT: store -; CHECK: %[[lo:.*]] = trunc i64 %x to i32 -; CHECK: %[[shift:.*]] = lshr i64 %x, 32 -; CHECK: %[[hi:.*]] = trunc i64 %[[shift]] to i32 - -then: - %a.f = bitcast i64* %a to float* - br label %end -; CHECK: %[[lo_cast:.*]] = bitcast i32 %[[lo]] to float - -else: - %a.raw = bitcast i64* %a to i8* - %a.raw.4 = getelementptr i8, i8* %a.raw, i64 4 - %a.raw.4.f = bitcast i8* %a.raw.4 to float* - br label %end -; CHECK: %[[hi_cast:.*]] = bitcast i32 %[[hi]] to float - -end: - %a.phi.f = phi float* [ %a.f, %then ], [ %a.raw.4.f, %else ] - %f = load float, float* %a.phi.f - ret float %f -; CHECK: %[[phi:.*]] = phi float [ %[[lo_cast]], %then ], [ %[[hi_cast]], %else ] -; CHECK-NOT: load -; CHECK: ret float %[[phi]] -} - -; Verifies we fixed PR20425. We should be able to promote all alloca's to -; registers in this test. -; -; %0 = slice -; %1 = slice -; %2 = phi(%0, %1) // == slice -define float @simplify_phi_nodes_that_equal_slice(i1 %cond, float* %temp) { -; CHECK-LABEL: @simplify_phi_nodes_that_equal_slice( -entry: - %arr = alloca [4 x float], align 4 -; CHECK-NOT: alloca - br i1 %cond, label %then, label %else - -then: - %0 = getelementptr inbounds [4 x float], [4 x float]* %arr, i64 0, i64 3 - store float 1.000000e+00, float* %0, align 4 - br label %merge - -else: - %1 = getelementptr inbounds [4 x float], [4 x float]* %arr, i64 0, i64 3 - store float 2.000000e+00, float* %1, align 4 - br label %merge - -merge: - %2 = phi float* [ %0, %then ], [ %1, %else ] - store float 0.000000e+00, float* %temp, align 4 - %3 = load float, float* %2, align 4 - ret float %3 -} - -; A slightly complicated example for PR20425. 
-; -; %0 = slice -; %1 = phi(%0) // == slice -; %2 = slice -; %3 = phi(%1, %2) // == slice -define float @simplify_phi_nodes_that_equal_slice_2(i1 %cond, float* %temp) { -; CHECK-LABEL: @simplify_phi_nodes_that_equal_slice_2( -entry: - %arr = alloca [4 x float], align 4 -; CHECK-NOT: alloca - br i1 %cond, label %then, label %else - -then: - %0 = getelementptr inbounds [4 x float], [4 x float]* %arr, i64 0, i64 3 - store float 1.000000e+00, float* %0, align 4 - br label %then2 - -then2: - %1 = phi float* [ %0, %then ] - store float 2.000000e+00, float* %1, align 4 - br label %merge - -else: - %2 = getelementptr inbounds [4 x float], [4 x float]* %arr, i64 0, i64 3 - store float 3.000000e+00, float* %2, align 4 - br label %merge - -merge: - %3 = phi float* [ %1, %then2 ], [ %2, %else ] - store float 0.000000e+00, float* %temp, align 4 - %4 = load float, float* %3, align 4 - ret float %4 -} - -%struct.S = type { i32 } - -; Verifies we fixed PR20822. We have a foldable PHI feeding a speculatable PHI -; which requires the rewriting of the speculated PHI to handle insertion -; when the incoming pointer is itself from a PHI node. We would previously -; insert a bitcast instruction *before* a PHI, producing an invalid module; -; make sure we insert *after* the first non-PHI instruction. 
-define void @PR20822() { -; CHECK-LABEL: @PR20822( -entry: - %f = alloca %struct.S, align 4 -; CHECK: %[[alloca:.*]] = alloca - br i1 undef, label %if.end, label %for.cond - -for.cond: ; preds = %for.cond, %entry - br label %if.end - -if.end: ; preds = %for.cond, %entry - %f2 = phi %struct.S* [ %f, %entry ], [ %f, %for.cond ] -; CHECK: phi i32 -; CHECK: %[[cast:.*]] = bitcast i32* %[[alloca]] to %struct.S* - phi i32 [ undef, %entry ], [ undef, %for.cond ] - br i1 undef, label %if.then5, label %if.then2 - -if.then2: ; preds = %if.end - br label %if.then5 - -if.then5: ; preds = %if.then2, %if.end - %f1 = phi %struct.S* [ undef, %if.then2 ], [ %f2, %if.end ] -; CHECK: phi {{.*}} %[[cast]] - store %struct.S undef, %struct.S* %f1, align 4 - ret void -} - -define i32 @phi_align(i32* %z) { -; CHECK-LABEL: @phi_align( -entry: - %a = alloca [8 x i8], align 8 -; CHECK: alloca [7 x i8] - - %a0x = getelementptr [8 x i8], [8 x i8]* %a, i64 0, i32 1 - %a0 = bitcast i8* %a0x to i32* - %a1x = getelementptr [8 x i8], [8 x i8]* %a, i64 0, i32 4 - %a1 = bitcast i8* %a1x to i32* -; CHECK: store i32 0, {{.*}}, align 1 - store i32 0, i32* %a0, align 1 -; CHECK: store i32 1, {{.*}}, align 1 - store i32 1, i32* %a1, align 4 -; CHECK: load {{.*}}, align 1 - %v0 = load i32, i32* %a0, align 1 -; CHECK: load {{.*}}, align 1 - %v1 = load i32, i32* %a1, align 4 - %cond = icmp sle i32 %v0, %v1 - br i1 %cond, label %then, label %exit - -then: - br label %exit - -exit: -; CHECK: %phi = phi i32* [ {{.*}}, %then ], [ %z, %entry ] -; CHECK-NEXT: %result = load i32, i32* %phi, align 1 - %phi = phi i32* [ %a1, %then ], [ %z, %entry ] - %result = load i32, i32* %phi, align 4 - ret i32 %result -} - -; Don't speculate a load based on an earlier volatile operation. 
-define i8 @volatile_select(i8* %p, i1 %b) { -; CHECK-LABEL: @volatile_select( -; CHECK: select i1 %b, i8* %p, i8* %p2 - %p2 = alloca i8 - store i8 0, i8* %p2 - store volatile i8 0, i8* %p - %px = select i1 %b, i8* %p, i8* %p2 - %v2 = load i8, i8* %px - ret i8 %v2 -} diff --git a/llvm/test/Transforms/SROA/phi-with-duplicate-pred.ll b/llvm/test/Transforms/SROA/phi-with-duplicate-pred.ll deleted file mode 100644 index 64963fd6634..00000000000 --- a/llvm/test/Transforms/SROA/phi-with-duplicate-pred.ll +++ /dev/null @@ -1,51 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -sroa -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" - -@a = external global i16, align 1 - -define void @f2() { -; CHECK-LABEL: @f2( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br label [[CLEANUP:%.*]] -; CHECK: cleanup: -; CHECK-NEXT: [[G_0_SROA_SPECULATE_LOAD_CLEANUP:%.*]] = load i16, i16* @a, align 1 -; CHECK-NEXT: switch i32 2, label [[CLEANUP7:%.*]] [ -; CHECK-NEXT: i32 0, label [[LBL1:%.*]] -; CHECK-NEXT: i32 2, label [[LBL1]] -; CHECK-NEXT: ] -; CHECK: if.else: -; CHECK-NEXT: br label [[LBL1]] -; CHECK: lbl1: -; CHECK-NEXT: [[G_0_SROA_SPECULATED:%.*]] = phi i16 [ [[G_0_SROA_SPECULATE_LOAD_CLEANUP]], [[CLEANUP]] ], [ [[G_0_SROA_SPECULATE_LOAD_CLEANUP]], [[CLEANUP]] ], [ undef, [[IF_ELSE]] ] -; CHECK-NEXT: unreachable -; CHECK: cleanup7: -; CHECK-NEXT: ret void -; -entry: - %e = alloca i16, align 1 - br i1 undef, label %if.then, label %if.else - -if.then: ; preds = %entry - br label %cleanup - -cleanup: ; preds = %if.then - switch i32 2, label %cleanup7 [ - i32 0, label %lbl1 - i32 2, label %lbl1 - ] - -if.else: ; preds = %entry - br label %lbl1 - -lbl1: ; preds = %if.else, %cleanup, %cleanup - %g.0 = phi i16* [ @a, %cleanup ], [ @a, %cleanup ], [ 
%e, %if.else ] - %0 = load i16, i16* %g.0, align 1 - unreachable - -cleanup7: ; preds = %cleanup - ret void -} - diff --git a/llvm/test/Transforms/SROA/pointer-offset-size.ll b/llvm/test/Transforms/SROA/pointer-offset-size.ll deleted file mode 100644 index c632c37988b..00000000000 --- a/llvm/test/Transforms/SROA/pointer-offset-size.ll +++ /dev/null @@ -1,29 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -sroa -S | FileCheck %s -target datalayout = "e-p:64:64:64:32" - -%struct.test = type { %struct.basic, %struct.basic } -%struct.basic = type { i16, i8 } - -define i16 @test(%struct.test* %ts2.i) { -; CHECK-LABEL: @test( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[S_SROA_0:%.*]] = alloca [3 x i8], align 2 -; CHECK-NEXT: [[S_SROA_0_0__SROA_CAST:%.*]] = bitcast %struct.test* [[TS2_I:%.*]] to i8* -; CHECK-NEXT: [[S_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [3 x i8], [3 x i8]* [[S_SROA_0]], i32 0, i32 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[S_SROA_0_0__SROA_CAST]], i8* align 2 [[S_SROA_0_0__SROA_IDX]], i32 3, i1 false) -; CHECK-NEXT: [[X1_I_I:%.*]] = getelementptr inbounds [[STRUCT_TEST:%.*]], %struct.test* [[TS2_I]], i32 0, i32 0, i32 0 -; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[X1_I_I]] -; CHECK-NEXT: ret i16 [[TMP0]] -; -entry: - %s = alloca %struct.test - %0 = bitcast %struct.test* %ts2.i to i8* - %1 = bitcast %struct.test* %s to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 3, i1 false) - %x1.i.i = getelementptr inbounds %struct.test, %struct.test* %ts2.i, i32 0, i32 0, i32 0 - %2 = load i16, i16* %x1.i.i - ret i16 %2 -} - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) diff --git a/llvm/test/Transforms/SROA/ppcf128-no-fold.ll b/llvm/test/Transforms/SROA/ppcf128-no-fold.ll deleted file mode 100644 index 3f2934cbe16..00000000000 --- a/llvm/test/Transforms/SROA/ppcf128-no-fold.ll +++ /dev/null @@ -1,36 +0,0 @@ -; 
RUN: opt < %s -sroa -S | FileCheck %s -target datalayout = "E-m:e-i64:64-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - -%struct.ld2 = type { [2 x ppc_fp128] } -declare void @bar(i8*, [2 x i128]) - -define void @foo(i8* %v) #0 { -entry: - %v.addr = alloca i8*, align 8 - %z = alloca %struct.ld2, align 16 - store i8* %v, i8** %v.addr, align 8 - %dat = getelementptr inbounds %struct.ld2, %struct.ld2* %z, i32 0, i32 0 - %arrayidx = getelementptr inbounds [2 x ppc_fp128], [2 x ppc_fp128]* %dat, i32 0, i64 0 - store ppc_fp128 0xM403B0000000000000000000000000000, ppc_fp128* %arrayidx, align 16 - %dat1 = getelementptr inbounds %struct.ld2, %struct.ld2* %z, i32 0, i32 0 - %arrayidx2 = getelementptr inbounds [2 x ppc_fp128], [2 x ppc_fp128]* %dat1, i32 0, i64 1 - store ppc_fp128 0xM4093B400000000000000000000000000, ppc_fp128* %arrayidx2, align 16 - %0 = load i8*, i8** %v.addr, align 8 - %coerce.dive = getelementptr %struct.ld2, %struct.ld2* %z, i32 0, i32 0 - %1 = bitcast [2 x ppc_fp128]* %coerce.dive to [2 x i128]* - %2 = load [2 x i128], [2 x i128]* %1, align 1 - call void @bar(i8* %0, [2 x i128] %2) - ret void -} - -; CHECK-LABEL: @foo -; CHECK-NOT: i128 4628293042053316608 -; CHECK-NOT: i128 4653260752096854016 -; CHECK-DAG: i128 bitcast (ppc_fp128 0xM403B0000000000000000000000000000 to i128) -; CHECK-DAG: i128 bitcast (ppc_fp128 0xM4093B400000000000000000000000000 to i128) -; CHECK: call void @bar(i8* %v, [2 x i128] -; CHECK: ret void - -attributes #0 = { nounwind } - diff --git a/llvm/test/Transforms/SROA/pr26972.ll b/llvm/test/Transforms/SROA/pr26972.ll deleted file mode 100644 index 3140a805fc4..00000000000 --- a/llvm/test/Transforms/SROA/pr26972.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-linux" - -; Make sure we properly handle allocas where the allocated -; size overflows a uint32_t. 
This specific constant results in -; the size in bits being 32 after truncation to a 32-bit int. -; CHECK-LABEL: fn1 -; CHECK-NEXT: ret void -define void @fn1() { - %a = alloca [1073741825 x i32], align 16 - %t0 = bitcast [1073741825 x i32]* %a to i8* - call void @llvm.lifetime.end.p0i8(i64 4294967300, i8* %t0) - ret void -} - -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) diff --git a/llvm/test/Transforms/SROA/pr37267.ll b/llvm/test/Transforms/SROA/pr37267.ll deleted file mode 100644 index 4fcb1f29aac..00000000000 --- a/llvm/test/Transforms/SROA/pr37267.ll +++ /dev/null @@ -1,74 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32:64-S128" -target triple = "sparcv9-sun-solaris" - -; PR37267 -; Check that we don't crash on this test. - -define i16 @f1() { -; CHECK-LABEL: @f1 -; CHECK: %[[retval:.*]] = add i16 2, 2 -; CHECK: ret i16 %[[retval]] - -bb1: -; This 12-byte alloca is split into partitions as [0,2), [2,4), [4,8), [8,10), [10, 12). -; The reported error happened when rewriteIntegerStore tried to widen a split tail of slice 1 for [4, 8) partition. 
-; alloca 012345678901 -; slice 1: WWWW -; slice 2: WWWW -; slice 3: RR -; slice 4: RR - - %a.3 = alloca [6 x i16], align 1 -; slice 1: [2,6) - %_tmp3 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 1 - %_tmp5 = bitcast i16* %_tmp3 to i32* - store i32 131074, i32* %_tmp5, align 1 -; slice 2: [8,12) - %_tmp8 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 4 - %_tmp10 = bitcast i16* %_tmp8 to i32* - store i32 131074, i32* %_tmp10, align 1 -; slice 3: [8,10) - %_tmp12 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 4 - %_tmp13 = load i16, i16* %_tmp12, align 1 -; slice 4: [2,4) - %_tmp15 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 1 - %_tmp16 = load i16, i16* %_tmp15, align 1 - - %rc = add i16 %_tmp13, %_tmp16 - ret i16 %rc -} - -define i16 @f2() { -; CHECK-LABEL: @f2 -; CHECK: %[[retval:.*]] = add i16 2, undef -; CHECK: ret i16 %[[retval]] - -bb1: -; This 12-byte alloca is split into partitions as [0,2), [2,4), [4,8), [8,10), [10, 12). -; The reported error happened when visitLoadInst rewrites a split tail of slice 1 for [4, 8) partition. 
-; alloca 012345678901 -; slice 1: RRRR -; slice 2: WWWW -; slice 3: RR -; slice 4: RR - - %a.3 = alloca [6 x i16], align 1 -; slice 1: [2,6) - %_tmp3 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 1 - %_tmp5 = bitcast i16* %_tmp3 to i32* - %_tmp6 = load i32, i32* %_tmp5, align 1 -; slice 2: [8,12) - %_tmp8 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 4 - %_tmp10 = bitcast i16* %_tmp8 to i32* - store i32 131074, i32* %_tmp10, align 1 -; slice 3: [8,10) - %_tmp12 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 4 - %_tmp13 = load i16, i16* %_tmp12, align 1 -; slice 4: [2,4) - %_tmp15 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 1 - %_tmp16 = load i16, i16* %_tmp15, align 1 - - %rc = add i16 %_tmp13, %_tmp16 - ret i16 %rc -} diff --git a/llvm/test/Transforms/SROA/preserve-nonnull.ll b/llvm/test/Transforms/SROA/preserve-nonnull.ll deleted file mode 100644 index d6f084fd749..00000000000 --- a/llvm/test/Transforms/SROA/preserve-nonnull.ll +++ /dev/null @@ -1,92 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -; -; Make sure that SROA doesn't lose nonnull metadata -; on loads from allocas that get optimized out. - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) - -; Check that we do basic propagation of nonnull when rewriting. 
-define i8* @propagate_nonnull(i32* %v) { -; CHECK-LABEL: define i8* @propagate_nonnull( -; CHECK-NEXT: entry: -; CHECK-NEXT: %[[A:.*]] = alloca i8* -; CHECK-NEXT: %[[V_CAST:.*]] = bitcast i32* %v to i8* -; CHECK-NEXT: store i8* %[[V_CAST]], i8** %[[A]] -; CHECK-NEXT: %[[LOAD:.*]] = load volatile i8*, i8** %[[A]], !nonnull !0 -; CHECK-NEXT: ret i8* %[[LOAD]] -entry: - %a = alloca [2 x i8*] - %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0 - %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1 - %a.gep0.cast = bitcast i8** %a.gep0 to i32** - %a.gep1.cast = bitcast i8** %a.gep1 to i32** - store i32* %v, i32** %a.gep1.cast - store i32* null, i32** %a.gep0.cast - %load = load volatile i8*, i8** %a.gep1, !nonnull !0 - ret i8* %load -} - -define float* @turn_nonnull_into_assume(float** %arg) { -; CHECK-LABEL: define float* @turn_nonnull_into_assume( -; CHECK-NEXT: entry: -; CHECK-NEXT: %[[RETURN:.*]] = load float*, float** %arg, align 8 -; CHECK-NEXT: %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null -; CHECK-NEXT: call void @llvm.assume(i1 %[[ASSUME]]) -; CHECK-NEXT: ret float* %[[RETURN]] -entry: - %buf = alloca float* - %_arg_i8 = bitcast float** %arg to i8* - %_buf_i8 = bitcast float** %buf to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %_buf_i8, i8* align 8 %_arg_i8, i64 8, i1 false) - %ret = load float*, float** %buf, align 8, !nonnull !0 - ret float* %ret -} - -; Make sure we properly handle the !nonnull metadata when we convert -; a pointer load to an integer load. -; FIXME: While this doesn't do anything actively harmful today, it really -; should propagate the !nonnull metadata to range metadata. The irony is, it -; *does* initially, but then we lose that !range metadata before we finish -; SROA. 
-define i8* @propagate_nonnull_to_int() { -; CHECK-LABEL: define i8* @propagate_nonnull_to_int( -; CHECK-NEXT: entry: -; CHECK-NEXT: %[[A:.*]] = alloca i64 -; CHECK-NEXT: store i64 42, i64* %[[A]] -; CHECK-NEXT: %[[LOAD:.*]] = load volatile i64, i64* %[[A]] -; CHECK-NEXT: %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8* -; CHECK-NEXT: ret i8* %[[CAST]] -entry: - %a = alloca [2 x i8*] - %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0 - %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1 - %a.gep0.cast = bitcast i8** %a.gep0 to i64* - %a.gep1.cast = bitcast i8** %a.gep1 to i64* - store i64 42, i64* %a.gep1.cast - store i64 0, i64* %a.gep0.cast - %load = load volatile i8*, i8** %a.gep1, !nonnull !0 - ret i8* %load -} - -; Make sure we properly handle the !nonnull metadata when we convert -; a pointer load to an integer load and immediately promote it to an SSA -; register. This can fail in interesting ways due to the rewrite iteration of -; SROA, resulting in PR32902. 
-define i8* @propagate_nonnull_to_int_and_promote() { -; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote( -; CHECK-NEXT: entry: -; CHECK-NEXT: %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8* -; CHECK-NEXT: ret i8* %[[PROMOTED_VALUE]] -entry: - %a = alloca [2 x i8*], align 8 - %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0 - %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1 - %a.gep0.cast = bitcast i8** %a.gep0 to i64* - %a.gep1.cast = bitcast i8** %a.gep1 to i64* - store i64 42, i64* %a.gep1.cast - store i64 0, i64* %a.gep0.cast - %load = load i8*, i8** %a.gep1, align 8, !nonnull !0 - ret i8* %load -} - -!0 = !{} diff --git a/llvm/test/Transforms/SROA/slice-order-independence.ll b/llvm/test/Transforms/SROA/slice-order-independence.ll deleted file mode 100644 index 19e7143fd13..00000000000 --- a/llvm/test/Transforms/SROA/slice-order-independence.ll +++ /dev/null @@ -1,37 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind - -; Check that the chosen type for a split is independent from the order of -; slices even in case of types that are skipped because their width is not a -; byte width multiple -define void @skipped_inttype_first({ i16*, i32 }*) { -; CHECK-LABEL: @skipped_inttype_first -; CHECK: alloca i8* - %arg = alloca { i16*, i32 }, align 8 - %2 = bitcast { i16*, i32 }* %0 to i8* - %3 = bitcast { i16*, i32 }* %arg to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %3, i8* align 8 %2, i32 16, i1 false) - %b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0 - %pb0 = bitcast i16** %b to i63* - %b0 = load i63, i63* %pb0 - %pb1 = bitcast i16** %b to i8** - %b1 = load i8*, i8** %pb1 - ret void -} - -define void @skipped_inttype_last({ i16*, i32 }*) { -; 
CHECK-LABEL: @skipped_inttype_last -; CHECK: alloca i8* - %arg = alloca { i16*, i32 }, align 8 - %2 = bitcast { i16*, i32 }* %0 to i8* - %3 = bitcast { i16*, i32 }* %arg to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %3, i8* align 8 %2, i32 16, i1 false) - %b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0 - %pb1 = bitcast i16** %b to i8** - %b1 = load i8*, i8** %pb1 - %pb0 = bitcast i16** %b to i63* - %b0 = load i63, i63* %pb0 - ret void -} diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll deleted file mode 100644 index 98ec1e97933..00000000000 --- a/llvm/test/Transforms/SROA/slice-width.ll +++ /dev/null @@ -1,106 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind - -define void @no_split_on_non_byte_width(i32) { -; This tests that allocas are not split into slices that are not byte width multiple - %arg = alloca i32 , align 8 - store i32 %0, i32* %arg - br label %load_i32 - -load_i32: -; CHECK-LABEL: load_i32: -; CHECK-NOT: bitcast {{.*}} to i1 -; CHECK-NOT: zext i1 - %r0 = load i32, i32* %arg - br label %load_i1 - -load_i1: -; CHECK-LABEL: load_i1: -; CHECK: bitcast {{.*}} to i1 - %p1 = bitcast i32* %arg to i1* - %t1 = load i1, i1* %p1 - ret void -} - -; PR18726: Check that we use memcpy and memset to fill out padding when we have -; a slice with a simple single type whose store size is smaller than the slice -; size. - -%union.Foo = type { x86_fp80, i64, i64 } - -@foo_copy_source = external constant %union.Foo -@i64_sink = global i64 0 - -define void @memcpy_fp80_padding() { - %x = alloca %union.Foo - - ; Copy from a global. 
- %x_i8 = bitcast %union.Foo* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %x_i8, i8* align 16 bitcast (%union.Foo* @foo_copy_source to i8*), i32 32, i1 false) - - ; Access a slice of the alloca to trigger SROA. - %mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1 - %elt = load i64, i64* %mid_p - store i64 %elt, i64* @i64_sink - ret void -} -; CHECK-LABEL: define void @memcpy_fp80_padding -; CHECK: alloca x86_fp80 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32 -; CHECK: load i64, i64* getelementptr inbounds (%union.Foo, %union.Foo* @foo_copy_source, i64 0, i32 1) -; CHECK: load i64, i64* getelementptr inbounds (%union.Foo, %union.Foo* @foo_copy_source, i64 0, i32 2) - -define void @memset_fp80_padding() { - %x = alloca %union.Foo - - ; Set to all ones. - %x_i8 = bitcast %union.Foo* %x to i8* - call void @llvm.memset.p0i8.i32(i8* align 16 %x_i8, i8 -1, i32 32, i1 false) - - ; Access a slice of the alloca to trigger SROA. - %mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1 - %elt = load i64, i64* %mid_p - store i64 %elt, i64* @i64_sink - ret void -} -; CHECK-LABEL: define void @memset_fp80_padding -; CHECK: alloca x86_fp80 -; CHECK: call void @llvm.memset.p0i8.i32(i8* align 16 %{{.*}}, i8 -1, i32 16, i1 false) -; CHECK: store i64 -1, i64* @i64_sink - -%S.vec3float = type { float, float, float } -%U.vec3float = type { <4 x float> } - -declare i32 @memcpy_vec3float_helper(%S.vec3float*) - -define i32 @memcpy_vec3float_widening(%S.vec3float* %x) { -; CHECK-LABEL: @memcpy_vec3float_widening( -; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte -; vector store, hence accidentally putting gibberish onto the stack. 
-entry: - ; Create a temporary variable %tmp1 and copy %x[0] into it - %tmp1 = alloca %S.vec3float, align 4 - %0 = bitcast %S.vec3float* %tmp1 to i8* - %1 = bitcast %S.vec3float* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 12, i1 false) - - ; The following block does nothing; but appears to confuse SROA - %unused1 = bitcast %S.vec3float* %tmp1 to %U.vec3float* - %unused2 = getelementptr inbounds %U.vec3float, %U.vec3float* %unused1, i32 0, i32 0 - %unused3 = load <4 x float>, <4 x float>* %unused2, align 1 - - ; Create a second temporary and copy %tmp1 into it - %tmp2 = alloca %S.vec3float, align 4 - %2 = bitcast %S.vec3float* %tmp2 to i8* - %3 = bitcast %S.vec3float* %tmp1 to i8* -; CHECK: alloca -; CHECK-NOT: store <4 x float> - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %2, i8* align 4 %3, i32 12, i1 false) - - %result = call i32 @memcpy_vec3float_helper(%S.vec3float* %tmp2) - ret i32 %result -; CHECK: ret i32 %result -} diff --git a/llvm/test/Transforms/SROA/vector-conversion.ll b/llvm/test/Transforms/SROA/vector-conversion.ll deleted file mode 100644 index 91ae5be6c3d..00000000000 --- a/llvm/test/Transforms/SROA/vector-conversion.ll +++ /dev/null @@ -1,53 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" - -define <4 x i64> @vector_ptrtoint({<2 x i32*>, <2 x i32*>} %x) { -; CHECK-LABEL: @vector_ptrtoint - %a = alloca {<2 x i32*>, <2 x i32*>} -; CHECK-NOT: alloca - - store {<2 x i32*>, <2 x i32*>} %x, {<2 x i32*>, <2 x i32*>}* %a -; CHECK-NOT: store - - %cast = bitcast {<2 x i32*>, <2 x i32*>}* %a to <4 x i64>* - %vec = load <4 x i64>, <4 x i64>* %cast -; CHECK-NOT: load -; CHECK: ptrtoint - - ret <4 x i64> %vec -} - -define <4 x i32*> @vector_inttoptr({<2 x i64>, <2 x i64>} %x) { -; CHECK-LABEL: @vector_inttoptr - %a = alloca {<2 x i64>, <2 x i64>} -; CHECK-NOT: alloca - - 
store {<2 x i64>, <2 x i64>} %x, {<2 x i64>, <2 x i64>}* %a -; CHECK-NOT: store - - %cast = bitcast {<2 x i64>, <2 x i64>}* %a to <4 x i32*>* - %vec = load <4 x i32*>, <4 x i32*>* %cast -; CHECK-NOT: load -; CHECK: inttoptr - - ret <4 x i32*> %vec -} - -define <2 x i64> @vector_ptrtointbitcast({<1 x i32*>, <1 x i32*>} %x) { -; CHECK-LABEL: @vector_ptrtointbitcast - %a = alloca {<1 x i32*>, <1 x i32*>} -; CHECK-NOT: alloca - - store {<1 x i32*>, <1 x i32*>} %x, {<1 x i32*>, <1 x i32*>}* %a -; CHECK-NOT: store - - %cast = bitcast {<1 x i32*>, <1 x i32*>}* %a to <2 x i64>* - %vec = load <2 x i64>, <2 x i64>* %cast -; CHECK-NOT: load -; CHECK: ptrtoint -; CHECK: bitcast -; CHECK: ptrtoint -; CHECK: bitcast - - ret <2 x i64> %vec -} diff --git a/llvm/test/Transforms/SROA/vector-lifetime-intrinsic.ll b/llvm/test/Transforms/SROA/vector-lifetime-intrinsic.ll deleted file mode 100644 index abb5cb2ea33..00000000000 --- a/llvm/test/Transforms/SROA/vector-lifetime-intrinsic.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: opt -sroa -S < %s | FileCheck %s - -target datalayout = "e-p:64:32-i64:32-v32:32-n32-S64" - -; Function Attrs: nounwind -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0 - -; Function Attrs: nounwind -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0 - -; CHECK: @wombat -; CHECK-NOT: alloca -; CHECK: ret void -define void @wombat(<4 x float> %arg1) { -bb: - %tmp = alloca <4 x float>, align 16 - %tmp8 = bitcast <4 x float>* %tmp to i8* - call void @llvm.lifetime.start.p0i8(i64 16, i8* %tmp8) - store <4 x float> %arg1, <4 x float>* %tmp, align 16 - %tmp17 = bitcast <4 x float>* %tmp to <3 x float>* - %tmp18 = load <3 x float>, <3 x float>* %tmp17 - %tmp20 = bitcast <4 x float>* %tmp to i8* - call void @llvm.lifetime.end.p0i8(i64 16, i8* %tmp20) - call void @wombat3(<3 x float> %tmp18) - ret void -} - -; Function Attrs: nounwind -declare void @wombat3(<3 x float>) #0 - -attributes #0 = { nounwind } diff --git 
a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll deleted file mode 100644 index 0bf2d23a591..00000000000 --- a/llvm/test/Transforms/SROA/vector-promotion.ll +++ /dev/null @@ -1,625 +0,0 @@ -; RUN: opt < %s -sroa -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" - -%S1 = type { i64, [42 x float] } - -define i32 @test1(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @test1( -entry: - %a = alloca [2 x <4 x i32>] -; CHECK-NOT: alloca - - %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0 - store <4 x i32> %x, <4 x i32>* %a.x - %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1 - store <4 x i32> %y, <4 x i32>* %a.y -; CHECK-NOT: store - - %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2 - %tmp1 = load i32, i32* %a.tmp1 - %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3 - %tmp2 = load i32, i32* %a.tmp2 - %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0 - %tmp3 = load i32, i32* %a.tmp3 -; CHECK-NOT: load -; CHECK: extractelement <4 x i32> %x, i32 2 -; CHECK-NEXT: extractelement <4 x i32> %y, i32 3 -; CHECK-NEXT: extractelement <4 x i32> %y, i32 0 - - %tmp4 = add i32 %tmp1, %tmp2 - %tmp5 = add i32 %tmp3, %tmp4 - ret i32 %tmp5 -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: ret -} - -define i32 @test2(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @test2( -entry: - %a = alloca [2 x <4 x i32>] -; CHECK-NOT: alloca - - %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0 - store <4 x i32> %x, <4 x i32>* %a.x - %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1 - store <4 x i32> %y, <4 x i32>* %a.y -; CHECK-NOT: store - - %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x 
i32>]* %a, i64 0, i64 0, i64 2 - %tmp1 = load i32, i32* %a.tmp1 - %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3 - %tmp2 = load i32, i32* %a.tmp2 - %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0 - %a.tmp3.cast = bitcast i32* %a.tmp3 to <2 x i32>* - %tmp3.vec = load <2 x i32>, <2 x i32>* %a.tmp3.cast - %tmp3 = extractelement <2 x i32> %tmp3.vec, i32 0 -; CHECK-NOT: load -; CHECK: %[[extract1:.*]] = extractelement <4 x i32> %x, i32 2 -; CHECK-NEXT: %[[extract2:.*]] = extractelement <4 x i32> %y, i32 3 -; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <i32 0, i32 1> -; CHECK-NEXT: %[[extract4:.*]] = extractelement <2 x i32> %[[extract3]], i32 0 - - %tmp4 = add i32 %tmp1, %tmp2 - %tmp5 = add i32 %tmp3, %tmp4 - ret i32 %tmp5 -; CHECK-NEXT: %[[sum1:.*]] = add i32 %[[extract1]], %[[extract2]] -; CHECK-NEXT: %[[sum2:.*]] = add i32 %[[extract4]], %[[sum1]] -; CHECK-NEXT: ret i32 %[[sum2]] -} - -define i32 @test3(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @test3( -entry: - %a = alloca [2 x <4 x i32>] -; CHECK-NOT: alloca - - %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0 - store <4 x i32> %x, <4 x i32>* %a.x - %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1 - store <4 x i32> %y, <4 x i32>* %a.y -; CHECK-NOT: store - - %a.y.cast = bitcast <4 x i32>* %a.y to i8* - call void @llvm.memset.p0i8.i32(i8* %a.y.cast, i8 0, i32 16, i1 false) -; CHECK-NOT: memset - - %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2 - %a.tmp1.cast = bitcast i32* %a.tmp1 to i8* - call void @llvm.memset.p0i8.i32(i8* %a.tmp1.cast, i8 -1, i32 4, i1 false) - %tmp1 = load i32, i32* %a.tmp1 - %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3 - %tmp2 = load i32, i32* %a.tmp2 - %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x 
i32>]* %a, i64 0, i64 1, i64 0 - %tmp3 = load i32, i32* %a.tmp3 -; CHECK-NOT: load -; CHECK: %[[insert:.*]] = insertelement <4 x i32> %x, i32 -1, i32 2 -; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2 -; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 3 -; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 0 - - %tmp4 = add i32 %tmp1, %tmp2 - %tmp5 = add i32 %tmp3, %tmp4 - ret i32 %tmp5 -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: ret -} - -define i32 @test4(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) { -; CHECK-LABEL: @test4( -entry: - %a = alloca [2 x <4 x i32>] -; CHECK-NOT: alloca - - %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0 - store <4 x i32> %x, <4 x i32>* %a.x - %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1 - store <4 x i32> %y, <4 x i32>* %a.y -; CHECK-NOT: store - - %a.y.cast = bitcast <4 x i32>* %a.y to i8* - %z.cast = bitcast <4 x i32>* %z to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.y.cast, i8* %z.cast, i32 16, i1 false) -; CHECK-NOT: memcpy - - %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2 - %a.tmp1.cast = bitcast i32* %a.tmp1 to i8* - %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2 - %z.tmp1.cast = bitcast i32* %z.tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.tmp1.cast, i8* %z.tmp1.cast, i32 4, i1 false) - %tmp1 = load i32, i32* %a.tmp1 - %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3 - %tmp2 = load i32, i32* %a.tmp2 - %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0 - %tmp3 = load i32, i32* %a.tmp3 -; CHECK-NOT: memcpy -; CHECK: %[[load:.*]] = load <4 x i32>, <4 x i32>* %z -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2 -; CHECK-NEXT: %[[element_load:.*]] = load i32, i32* %[[gep]] -; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x 
i32> %x, i32 %[[element_load]], i32 2 -; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2 -; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3 -; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0 - - %tmp4 = add i32 %tmp1, %tmp2 - %tmp5 = add i32 %tmp3, %tmp4 - ret i32 %tmp5 -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: ret -} - -declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) nounwind - -; Same as test4 with a different sized address space pointer source. -define i32 @test4_as1(<4 x i32> %x, <4 x i32> %y, <4 x i32> addrspace(1)* %z) { -; CHECK-LABEL: @test4_as1( -entry: - %a = alloca [2 x <4 x i32>] -; CHECK-NOT: alloca - - %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0 - store <4 x i32> %x, <4 x i32>* %a.x - %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1 - store <4 x i32> %y, <4 x i32>* %a.y -; CHECK-NOT: store - - %a.y.cast = bitcast <4 x i32>* %a.y to i8* - %z.cast = bitcast <4 x i32> addrspace(1)* %z to i8 addrspace(1)* - call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.y.cast, i8 addrspace(1)* %z.cast, i32 16, i1 false) -; CHECK-NOT: memcpy - - %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2 - %a.tmp1.cast = bitcast i32* %a.tmp1 to i8* - %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %z, i16 0, i16 2 - %z.tmp1.cast = bitcast i32 addrspace(1)* %z.tmp1 to i8 addrspace(1)* - call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.tmp1.cast, i8 addrspace(1)* %z.tmp1.cast, i32 4, i1 false) - %tmp1 = load i32, i32* %a.tmp1 - %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3 - %tmp2 = load i32, i32* %a.tmp2 - %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0 - %tmp3 = load i32, i32* %a.tmp3 -; CHECK-NOT: memcpy -; CHECK: %[[load:.*]] = load <4 x i32>, <4 x i32> addrspace(1)* %z -; CHECK-NEXT: %[[gep:.*]] = 
getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %z, i64 0, i64 2 -; CHECK-NEXT: %[[element_load:.*]] = load i32, i32 addrspace(1)* %[[gep]] -; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2 -; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2 -; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3 -; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0 - - %tmp4 = add i32 %tmp1, %tmp2 - %tmp5 = add i32 %tmp3, %tmp4 - ret i32 %tmp5 -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: ret -} - -define i32 @test5(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) { -; CHECK-LABEL: @test5( -; The same as the above, but with reversed source and destination for the -; element memcpy, and a self copy. -entry: - %a = alloca [2 x <4 x i32>] -; CHECK-NOT: alloca - - %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0 - store <4 x i32> %x, <4 x i32>* %a.x - %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1 - store <4 x i32> %y, <4 x i32>* %a.y -; CHECK-NOT: store - - %a.y.cast = bitcast <4 x i32>* %a.y to i8* - %a.x.cast = bitcast <4 x i32>* %a.x to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.x.cast, i8* %a.y.cast, i32 16, i1 false) -; CHECK-NOT: memcpy - - %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2 - %a.tmp1.cast = bitcast i32* %a.tmp1 to i8* - %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2 - %z.tmp1.cast = bitcast i32* %z.tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z.tmp1.cast, i8* %a.tmp1.cast, i32 4, i1 false) - %tmp1 = load i32, i32* %a.tmp1 - %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3 - %tmp2 = load i32, i32* %a.tmp2 - %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0 - %tmp3 = load i32, i32* %a.tmp3 -; CHECK-NOT: memcpy -; CHECK: %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x 
i32>* %z, i64 0, i64 2 -; CHECK-NEXT: %[[extract:.*]] = extractelement <4 x i32> %y, i32 2 -; CHECK-NEXT: store i32 %[[extract]], i32* %[[gep]] -; CHECK-NEXT: extractelement <4 x i32> %y, i32 2 -; CHECK-NEXT: extractelement <4 x i32> %y, i32 3 -; CHECK-NEXT: extractelement <4 x i32> %y, i32 0 - - %tmp4 = add i32 %tmp1, %tmp2 - %tmp5 = add i32 %tmp3, %tmp4 - ret i32 %tmp5 -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: ret -} - -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind - -define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) { -; CHECK-LABEL: @test6( -; The old scalarrepl pass would wrongly drop the store to the second alloca. -; PR13254 - %tmp = alloca { <4 x i64>, <4 x i64> } - %p0 = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0 - store <4 x i64> %x, <4 x i64>* %p0 -; CHECK: store <4 x i64> %x, - %p1 = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 1 - store <4 x i64> %y, <4 x i64>* %p1 -; CHECK: store <4 x i64> %y, - %addr = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0, i64 %n - %res = load i64, i64* %addr, align 4 - ret i64 %res -} - -define <4 x i32> @test_subvec_store() { -; CHECK-LABEL: @test_subvec_store( -entry: - %a = alloca <4 x i32> -; CHECK-NOT: alloca - - %a.gep0 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 0 - %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>* - store <2 x i32> <i32 0, i32 0>, <2 x i32>* %a.cast0 -; CHECK-NOT: store -; CHECK: select <4 x i1> <i1 true, i1 true, i1 false, i1 false> - - %a.gep1 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 1 - %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>* - store <2 x i32> <i32 1, i32 1>, <2 x i32>* %a.cast1 -; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false> - - %a.gep2 = getelementptr <4 x i32>, <4 x i32>* %a, 
i32 0, i32 2 - %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>* - store <2 x i32> <i32 2, i32 2>, <2 x i32>* %a.cast2 -; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true> - - %a.gep3 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 3 - store i32 3, i32* %a.gep3 -; CHECK-NEXT: insertelement <4 x i32> - - %ret = load <4 x i32>, <4 x i32>* %a - - ret <4 x i32> %ret -; CHECK-NEXT: ret <4 x i32> -} - -define <4 x i32> @test_subvec_load() { -; CHECK-LABEL: @test_subvec_load( -entry: - %a = alloca <4 x i32> -; CHECK-NOT: alloca - store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a -; CHECK-NOT: store - - %a.gep0 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 0 - %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>* - %first = load <2 x i32>, <2 x i32>* %a.cast0 -; CHECK-NOT: load -; CHECK: %[[extract1:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 0, i32 1> - - %a.gep1 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 1 - %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>* - %second = load <2 x i32>, <2 x i32>* %a.cast1 -; CHECK-NEXT: %[[extract2:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 1, i32 2> - - %a.gep2 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 2 - %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>* - %third = load <2 x i32>, <2 x i32>* %a.cast2 -; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - - %tmp = shufflevector <2 x i32> %first, <2 x i32> %second, <2 x i32> <i32 0, i32 2> - %ret = shufflevector <2 x i32> %tmp, <2 x i32> %third, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: %[[tmp:.*]] = shufflevector <2 x i32> %[[extract1]], <2 x i32> %[[extract2]], <2 x i32> <i32 0, i32 2> -; CHECK-NEXT: %[[ret:.*]] = shufflevector <2 x i32> %[[tmp]], <2 x i32> %[[extract3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> - - ret <4 x i32> %ret -; CHECK-NEXT: ret <4 x i32> 
%[[ret]] -} - -declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i1) nounwind - -define <4 x float> @test_subvec_memset() { -; CHECK-LABEL: @test_subvec_memset( -entry: - %a = alloca <4 x float> -; CHECK-NOT: alloca - - %a.gep0 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 0 - %a.cast0 = bitcast float* %a.gep0 to i8* - call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i1 false) -; CHECK-NOT: store -; CHECK: select <4 x i1> <i1 true, i1 true, i1 false, i1 false> - - %a.gep1 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 1 - %a.cast1 = bitcast float* %a.gep1 to i8* - call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i1 false) -; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false> - - %a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2 - %a.cast2 = bitcast float* %a.gep2 to i8* - call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i1 false) -; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true> - - %a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3 - %a.cast3 = bitcast float* %a.gep3 to i8* - call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i1 false) -; CHECK-NEXT: insertelement <4 x float> - - %ret = load <4 x float>, <4 x float>* %a - - ret <4 x float> %ret -; CHECK-NEXT: ret <4 x float> -} - -define <4 x float> @test_subvec_memcpy(i8* %x, i8* %y, i8* %z, i8* %f, i8* %out) { -; CHECK-LABEL: @test_subvec_memcpy( -entry: - %a = alloca <4 x float> -; CHECK-NOT: alloca - - %a.gep0 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 0 - %a.cast0 = bitcast float* %a.gep0 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i1 false) -; CHECK: %[[xptr:.*]] = bitcast i8* %x to <2 x float>* -; CHECK-NEXT: %[[x:.*]] = load <2 x float>, <2 x float>* %[[xptr]] -; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> -; CHECK-NEXT: select <4 x i1> 
<i1 true, i1 true, i1 false, i1 false> - - %a.gep1 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 1 - %a.cast1 = bitcast float* %a.gep1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i1 false) -; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>* -; CHECK-NEXT: %[[y:.*]] = load <2 x float>, <2 x float>* %[[yptr]] -; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef> -; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false> - - %a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2 - %a.cast2 = bitcast float* %a.gep2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i1 false) -; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>* -; CHECK-NEXT: %[[z:.*]] = load <2 x float>, <2 x float>* %[[zptr]] -; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1> -; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true> - - %a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3 - %a.cast3 = bitcast float* %a.gep3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i1 false) ; 4 bytes is a single float, so the checks expect an insertelement here instead of a select -; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float* -; CHECK-NEXT: %[[f:.*]] = load float, float* %[[fptr]] -; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> - - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i1 false) ; copies elements 2 and 3 of %a out to %out -; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>* -; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> undef, <2 x i32> <i32 2, i32 3> -; CHECK-NEXT: store <2 x float> %[[extract_out]], <2 x float>* %[[outptr]] - - %ret = load <4 x float>, <4 x float>* %a - - ret <4 x float> %ret -; CHECK-NEXT: ret <4 x float> %[[insert_f]] -} - -define i32 @PR14212() { -; CHECK-LABEL: @PR14212( -; This caused a crash when 
"splitting" the load of the i32 in order to promote -; the store of <3 x i8> properly. Heavily reduced from an OpenCL test case. -entry: - %retval = alloca <3 x i8>, align 4 -; CHECK-NOT: alloca - - store <3 x i8> undef, <3 x i8>* %retval, align 4 - %cast = bitcast <3 x i8>* %retval to i32* - %load = load i32, i32* %cast, align 4 ; i32 load through a cast of the <3 x i8> alloca - ret i32 %load -; CHECK: ret i32 -} - -define <2 x i8> @PR14349.1(i32 %x) { -; CHECK: @PR14349.1 -; The first testcase for broken SROA rewriting of split integer loads and -; stores due to smaller vector loads and stores. This particular test ensures -; that we can rewrite a split store of an integer to a store of a vector. -entry: - %a = alloca i32 -; CHECK-NOT: alloca - - store i32 %x, i32* %a -; CHECK-NOT: store - - %cast = bitcast i32* %a to <2 x i8>* - %vec = load <2 x i8>, <2 x i8>* %cast -; CHECK-NOT: load - - ret <2 x i8> %vec -; CHECK: %[[trunc:.*]] = trunc i32 %x to i16 -; CHECK: %[[cast:.*]] = bitcast i16 %[[trunc]] to <2 x i8> -; CHECK: ret <2 x i8> %[[cast]] -} - -define i32 @PR14349.2(<2 x i8> %x) { -; CHECK: @PR14349.2 -; The second testcase for broken SROA rewriting of split integer loads and -; stores due to smaller vector loads and stores. This particular test ensures -; that we can rewrite a split load of an integer to a load of a vector. -entry: - %a = alloca i32 -; CHECK-NOT: alloca - - %cast = bitcast i32* %a to <2 x i8>* - store <2 x i8> %x, <2 x i8>* %cast -; CHECK-NOT: store - - %int = load i32, i32* %a -; CHECK-NOT: load - - ret i32 %int ; NOTE: in the checks that follow, the pattern variable named "trunc" captures a zext result, not a trunc -; CHECK: %[[cast:.*]] = bitcast <2 x i8> %x to i16 -; CHECK: %[[trunc:.*]] = zext i16 %[[cast]] to i32 -; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[trunc]] -; CHECK: ret i32 %[[insert]] -} - -define i32 @test7(<2 x i32> %x, <2 x i32> %y) { -; Test that we can promote to vectors when the alloca doesn't mention any vector types. 
-; CHECK-LABEL: @test7( -entry: - %a = alloca [2 x i64] - %a.cast = bitcast [2 x i64]* %a to [2 x <2 x i32>]* -; CHECK-NOT: alloca - - %a.x = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 0 - store <2 x i32> %x, <2 x i32>* %a.x - %a.y = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 1 - store <2 x i32> %y, <2 x i32>* %a.y -; CHECK-NOT: store - - %a.tmp1 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 0, i64 1 - %tmp1 = load i32, i32* %a.tmp1 ; element 1 of the slot that holds %x - %a.tmp2 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 1, i64 1 - %tmp2 = load i32, i32* %a.tmp2 ; element 1 of the slot that holds %y - %a.tmp3 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 1, i64 0 - %tmp3 = load i32, i32* %a.tmp3 ; element 0 of the slot that holds %y -; CHECK-NOT: load -; CHECK: extractelement <2 x i32> %x, i32 1 -; CHECK-NEXT: extractelement <2 x i32> %y, i32 1 -; CHECK-NEXT: extractelement <2 x i32> %y, i32 0 - - %tmp4 = add i32 %tmp1, %tmp2 - %tmp5 = add i32 %tmp3, %tmp4 - ret i32 %tmp5 -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: ret -} - -define i32 @test8(<2 x i32> %x) { -; Ensure that we can promote an alloca that doesn't mention a vector type based -; on a single store with a vector type. -; CHECK-LABEL: @test8( -entry: - %a = alloca i64 - %a.vec = bitcast i64* %a to <2 x i32>* - %a.i32 = bitcast i64* %a to i32* -; CHECK-NOT: alloca - - store <2 x i32> %x, <2 x i32>* %a.vec -; CHECK-NOT: store - - %tmp1 = load i32, i32* %a.i32 ; first i32 of the stored vector - %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1 - %tmp2 = load i32, i32* %a.tmp2 ; second i32 of the stored vector -; CHECK-NOT: load -; CHECK: extractelement <2 x i32> %x, i32 0 -; CHECK-NEXT: extractelement <2 x i32> %x, i32 1 - - %tmp4 = add i32 %tmp1, %tmp2 - ret i32 %tmp4 -; CHECK-NEXT: add -; CHECK-NEXT: ret -} - -define <2 x i32> @test9(i32 %x, i32 %y) { -; Ensure that we can promote an alloca that doesn't mention a vector type based -; on a single load with a vector type. 
-; CHECK-LABEL: @test9( -entry: - %a = alloca i64 - %a.vec = bitcast i64* %a to <2 x i32>* - %a.i32 = bitcast i64* %a to i32* -; CHECK-NOT: alloca - - store i32 %x, i32* %a.i32 ; first i32 of %a - %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1 - store i32 %y, i32* %a.tmp2 ; second i32 of %a -; CHECK-NOT: store -; CHECK: %[[V1:.*]] = insertelement <2 x i32> undef, i32 %x, i32 0 -; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1 - - %result = load <2 x i32>, <2 x i32>* %a.vec -; CHECK-NOT: load - - ret <2 x i32> %result -; CHECK-NEXT: ret <2 x i32> %[[V2]] -} - -define <2 x i32> @test10(<4 x i16> %x, i32 %y) { -; If there are multiple different vector types used, we should select the one -; with the widest elements. -; CHECK-LABEL: @test10( -entry: - %a = alloca i64 - %a.vec1 = bitcast i64* %a to <2 x i32>* - %a.vec2 = bitcast i64* %a to <4 x i16>* - %a.i32 = bitcast i64* %a to i32* -; CHECK-NOT: alloca - - store <4 x i16> %x, <4 x i16>* %a.vec2 - %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1 - store i32 %y, i32* %a.tmp2 ; overwrites the second i32 of the <4 x i16> value stored just above -; CHECK-NOT: store -; CHECK: %[[V1:.*]] = bitcast <4 x i16> %x to <2 x i32> -; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1 - - %result = load <2 x i32>, <2 x i32>* %a.vec1 ; read back as <2 x i32>, the vector type the checks expect to be used -; CHECK-NOT: load - - ret <2 x i32> %result -; CHECK-NEXT: ret <2 x i32> %[[V2]] -} - -define <2 x float> @test11(<4 x i16> %x, i32 %y) { -; If there are multiple different element types for different vector types, -; pick the integer types. This isn't really important, but seems like the best -; heuristic for making a deterministic decision. 
-; CHECK-LABEL: @test11( -entry: - %a = alloca i64 - %a.vec1 = bitcast i64* %a to <2 x float>* - %a.vec2 = bitcast i64* %a to <4 x i16>* - %a.i32 = bitcast i64* %a to i32* -; CHECK-NOT: alloca - - store <4 x i16> %x, <4 x i16>* %a.vec2 - %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1 - store i32 %y, i32* %a.tmp2 ; second i32 of %a; the checks expect it to land in the last two i16 lanes -; CHECK-NOT: store -; CHECK: %[[V1:.*]] = bitcast i32 %y to <2 x i16> -; CHECK-NEXT: %[[V2:.*]] = shufflevector <2 x i16> %[[V1]], <2 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1> -; CHECK-NEXT: %[[V3:.*]] = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i16> %[[V2]], <4 x i16> %x -; CHECK-NEXT: %[[V4:.*]] = bitcast <4 x i16> %[[V3]] to <2 x float> - - %result = load <2 x float>, <2 x float>* %a.vec1 -; CHECK-NOT: load - - ret <2 x float> %result -; CHECK-NEXT: ret <2 x float> %[[V4]] -} - -define <4 x float> @test12() { ; A <3 x i32> alloca is stored to as <4 x i32> and loaded as <4 x float> through casts; the checks expect only a bitcast of the stored undef value to remain. -; CHECK-LABEL: @test12( - %a = alloca <3 x i32>, align 16 -; CHECK-NOT: alloca - - %cast1 = bitcast <3 x i32>* %a to <4 x i32>* - store <4 x i32> undef, <4 x i32>* %cast1, align 16 -; CHECK-NOT: store - - %cast2 = bitcast <3 x i32>* %a to <3 x float>* - %cast3 = bitcast <3 x float>* %cast2 to <4 x float>* - %vec = load <4 x float>, <4 x float>* %cast3 -; CHECK-NOT: load - -; CHECK: %[[ret:.*]] = bitcast <4 x i32> undef to <4 x float> -; CHECK-NEXT: ret <4 x float> %[[ret]] - ret <4 x float> %vec -} diff --git a/llvm/test/Transforms/SROA/vectors-of-pointers.ll b/llvm/test/Transforms/SROA/vectors-of-pointers.ll deleted file mode 100644 index ff09e959896..00000000000 --- a/llvm/test/Transforms/SROA/vectors-of-pointers.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: opt < %s -sroa - -; Make sure we don't crash on this one. 
- -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.8.0" - -define void @foo() { -entry: - %Args.i = alloca <2 x i32*>, align 16 ; alloca whose type is a vector of pointers - br i1 undef, label %bb0.exit158, label %if.then.i.i.i.i.i138 - -if.then.i.i.i.i.i138: - unreachable - -bb0.exit158: - br i1 undef, label %bb0.exit257, label %if.then.i.i.i.i.i237 - -if.then.i.i.i.i.i237: - unreachable - -bb0.exit257: - %0 = load <2 x i32*>, <2 x i32*>* %Args.i, align 16 ; the only use of %Args.i in this function; nothing is stored to it first - unreachable -} |