diff options
Diffstat (limited to 'llvm/test/Transforms/CodeGenPrepare/ARM')
9 files changed, 1065 insertions, 0 deletions
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll new file mode 100644 index 00000000000..f5644e4ad31 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll @@ -0,0 +1,37 @@ +; RUN: opt -S -loop-unroll -codegenprepare < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7--linux-gnueabihf" + +; CHECK-LABEL: @f +define i32 @f(i32 %a) #0 { +; CHECK: call i32 @llvm.bitreverse.i32 +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i32 %or + +for.body: ; preds = %for.body, %entry + %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %b.07 = phi i32 [ 0, %entry ], [ %or, %for.body ] + %shr = lshr i32 %a, %i.08 + %and = and i32 %shr, 1 + %sub = sub nuw nsw i32 31, %i.08 + %shl = shl i32 %and, %sub + %or = or i32 %shl, %b.07 + %inc = add nuw nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %inc, 32 + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !3 +} + +attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+dsp,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"min_enum_size", i32 4} +!2 = !{!"clang version 3.8.0"} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.unroll.full"} diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/large-offset-gep.ll new file mode 100644 index 00000000000..9b9f58c70ac --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/large-offset-gep.ll @@ -0,0 +1,157 @@ +; RUN: llc 
-mtriple=armv6m-linux-gnueabi -verify-machineinstrs -o - %s -disable-constant-hoisting | FileCheck %s + +%struct_type = type { [10000 x i32], i32, i32 } + +define void @test1(%struct_type** %s, i32 %n) { +; CHECK-LABEL: test1 +entry: + %struct = load %struct_type*, %struct_type** %s + br label %while_cond + +while_cond: + %phi = phi i32 [ 0, %entry ], [ %i, %while_body ] + %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1 + %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2 + %cmp = icmp slt i32 %phi, %n + br i1 %cmp, label %while_body, label %while_end + +while_body: +; CHECK: str r{{[0-9]+}}, [r{{[0-9]+}}] +; CHECK-NEXT: str r{{[0-9]+}}, [r{{[0-9]+}}, #4] + %i = add i32 %phi, 1 + store i32 %i, i32* %gep0 + store i32 %phi, i32* %gep1 + br label %while_cond + +while_end: + ret void +; CHECK: .LCPI0_0: +; CHECK-NEXT: .long 40000 +; CHECK-NOT: LCPI0 +} + +define void @test2(%struct_type* %struct, i32 %n) { +; CHECK-LABEL: test2 +entry: + %cmp = icmp eq %struct_type* %struct, null + br i1 %cmp, label %while_end, label %while_cond + +while_cond: + %phi = phi i32 [ 0, %entry ], [ %i, %while_body ] + %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1 + %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2 + %cmp1 = icmp slt i32 %phi, %n + br i1 %cmp1, label %while_body, label %while_end + +while_body: +; CHECK: str r{{[0-9]+}}, [r{{[0-9]+}}] +; CHECK-NEXT: str r{{[0-9]+}}, [r{{[0-9]+}}, #4] + %i = add i32 %phi, 1 + store i32 %i, i32* %gep0 + store i32 %phi, i32* %gep1 + br label %while_cond + +while_end: + ret void +; CHECK: .LCPI1_0: +; CHECK-NEXT: .long 40000 +; CHECK-NOT: LCPI1 +} + +define void @test3(%struct_type* %s1, %struct_type* %s2, i1 %cond, i32 %n) { +; CHECK-LABEL: test3 +entry: + br i1 %cond, label %if_true, label %if_end + +if_true: + br label %if_end + +if_end: + %struct = phi %struct_type* [ %s1, %entry ], [ %s2, %if_true ] + %cmp = icmp eq %struct_type* %struct, null + br 
i1 %cmp, label %while_end, label %while_cond + +while_cond: + %phi = phi i32 [ 0, %if_end ], [ %i, %while_body ] + %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1 + %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2 + %cmp1 = icmp slt i32 %phi, %n + br i1 %cmp1, label %while_body, label %while_end + +while_body: +; CHECK: str r{{[0-9]+}}, [r{{[0-9]+}}] +; CHECK-NEXT: str r{{[0-9]+}}, [r{{[0-9]+}}, #4] + %i = add i32 %phi, 1 + store i32 %i, i32* %gep0 + store i32 %phi, i32* %gep1 + br label %while_cond + +while_end: + ret void +; CHECK: .LCPI2_0: +; CHECK-NEXT: .long 40000 +; CHECK-NOT: LCPI2 +} + +declare %struct_type* @foo() + +define void @test4(i32 %n) personality i32 (...)* @__FrameHandler { +; CHECK-LABEL: test4 +entry: + %struct = invoke %struct_type* @foo() to label %while_cond unwind label %cleanup + +while_cond: + %phi = phi i32 [ 0, %entry ], [ %i, %while_body ] + %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1 + %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2 + %cmp = icmp slt i32 %phi, %n + br i1 %cmp, label %while_body, label %while_end + +while_body: +; CHECK: str r{{[0-9]+}}, [r{{[0-9]+}}] +; CHECK-NEXT: str r{{[0-9]+}}, [r{{[0-9]+}}, #4] + %i = add i32 %phi, 1 + store i32 %i, i32* %gep0 + store i32 %phi, i32* %gep1 + br label %while_cond + +while_end: + ret void + +cleanup: + landingpad { i8*, i32 } cleanup + unreachable +; CHECK: .LCPI3_0: +; CHECK-NEXT: .long 40000 +; CHECK-NOT: LCPI3 +} + +declare i32 @__FrameHandler(...) 
+ +define void @test5([65536 x i32]** %s, i32 %n) { +; CHECK-LABEL: test5 +entry: + %struct = load [65536 x i32]*, [65536 x i32]** %s + br label %while_cond + +while_cond: + %phi = phi i32 [ 0, %entry ], [ %i, %while_body ] + %gep0 = getelementptr [65536 x i32], [65536 x i32]* %struct, i64 0, i32 20000 + %gep1 = getelementptr [65536 x i32], [65536 x i32]* %struct, i64 0, i32 20001 + %cmp = icmp slt i32 %phi, %n + br i1 %cmp, label %while_body, label %while_end + +while_body: +; CHECK: str r{{[0-9]+}}, [r{{[0-9]+}}] +; CHECK-NEXT: str r{{[0-9]+}}, [r{{[0-9]+}}, #4] + %i = add i32 %phi, 1 + store i32 %i, i32* %gep0 + store i32 %phi, i32* %gep1 + br label %while_cond + +while_end: + ret void +; CHECK: .LCPI4_0: +; CHECK-NEXT: .long 80000 +; CHECK-NOT: LCPI4 +} diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg b/llvm/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg new file mode 100644 index 00000000000..98c6700c209 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg @@ -0,0 +1,3 @@ +if 'ARM' not in config.root.targets: + config.unsupported = True + diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll new file mode 100644 index 00000000000..8b70d9381cd --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll @@ -0,0 +1,43 @@ +; RUN: opt -codegenprepare -mtriple=arm7-unknown-unknown -S < %s | FileCheck %s + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1) nounwind + +define void @test_memcpy(i8* align 4 %dst, i8* align 8 %src, i32 %N) { +; CHECK-LABEL: @test_memcpy +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 8 %src, i32 %N, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 8 %src, i32 %N, i1 false) +; CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %dst, i8* align 16 %src, i32 %N, i1 false) +entry: + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %N, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %dst, i8* align 2 %src, i32 %N, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %dst, i8* align 16 %src, i32 %N, i1 false) + ret void +} + +define void @test_memmove(i8* align 4 %dst, i8* align 8 %src, i32 %N) { +; CHECK-LABEL: @test_memmove +; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 8 %src, i32 %N, i1 false) +; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 8 %src, i32 %N, i1 false) +; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 8 %dst, i8* align 16 %src, i32 %N, i1 false) +entry: + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %N, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 2 %dst, i8* align 2 %src, i32 %N, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 8 %dst, i8* align 16 %src, i32 %N, i1 false) + ret void +} + +define void @test_memset(i8* align 4 %dst, i8 %val, i32 %N) { +; CHECK-LABEL: @test_memset +; CHECK: call void @llvm.memset.p0i8.i32(i8* align 4 %dst, i8 %val, i32 %N, i1 false) +; CHECK: call void @llvm.memset.p0i8.i32(i8* align 4 %dst, i8 %val, i32 %N, i1 false) +; CHECK: call void @llvm.memset.p0i8.i32(i8* align 8 %dst, i8 %val, i32 %N, i1 false) +entry: + call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val, i32 %N, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 2 %dst, i8 %val, i32 %N, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 8 %dst, i8 %val, i32 %N, i1 false) + ret void +} + + diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll new file mode 100644 index 00000000000..3fbc2133141 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll @@ -0,0 +1,56 @@ +; RUN: opt -codegenprepare 
-S < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8m.main-arm-none-eabi" + +; CHECK-LABEL: uadd_overflow_too_far_cmp_dom +; CHECK-NOT: with.overflow.i32 +define i32 @uadd_overflow_too_far_cmp_dom(i32 %arg0) { +entry: + %cmp = icmp ne i32 %arg0, 0 + br i1 %cmp, label %if.else, label %if.then + +if.then: + call void @foo() + br label %exit + +if.else: + call void @bar() + br label %if.end + +if.end: + %dec = add nsw i32 %arg0, -1 + br label %exit + +exit: + %res = phi i32 [ %arg0, %if.then ], [ %dec, %if.end ] + ret i32 %res +} + +; CHECK-LABEL: uadd_overflow_too_far_math_dom +; CHECK-NOT: with.overflow.i32 +define i32 @uadd_overflow_too_far_math_dom(i32 %arg0, i32 %arg1) { +entry: + %dec = add nsw i32 %arg0, -1 + %cmp = icmp ugt i32 %arg0, 1 + br i1 %cmp, label %if.else, label %if.then + +if.then: + call void @foo() + br label %if.end + +if.else: + call void @bar() + br label %if.end + +if.end: + %cmp.i.i = icmp ne i32 %arg0, 0 + %tobool = zext i1 %cmp.i.i to i32 + br label %exit + +exit: + ret i32 %tobool +} + +declare void @foo() +declare void @bar() diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll new file mode 100644 index 00000000000..a26edb19da0 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll @@ -0,0 +1,420 @@ +; RUN: opt -S -codegenprepare -mtriple=thumbv7m -disable-complex-addr-modes=false -addr-sink-new-select=true -addr-sink-new-phis=true < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +@gv1 = common global i32 0, align 4 +@gv2 = common global i32 0, align 4 + +; Phi selects between ptr and gep with ptr as base and constant offset +define void @test_phi_onegep_offset(i32* %ptr, i32 %value) { +; CHECK-LABEL: @test_phi_onegep_offset +; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if.then ] +; CHECK: phi i32 [ 4, %if.then ], [ 0, 
%entry ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %gep = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if.end + +if.end: + %phi = phi i32* [ %ptr, %entry ], [ %gep, %if.then ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between two geps with same base, different constant offsets +define void @test_phi_twogep_offset(i32* %ptr, i32 %value) { +; CHECK-LABEL: @test_phi_twogep_offset +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] +; CHECK: phi i32 [ 8, %if.else ], [ 4, %if.then ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if.end + +if.else: + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between ptr and gep with ptr as base and nonconstant offset +define void @test_phi_onegep_nonconst_offset(i32* %ptr, i32 %value, i32 %off) { +; CHECK-LABEL: @test_phi_onegep_nonconst_offset +; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if.then ] +; CHECK: phi i32 [ %off, %if.then ], [ 0, %entry ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %gep = getelementptr inbounds i32, i32* %ptr, i32 %off + br label %if.end + +if.end: + %phi = phi i32* [ %ptr, %entry ], [ %gep, %if.then ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between two geps with same base, different nonconstant offsets +define void @test_phi_twogep_nonconst_offset(i32* %ptr, i32 %value, i32 %off1, i32 %off2) { +; CHECK-LABEL: @test_phi_twogep_nonconst_offset +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] +; CHECK: phi i32 [ %off2, %if.else ], [ %off1, %if.then ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, 
label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off1 + br label %if.end + +if.else: + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 %off2 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between two geps with different base, same constant offset +define void @test_phi_twogep_base(i32* %ptr1, i32* %ptr2, i32 %value) { +; CHECK-LABEL: @test_phi_twogep_base +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] +; CHECK: phi i32* [ %ptr2, %if.else ], [ %ptr1, %if.then ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr1, i32 1 + br label %if.end + +if.else: + %gep2 = getelementptr inbounds i32, i32* %ptr2, i32 1 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between two geps with different base global variables, same constant offset +define void @test_phi_twogep_base_gv(i32 %value) { +; CHECK-LABEL: @test_phi_twogep_base_gv +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] +; CHECK: phi i32* [ @gv2, %if.else ], [ @gv1, %if.then ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* @gv1, i32 1 + br label %if.end + +if.else: + %gep2 = getelementptr inbounds i32, i32* @gv2, i32 1 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Select between ptr and gep with ptr as base and constant offset +define void @test_select_onegep_offset(i32* %ptr, i32 %value) { +; CHECK-LABEL: @test_select_onegep_offset +; CHECK-NOT: select i1 %cmp, i32* %ptr, i32* %gep +; CHECK: select i1 %cmp, i32 0, i32 4 +entry: + %cmp = icmp sgt 
i32 %value, 0 + %gep = getelementptr inbounds i32, i32* %ptr, i32 1 + %select = select i1 %cmp, i32* %ptr, i32* %gep + store i32 %value, i32* %select, align 4 + ret void +} + +; Select between two geps with same base, different constant offsets +define void @test_select_twogep_offset(i32* %ptr, i32 %value) { +; CHECK-LABEL: @test_select_twogep_offset +; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp, i32 4, i32 8 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + %select = select i1 %cmp, i32* %gep1, i32* %gep2 + store i32 %value, i32* %select, align 4 + ret void +} + +; Select between ptr and gep with ptr as base and nonconstant offset +define void @test_select_onegep_nonconst_offset(i32* %ptr, i32 %value, i32 %off) { +; CHECK-LABEL: @test_select_onegep_nonconst_offset +; CHECK-NOT: select i1 %cmp, i32* %ptr, i32* %gep +; CHECK: select i1 %cmp, i32 0, i32 %off +entry: + %cmp = icmp sgt i32 %value, 0 + %gep = getelementptr inbounds i32, i32* %ptr, i32 %off + %select = select i1 %cmp, i32* %ptr, i32* %gep + store i32 %value, i32* %select, align 4 + ret void +} + +; Select between two geps with same base, different nonconstant offsets +define void @test_select_twogep_nonconst_offset(i32* %ptr, i32 %value, i32 %off1, i32 %off2) { +; CHECK-LABEL: @test_select_twogep_nonconst_offset +; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp, i32 %off1, i32 %off2 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off1 + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 %off2 + %select = select i1 %cmp, i32* %gep1, i32* %gep2 + store i32 %value, i32* %select, align 4 + ret void +} + +; Select between two geps with different base, same constant offset +define void @test_select_twogep_base(i32* %ptr1, i32* %ptr2, i32 %value) { +; CHECK-LABEL: @test_select_twogep_base +; CHECK-NOT: 
select i1 %cmp, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp, i32* %ptr1, i32* %ptr2 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep1 = getelementptr inbounds i32, i32* %ptr1, i32 1 + %gep2 = getelementptr inbounds i32, i32* %ptr2, i32 1 + %select = select i1 %cmp, i32* %gep1, i32* %gep2 + store i32 %value, i32* %select, align 4 + ret void +} + +; Select between two geps with different base global variables, same constant offset +define void @test_select_twogep_base_gv(i32 %value) { +; CHECK-LABEL: @test_select_twogep_base_gv +; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp, i32* @gv1, i32* @gv2 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep1 = getelementptr inbounds i32, i32* @gv1, i32 1 + %gep2 = getelementptr inbounds i32, i32* @gv2, i32 1 + %select = select i1 %cmp, i32* %gep1, i32* %gep2 + store i32 %value, i32* %select, align 4 + ret void +} + +; If the phi is in a different block to where the gep will be, the phi goes where +; the original phi was, not where the gep is. 
+; CHECK-LABEL: @test_phi_different_block +; CHECK-LABEL: if1.end +; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if1.then ] +; CHECK: phi i32 [ 4, %if1.then ], [ 0, %entry ] +define void @test_phi_different_block(i32* %ptr, i32 %value1, i32 %value2) { +entry: + %cmp1 = icmp sgt i32 %value1, 0 + br i1 %cmp1, label %if1.then, label %if1.end + +if1.then: + %gep = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if1.end + +if1.end: + %phi = phi i32* [ %ptr, %entry ], [ %gep, %if1.then ] + %cmp2 = icmp sgt i32 %value2, 0 + br i1 %cmp2, label %if2.then, label %if2.end + +if2.then: + store i32 %value1, i32* %ptr, align 4 + br label %if2.end + +if2.end: + store i32 %value2, i32* %phi, align 4 + ret void +} + +; A phi with three incoming values should be optimised +; CHECK-LABEL: @test_phi_threegep +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else.then ], [ %gep3, %if.else.else ] +; CHECK: phi i32 [ 12, %if.else.else ], [ 8, %if.else.then ], [ 4, %if.then ] +define void @test_phi_threegep(i32* %ptr, i32 %value1, i32 %value2) { +entry: + %cmp1 = icmp sgt i32 %value1, 0 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if.end + +if.else: + %cmp2 = icmp sgt i32 %value2, 0 + br i1 %cmp2, label %if.else.then, label %if.else.else + +if.else.then: + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + br label %if.end + +if.else.else: + %gep3 = getelementptr inbounds i32, i32* %ptr, i32 3 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else.then ], [ %gep3, %if.else.else ] + store i32 %value1, i32* %phi, align 4 + ret void +} + +; A phi with two incoming values but three geps due to nesting should be +; optimised +; CHECK-LABEL: @test_phi_threegep_nested +; CHECK: %[[PHI:[a-z0-9_]+]] = phi i32 [ 12, %if.else.else ], [ 8, %if.else.then ] +; CHECK: phi i32 [ %[[PHI]], %if.else.end ], [ 4, %if.then ] +define void @test_phi_threegep_nested(i32* %ptr, 
i32 %value1, i32 %value2) { +entry: + %cmp1 = icmp sgt i32 %value1, 0 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if.end + +if.else: + %cmp2 = icmp sgt i32 %value2, 0 + br i1 %cmp2, label %if.else.then, label %if.else.else + +if.else.then: + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + br label %if.else.end + +if.else.else: + %gep3 = getelementptr inbounds i32, i32* %ptr, i32 3 + br label %if.else.end + +if.else.end: + %gep4 = phi i32* [ %gep2, %if.else.then ], [ %gep3, %if.else.else ] + store i32 %value2, i32* %ptr, align 4 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep4, %if.else.end ] + store i32 %value1, i32* %phi, align 4 + ret void +} + +; A nested select is expected to be optimised +; CHECK-LABEL: @test_nested_select +; CHECK: %[[SELECT:[a-z0-9_]+]] = select i1 %cmp2, i32 4, i32 8 +; CHECK: select i1 %cmp1, i32 4, i32 %[[SELECT]] +define void @test_nested_select(i32* %ptr, i32 %value1, i32 %value2) { +entry: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + %cmp1 = icmp sgt i32 %value1, 0 + %cmp2 = icmp sgt i32 %value2, 0 + %select1 = select i1 %cmp2, i32* %gep1, i32* %gep2 + %select2 = select i1 %cmp1, i32* %gep1, i32* %select1 + store i32 %value1, i32* %select2, align 4 + ret void +} + +; Scaling the offset by a different amount is expected not to be optimised +; CHECK-LABEL: @test_select_different_scale +; CHECK: select i1 %cmp, i32* %gep1, i32* %castgep +define void @test_select_different_scale(i32* %ptr, i32 %value, i32 %off) { +entry: + %cmp = icmp sgt i32 %value, 0 + %castptr = bitcast i32* %ptr to i16* + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off + %gep2 = getelementptr inbounds i16, i16* %castptr, i32 %off + %castgep = bitcast i16* %gep2 to i32* + %select = select i1 %cmp, i32* %gep1, i32* %castgep + store i32 %value, i32* %select, align 4 + ret void +} + +; 
A select between two values is already the best we can do +; CHECK-LABEL: @test_select_trivial +; CHECK: select i1 %cmp, i32* %ptr1, i32* %ptr2 +define void @test_select_trivial(i32* %ptr1, i32* %ptr2, i32 %value) { +entry: + %cmp = icmp sgt i32 %value, 0 + %select = select i1 %cmp, i32* %ptr1, i32* %ptr2 + store i32 %value, i32* %select, align 4 + ret void +} + +; A select between two global variables is already the best we can do +; CHECK-LABEL: @test_select_trivial_gv +; CHECK: select i1 %cmp, i32* @gv1, i32* @gv2 +define void @test_select_trivial_gv(i32 %value) { +entry: + %cmp = icmp sgt i32 %value, 0 + %select = select i1 %cmp, i32* @gv1, i32* @gv2 + store i32 %value, i32* %select, align 4 + ret void +} + +; Same for a select between a value and a global variable +; CHECK-LABEL: @test_select_trivial_ptr_gv +; CHECK: select i1 %cmp, i32* %ptr, i32* @gv2 +define void @test_select_trivial_ptr_gv(i32* %ptr, i32 %value) { +entry: + %cmp = icmp sgt i32 %value, 0 + %select = select i1 %cmp, i32* %ptr, i32* @gv2 + store i32 %value, i32* %select, align 4 + ret void +} + +; Same for a select between a global variable and null, though the test needs to +; be a little more complicated to avoid dereferencing a potential null pointer +; CHECK-LABEL: @test_select_trivial_gv_null +; CHECK: select i1 %cmp.i, i32* @gv1, i32* null +define void @test_select_trivial_gv_null(){ +entry: + %gv1_val = load i32, i32* @gv1, align 4 + %cmp.i = icmp eq i32 %gv1_val, 0 + %spec.select.i = select i1 %cmp.i, i32* @gv1, i32* null + br i1 %cmp.i, label %if.then, label %if.end + +if.then: + %val = load i32, i32* %spec.select.i, align 4 + %inc = add nsw i32 %val, 1 + store i32 %inc, i32* %spec.select.i, align 4 + br label %if.end + +if.end: + ret void +} + +; Same for a select between a value and null +; CHECK-LABEL: @test_select_trivial_ptr_null +; CHECK: select i1 %cmp.i, i32* %ptr, i32* null +define void @test_select_trivial_ptr_null(i32* %ptr){ +entry: + %gv1_val = load i32, i32* %ptr, align 4 
+ %cmp.i = icmp eq i32 %gv1_val, 0 + %spec.select.i = select i1 %cmp.i, i32* %ptr, i32* null + br i1 %cmp.i, label %if.then, label %if.end + +if.then: + %val = load i32, i32* %spec.select.i, align 4 + %inc = add nsw i32 %val, 1 + store i32 %inc, i32* %spec.select.i, align 4 + br label %if.end + +if.end: + ret void +} diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll new file mode 100644 index 00000000000..9dd0b373aa2 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll @@ -0,0 +1,232 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=armv7-apple-darwin < %s -codegenprepare -S | FileCheck -check-prefix=NEON %s +; RUN: opt -mtriple=armv6-unknown-linux < %s -codegenprepare -S | FileCheck -check-prefix=NONEON %s + +define <8 x i16> @sink_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) { +; NEON-LABEL: @sink_zext( +; NEON-NEXT: entry: +; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NEON: if.then: +; NEON-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16> +; NEON-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16> +; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] +; NEON-NEXT: ret <8 x i16> [[RES_1]] +; NEON: if.else: +; NEON-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16> +; NEON-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16> +; NEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] +; NEON-NEXT: ret <8 x i16> [[RES_2]] +; +; NONEON-LABEL: @sink_zext( +; NONEON-NEXT: entry: +; NONEON-NEXT: [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16> +; NONEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NONEON: if.then: +; NONEON-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16> +; NONEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[ZB_1]] +; NONEON-NEXT: ret <8 x i16> [[RES_1]] +; NONEON: if.else: +; 
NONEON-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16> +; NONEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[ZA]], [[ZB_2]] +; NONEON-NEXT: ret <8 x i16> [[RES_2]] +; +entry: + %za = zext <8 x i8> %a to <8 x i16> + br i1 %c, label %if.then, label %if.else + +if.then: + %zb.1 = zext <8 x i8> %b to <8 x i16> + %res.1 = add <8 x i16> %za, %zb.1 + ret <8 x i16> %res.1 + +if.else: + %zb.2 = zext <8 x i8> %b to <8 x i16> + %res.2 = sub <8 x i16> %za, %zb.2 + ret <8 x i16> %res.2 +} + +define <8 x i16> @sink_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) { +; NEON-LABEL: @sink_sext( +; NEON-NEXT: entry: +; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NEON: if.then: +; NEON-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> +; NEON-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] +; NEON-NEXT: ret <8 x i16> [[RES_1]] +; NEON: if.else: +; NEON-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> +; NEON-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16> +; NEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] +; NEON-NEXT: ret <8 x i16> [[RES_2]] +; +; NONEON-LABEL: @sink_sext( +; NONEON-NEXT: entry: +; NONEON-NEXT: [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; NONEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NONEON: if.then: +; NONEON-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> +; NONEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[ZB_1]] +; NONEON-NEXT: ret <8 x i16> [[RES_1]] +; NONEON: if.else: +; NONEON-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> +; NONEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[ZA]], [[ZB_2]] +; NONEON-NEXT: ret <8 x i16> [[RES_2]] +; +entry: + %za = sext <8 x i8> %a to <8 x i16> + br i1 %c, label %if.then, label %if.else + +if.then: + %zb.1 = sext <8 x i8> %b to <8 x i16> + %res.1 = add <8 x i16> %za, %zb.1 + ret <8 x i16> %res.1 + +if.else: + %zb.2 = sext <8 x i8> %b to <8 x i16> + 
%res.2 = sub <8 x i16> %za, %zb.2 + ret <8 x i16> %res.2 +} + +define <8 x i16> @do_not_sink_nonfree_zext(<8 x i8> %a, <8 x i16> %b, i1 %c) { +; +; NEON-LABEL: @do_not_sink_nonfree_zext( +; NEON-NEXT: entry: +; NEON-NEXT: [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16> +; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NEON: if.then: +; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]] +; NEON-NEXT: ret <8 x i16> [[RES_1]] +; NEON: if.else: +; NEON-NEXT: ret <8 x i16> [[B]] +; +; NONEON-LABEL: @do_not_sink_nonfree_zext( +; NONEON-NEXT: entry: +; NONEON-NEXT: [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16> +; NONEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NONEON: if.then: +; NONEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]] +; NONEON-NEXT: ret <8 x i16> [[RES_1]] +; NONEON: if.else: +; NONEON-NEXT: ret <8 x i16> [[B]] +; +entry: + %za = zext <8 x i8> %a to <8 x i16> + br i1 %c, label %if.then, label %if.else + +if.then: + %res.1 = add <8 x i16> %za, %b + ret <8 x i16> %res.1 + +if.else: + ret <8 x i16> %b +} + +define <8 x i16> @do_not_sink_nonfree_sext(<8 x i8> %a, <8 x i16> %b, i1 %c) { +; CHECK-LABEL: @do_not_sink_nonfree_sext( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] +; CHECK-NEXT: ret <8 x i16> [[RES_1]] +; CHECK: if.else: +; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> +; CHECK-NEXT: ret <8 x i16> [[ZB_2]] +; +; NEON-LABEL: @do_not_sink_nonfree_sext( +; NEON-NEXT: entry: +; NEON-NEXT: [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NEON: if.then: +; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]] +; 
NEON-NEXT: ret <8 x i16> [[RES_1]] +; NEON: if.else: +; NEON-NEXT: ret <8 x i16> [[B]] +; +; NONEON-LABEL: @do_not_sink_nonfree_sext( +; NONEON-NEXT: entry: +; NONEON-NEXT: [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; NONEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NONEON: if.then: +; NONEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]] +; NONEON-NEXT: ret <8 x i16> [[RES_1]] +; NONEON: if.else: +; NONEON-NEXT: ret <8 x i16> [[B]] +; +entry: + %za = sext <8 x i8> %a to <8 x i16> + br i1 %c, label %if.then, label %if.else + +if.then: + %res.1 = add <8 x i16> %za, %b + ret <8 x i16> %res.1 + +if.else: + ret <8 x i16> %b +} + +; Declared only; the function below calls it with %z3, giving %z3 a user in its +; entry block. +declare void @user1(<8 x i16>) + +; Exts can be sunk. +; %z1 feeds only the add in if.then; %z3 feeds the call to @user1 in entry and +; the sub in if.else. Under the NEON prefix the checks expect a zext of %s1 +; next to the add and an additional sext of %s3 next to the sub, with the +; original sext kept in entry for the call. Under the NONEON prefix both exts +; are expected to stay in entry. +define <8 x i16> @sink_shufflevector_ext_subadd_multiuse(<16 x i8> %a, <16 x i8> %b) { +; NEON-LABEL: @sink_shufflevector_ext_subadd_multiuse( +; NEON-NEXT: entry: +; NEON-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; NEON-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; NEON-NEXT: [[Z3:%.*]] = sext <8 x i8> [[S3]] to <8 x i16> +; NEON-NEXT: call void @user1(<8 x i16> [[Z3]]) +; NEON-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NEON: if.then: +; NEON-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; NEON-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> +; NEON-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[S1]] to <8 x i16> +; NEON-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP0]], [[Z2]] +; NEON-NEXT: ret <8 x i16> [[RES1]] +; NEON: if.else: +; NEON-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; NEON-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> +; NEON-NEXT: 
[[TMP1:%.*]] = sext <8 x i8> [[S3]] to <8 x i16> +; NEON-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP1]], [[Z4]] +; NEON-NEXT: ret <8 x i16> [[RES2]] +; +; NONEON-LABEL: @sink_shufflevector_ext_subadd_multiuse( +; NONEON-NEXT: entry: +; NONEON-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; NONEON-NEXT: [[Z1:%.*]] = zext <8 x i8> [[S1]] to <8 x i16> +; NONEON-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; NONEON-NEXT: [[Z3:%.*]] = sext <8 x i8> [[S3]] to <8 x i16> +; NONEON-NEXT: call void @user1(<8 x i16> [[Z3]]) +; NONEON-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NONEON: if.then: +; NONEON-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; NONEON-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> +; NONEON-NEXT: [[RES1:%.*]] = add <8 x i16> [[Z1]], [[Z2]] +; NONEON-NEXT: ret <8 x i16> [[RES1]] +; NONEON: if.else: +; NONEON-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; NONEON-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> +; NONEON-NEXT: [[RES2:%.*]] = sub <8 x i16> [[Z3]], [[Z4]] +; NONEON-NEXT: ret <8 x i16> [[RES2]] +; +entry: + %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %z1 = zext <8 x i8> %s1 to <8 x i16> + %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %z3 = sext <8 x i8> %s3 to <8 x i16> + call void @user1(<8 x i16> %z3) + br i1 undef, label %if.then, label %if.else + +if.then: + %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %z2 = 
zext <8 x i8> %s2 to <8 x i16> + %res1 = add <8 x i16> %z1, %z2 + ret <8 x i16> %res1 + +if.else: + %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %z4 = sext <8 x i8> %s4 to <8 x i16> + %res2 = sub <8 x i16> %z3, %z4 + ret <8 x i16> %res2 +} diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/splitgep.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/splitgep.ll new file mode 100644 index 00000000000..b2edb852385 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/splitgep.ll @@ -0,0 +1,40 @@ +; RUN: opt -S -codegenprepare %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv6m-arm-none-eabi" + +; Check that we have deterministic output +; The checks pin the order of the first five instructions of entry: the bitcast +; of %t and %splitgep1 come before the load of %s, its bitcast and %splitgep. +; The i8 offset 80000 in both splitgeps equals element index 20000 of the i32 +; array (20000 * 4), the index used by %gep0 and %gep2. +define void @test([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = bitcast [65536 x i32]* %t to i8* +; CHECK-NEXT: %splitgep1 = getelementptr i8, i8* %0, i32 80000 +; CHECK-NEXT: %s = load [65536 x i32]*, [65536 x i32]** %sp +; CHECK-NEXT: %1 = bitcast [65536 x i32]* %s to i8* +; CHECK-NEXT: %splitgep = getelementptr i8, i8* %1, i32 80000 +entry: + %s = load [65536 x i32]*, [65536 x i32]** %sp + br label %while_cond + +while_cond: + %phi = phi i32 [ 0, %entry ], [ %i, %while_body ] + %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000 + %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001 + %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000 + %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001 + %cmp = icmp slt i32 %phi, %n + br i1 %cmp, label %while_body, label %while_end + +while_body: + %i = add i32 %phi, 1 +; NOTE(review): %j has no users in this function; presumably deliberate - confirm. + %j = add i32 %phi, 2 + store i32 %i, i32* %gep0 + store i32 %phi, i32* %gep1 + store i32 %i, i32* %gep2 + store i32 %phi, i32* %gep3 + br label %while_cond + +while_end: + ret void +} + diff --git 
a/llvm/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll new file mode 100644 index 00000000000..09658ae75ac --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll @@ -0,0 +1,77 @@ +; RUN: opt -codegenprepare -S < %s | FileCheck %s + +target triple = "armv8m.main-none-eabi" + +declare i8* @f0() +declare i8* @f1() + +; bb0 and bb1 each make a tail call and branch to %return, which returns the +; phi of the two call results. The checks expect a ret on the line right after +; each tail call instead. +define i8* @tail_dup() { +; CHECK-LABEL: tail_dup +; CHECK: tail call i8* @f0() +; CHECK-NEXT: ret i8* +; CHECK: tail call i8* @f1() +; CHECK-NEXT: ret i8* +bb0: + %tmp0 = tail call i8* @f0() + br label %return +bb1: + %tmp1 = tail call i8* @f1() + br label %return +return: + %retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ] + ret i8* %retval +} + +; Same blocks as @tail_dup, but the function's return value is marked nonnull. +; The checks still expect a ret right after each tail call. +define nonnull i8* @nonnull_dup() { +; CHECK-LABEL: nonnull_dup +; CHECK: tail call i8* @f0() +; CHECK-NEXT: ret i8* +; CHECK: tail call i8* @f1() +; CHECK-NEXT: ret i8* +bb0: + %tmp0 = tail call i8* @f0() + br label %return +bb1: + %tmp1 = tail call i8* @f1() + br label %return +return: + %retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ] + ret i8* %retval +} + +; Here both calls are marked noalias while the function's return value is not. +; The checks again expect a ret right after each tail call. +define i8* @noalias_dup() { +; CHECK-LABEL: noalias_dup +; CHECK: tail call noalias i8* @f0() +; CHECK-NEXT: ret i8* +; CHECK: tail call noalias i8* @f1() +; CHECK-NEXT: ret i8* +bb0: + %tmp0 = tail call noalias i8* @f0() + br label %return +bb1: + %tmp1 = tail call noalias i8* @f1() + br label %return +return: + %retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ] + ret i8* %retval +} + +; Use inreg as a way of testing that attributes (other than nonnull and +; noalias) disable the tailcall duplication in cgp. 
+ +define inreg i8* @inreg_nodup() { +; The function's return value is marked inreg while the two calls are not; the +; checks below expect the branch to %return to stay right after each tail call +; rather than a ret. +; CHECK-LABEL: inreg_nodup +; CHECK: tail call i8* @f0() +; CHECK-NEXT: br label %return +; CHECK: tail call i8* @f1() +; CHECK-NEXT: br label %return +bb0: + %tmp0 = tail call i8* @f0() + br label %return +bb1: + %tmp1 = tail call i8* @f1() + br label %return +return: + %retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ] + ret i8* %retval +} |