summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/CodeGenPrepare/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/Transforms/CodeGenPrepare/ARM')
-rw-r--r--llvm/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll37
-rw-r--r--llvm/test/Transforms/CodeGenPrepare/ARM/large-offset-gep.ll157
-rw-r--r--llvm/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg3
-rw-r--r--llvm/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll43
-rw-r--r--llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll56
-rw-r--r--llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll420
-rw-r--r--llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll232
-rw-r--r--llvm/test/Transforms/CodeGenPrepare/ARM/splitgep.ll40
-rw-r--r--llvm/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll77
9 files changed, 1065 insertions, 0 deletions
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll
new file mode 100644
index 00000000000..f5644e4ad31
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -loop-unroll -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+; Loop form of a 32-bit bit reversal: every iteration takes bit %i.08 of %a,
+; shifts it to bit (31 - %i.08) and ORs it into the accumulator %b.07.
+; The back edge carries !llvm.loop !3; once -loop-unroll and -codegenprepare
+; have run, the output is expected to contain a call to @llvm.bitreverse.i32.
+; CHECK-LABEL: @f
+define i32 @f(i32 %a) #0 {
+; CHECK: call i32 @llvm.bitreverse.i32
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret i32 %or
+
+for.body: ; preds = %for.body, %entry
+ %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %b.07 = phi i32 [ 0, %entry ], [ %or, %for.body ]
+ %shr = lshr i32 %a, %i.08
+ %and = and i32 %shr, 1
+ %sub = sub nuw nsw i32 31, %i.08
+ %shl = shl i32 %and, %sub
+ %or = or i32 %shl, %b.07
+ %inc = add nuw nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %inc, 32
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !3
+}
+
+attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+dsp,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
+!2 = !{!"clang version 3.8.0"}
+!3 = distinct !{!3, !4}
+!4 = !{!"llvm.loop.unroll.full"}
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/large-offset-gep.ll
new file mode 100644
index 00000000000..9b9f58c70ac
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/large-offset-gep.ll
@@ -0,0 +1,157 @@
+; RUN: llc -mtriple=armv6m-linux-gnueabi -verify-machineinstrs -o - %s -disable-constant-hoisting | FileCheck %s
+
+%struct_type = type { [10000 x i32], i32, i32 }
+
+; The two trailing i32 fields of %struct_type sit 40000 and 40004 bytes past the
+; base (the [10000 x i32] member comes first). The base pointer is loaded in
+; entry and both fields are stored to inside the loop. Expected code: a store
+; through a register-only address immediately followed by a store at #4 from a
+; register, one constant-pool entry holding 40000, and no further LCPI0 mention
+; after it.
+define void @test1(%struct_type** %s, i32 %n) {
+; CHECK-LABEL: test1
+entry:
+ %struct = load %struct_type*, %struct_type** %s
+ br label %while_cond
+
+while_cond:
+ %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+ %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+ %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+ %cmp = icmp slt i32 %phi, %n
+ br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK: str r{{[0-9]+}}, [r{{[0-9]+}}]
+; CHECK-NEXT: str r{{[0-9]+}}, [r{{[0-9]+}}, #4]
+ %i = add i32 %phi, 1
+ store i32 %i, i32* %gep0
+ store i32 %phi, i32* %gep1
+ br label %while_cond
+
+while_end:
+ ret void
+; CHECK: .LCPI0_0:
+; CHECK-NEXT: .long 40000
+; CHECK-NOT: LCPI0
+}
+
+; Same store pattern as the loaded-pointer case, but here the struct base is a
+; function argument that entry compares against null before the loop is
+; entered. The expected stores and the single 40000 constant-pool entry are
+; unchanged.
+define void @test2(%struct_type* %struct, i32 %n) {
+; CHECK-LABEL: test2
+entry:
+ %cmp = icmp eq %struct_type* %struct, null
+ br i1 %cmp, label %while_end, label %while_cond
+
+while_cond:
+ %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+ %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+ %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+ %cmp1 = icmp slt i32 %phi, %n
+ br i1 %cmp1, label %while_body, label %while_end
+
+while_body:
+; CHECK: str r{{[0-9]+}}, [r{{[0-9]+}}]
+; CHECK-NEXT: str r{{[0-9]+}}, [r{{[0-9]+}}, #4]
+ %i = add i32 %phi, 1
+ store i32 %i, i32* %gep0
+ store i32 %phi, i32* %gep1
+ br label %while_cond
+
+while_end:
+ ret void
+; CHECK: .LCPI1_0:
+; CHECK-NEXT: .long 40000
+; CHECK-NOT: LCPI1
+}
+
+; Variant in which the struct base is a phi (%struct in %if_end) that merges the
+; two pointer arguments %s1 and %s2, followed by a null check. The expected
+; stores and the single 40000 constant-pool entry are the same as for a plain
+; argument base.
+define void @test3(%struct_type* %s1, %struct_type* %s2, i1 %cond, i32 %n) {
+; CHECK-LABEL: test3
+entry:
+ br i1 %cond, label %if_true, label %if_end
+
+if_true:
+ br label %if_end
+
+if_end:
+ %struct = phi %struct_type* [ %s1, %entry ], [ %s2, %if_true ]
+ %cmp = icmp eq %struct_type* %struct, null
+ br i1 %cmp, label %while_end, label %while_cond
+
+while_cond:
+ %phi = phi i32 [ 0, %if_end ], [ %i, %while_body ]
+ %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+ %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+ %cmp1 = icmp slt i32 %phi, %n
+ br i1 %cmp1, label %while_body, label %while_end
+
+while_body:
+; CHECK: str r{{[0-9]+}}, [r{{[0-9]+}}]
+; CHECK-NEXT: str r{{[0-9]+}}, [r{{[0-9]+}}, #4]
+ %i = add i32 %phi, 1
+ store i32 %i, i32* %gep0
+ store i32 %phi, i32* %gep1
+ br label %while_cond
+
+while_end:
+ ret void
+; CHECK: .LCPI2_0:
+; CHECK-NEXT: .long 40000
+; CHECK-NOT: LCPI2
+}
+
+declare %struct_type* @foo()
+
+; Variant in which the struct base is the result of an invoke of @foo whose
+; normal destination is the loop header and whose unwind destination is the
+; %cleanup landing pad. The expected stores and the single 40000 constant-pool
+; entry are the same as in the other struct-based cases.
+define void @test4(i32 %n) personality i32 (...)* @__FrameHandler {
+; CHECK-LABEL: test4
+entry:
+ %struct = invoke %struct_type* @foo() to label %while_cond unwind label %cleanup
+
+while_cond:
+ %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+ %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+ %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+ %cmp = icmp slt i32 %phi, %n
+ br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK: str r{{[0-9]+}}, [r{{[0-9]+}}]
+; CHECK-NEXT: str r{{[0-9]+}}, [r{{[0-9]+}}, #4]
+ %i = add i32 %phi, 1
+ store i32 %i, i32* %gep0
+ store i32 %phi, i32* %gep1
+ br label %while_cond
+
+while_end:
+ ret void
+
+cleanup:
+ landingpad { i8*, i32 } cleanup
+ unreachable
+; CHECK: .LCPI3_0:
+; CHECK-NEXT: .long 40000
+; CHECK-NOT: LCPI3
+}
+
+declare i32 @__FrameHandler(...)
+
+; Array variant: elements 20000 and 20001 of a [65536 x i32] (byte offsets
+; 80000 and 80004) are stored to inside the loop. Expected code: a store
+; through a register-only address immediately followed by a store at #4, one
+; constant-pool entry holding 80000, and no further LCPI4 mention after it.
+define void @test5([65536 x i32]** %s, i32 %n) {
+; CHECK-LABEL: test5
+entry:
+ %struct = load [65536 x i32]*, [65536 x i32]** %s
+ br label %while_cond
+
+while_cond:
+ %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+ %gep0 = getelementptr [65536 x i32], [65536 x i32]* %struct, i64 0, i32 20000
+ %gep1 = getelementptr [65536 x i32], [65536 x i32]* %struct, i64 0, i32 20001
+ %cmp = icmp slt i32 %phi, %n
+ br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK: str r{{[0-9]+}}, [r{{[0-9]+}}]
+; CHECK-NEXT: str r{{[0-9]+}}, [r{{[0-9]+}}, #4]
+ %i = add i32 %phi, 1
+ store i32 %i, i32* %gep0
+ store i32 %phi, i32* %gep1
+ br label %while_cond
+
+while_end:
+ ret void
+; CHECK: .LCPI4_0:
+; CHECK-NEXT: .long 80000
+; CHECK-NOT: LCPI4
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg b/llvm/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg
new file mode 100644
index 00000000000..98c6700c209
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'ARM' in config.root.targets:
+ config.unsupported = True
+
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll
new file mode 100644
index 00000000000..8b70d9381cd
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll
@@ -0,0 +1,43 @@
+; RUN: opt -codegenprepare -mtriple=arm7-unknown-unknown -S < %s | FileCheck %s
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1) nounwind
+
+; %dst is declared align 4 and %src align 8. The calls that state no alignment
+; or only align 2 are expected to come out with the parameter alignments (4 and
+; 8); the call that already states larger alignments (8 and 16) is expected to
+; be left as written.
+define void @test_memcpy(i8* align 4 %dst, i8* align 8 %src, i32 %N) {
+; CHECK-LABEL: @test_memcpy
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 8 %src, i32 %N, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 8 %src, i32 %N, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %dst, i8* align 16 %src, i32 %N, i1 false)
+entry:
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %N, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %dst, i8* align 2 %src, i32 %N, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %dst, i8* align 16 %src, i32 %N, i1 false)
+ ret void
+}
+
+; %dst is declared align 4 and %src align 8. The memmove calls that state no
+; alignment or only align 2 are expected to come out with the parameter
+; alignments (4 and 8); the call that already states 8 and 16 is expected to be
+; left as written.
+define void @test_memmove(i8* align 4 %dst, i8* align 8 %src, i32 %N) {
+; CHECK-LABEL: @test_memmove
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 8 %src, i32 %N, i1 false)
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 8 %src, i32 %N, i1 false)
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 8 %dst, i8* align 16 %src, i32 %N, i1 false)
+entry:
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %N, i1 false)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* align 2 %dst, i8* align 2 %src, i32 %N, i1 false)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* align 8 %dst, i8* align 16 %src, i32 %N, i1 false)
+ ret void
+}
+
+; %dst is declared align 4. The memset calls that state no alignment or only
+; align 2 are expected to come out with align 4; the call that already states
+; align 8 is expected to be left as written.
+define void @test_memset(i8* align 4 %dst, i8 %val, i32 %N) {
+; CHECK-LABEL: @test_memset
+; CHECK: call void @llvm.memset.p0i8.i32(i8* align 4 %dst, i8 %val, i32 %N, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i32(i8* align 4 %dst, i8 %val, i32 %N, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i32(i8* align 8 %dst, i8 %val, i32 %N, i1 false)
+entry:
+ call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val, i32 %N, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* align 2 %dst, i8 %val, i32 %N, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* align 8 %dst, i8 %val, i32 %N, i1 false)
+ ret void
+}
+
+
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll
new file mode 100644
index 00000000000..3fbc2133141
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll
@@ -0,0 +1,56 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv8m.main-arm-none-eabi"
+
+; Negative test: the compare of %arg0 against 0 is in entry, while the add of
+; -1 is in %if.end, which is reached only through %if.else and its call to
+; @bar. No with.overflow.i32 intrinsic may appear in this function's output.
+; CHECK-LABEL: uadd_overflow_too_far_cmp_dom
+; CHECK-NOT: with.overflow.i32
+define i32 @uadd_overflow_too_far_cmp_dom(i32 %arg0) {
+entry:
+ %cmp = icmp ne i32 %arg0, 0
+ br i1 %cmp, label %if.else, label %if.then
+
+if.then:
+ call void @foo()
+ br label %exit
+
+if.else:
+ call void @bar()
+ br label %if.end
+
+if.end:
+ %dec = add nsw i32 %arg0, -1
+ br label %exit
+
+exit:
+ %res = phi i32 [ %arg0, %if.then ], [ %dec, %if.end ]
+ ret i32 %res
+}
+
+; Negative test: the add of -1 (%dec) is in entry, while the compare of %arg0
+; against 0 (%cmp.i.i) is in %if.end, after the branch and the calls to @foo or
+; @bar. No with.overflow.i32 intrinsic may appear in this function's output.
+; CHECK-LABEL: uadd_overflow_too_far_math_dom
+; CHECK-NOT: with.overflow.i32
+define i32 @uadd_overflow_too_far_math_dom(i32 %arg0, i32 %arg1) {
+entry:
+ %dec = add nsw i32 %arg0, -1
+ %cmp = icmp ugt i32 %arg0, 1
+ br i1 %cmp, label %if.else, label %if.then
+
+if.then:
+ call void @foo()
+ br label %if.end
+
+if.else:
+ call void @bar()
+ br label %if.end
+
+if.end:
+ %cmp.i.i = icmp ne i32 %arg0, 0
+ %tobool = zext i1 %cmp.i.i to i32
+ br label %exit
+
+exit:
+ ret i32 %tobool
+}
+
+declare void @foo()
+declare void @bar()
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll
new file mode 100644
index 00000000000..a26edb19da0
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll
@@ -0,0 +1,420 @@
+; RUN: opt -S -codegenprepare -mtriple=thumbv7m -disable-complex-addr-modes=false -addr-sink-new-select=true -addr-sink-new-phis=true < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+@gv1 = common global i32 0, align 4
+@gv2 = common global i32 0, align 4
+
+; Phi selects between ptr and gep with ptr as base and constant offset.
+; Expected: the pointer phi is gone and an i32 phi of the byte offsets
+; (4 from %if.then, 0 from %entry) appears instead.
+define void @test_phi_onegep_offset(i32* %ptr, i32 %value) {
+; CHECK-LABEL: @test_phi_onegep_offset
+; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if.then ]
+; CHECK: phi i32 [ 4, %if.then ], [ 0, %entry ]
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %gep = getelementptr inbounds i32, i32* %ptr, i32 1
+ br label %if.end
+
+if.end:
+ %phi = phi i32* [ %ptr, %entry ], [ %gep, %if.then ]
+ store i32 %value, i32* %phi, align 4
+ ret void
+}
+
+; Phi selects between two geps with same base, different constant offsets.
+; Expected: the pointer phi is gone and an i32 phi of the byte offsets
+; (8 from %if.else, 4 from %if.then) appears instead.
+define void @test_phi_twogep_offset(i32* %ptr, i32 %value) {
+; CHECK-LABEL: @test_phi_twogep_offset
+; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+; CHECK: phi i32 [ 8, %if.else ], [ 4, %if.then ]
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1
+ br label %if.end
+
+if.else:
+ %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2
+ br label %if.end
+
+if.end:
+ %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+ store i32 %value, i32* %phi, align 4
+ ret void
+}
+
+; Phi selects between ptr and gep with ptr as base and nonconstant offset.
+; Expected: the pointer phi is gone and an i32 phi over the index values
+; (%off from %if.then, 0 from %entry) appears instead.
+define void @test_phi_onegep_nonconst_offset(i32* %ptr, i32 %value, i32 %off) {
+; CHECK-LABEL: @test_phi_onegep_nonconst_offset
+; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if.then ]
+; CHECK: phi i32 [ %off, %if.then ], [ 0, %entry ]
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %gep = getelementptr inbounds i32, i32* %ptr, i32 %off
+ br label %if.end
+
+if.end:
+ %phi = phi i32* [ %ptr, %entry ], [ %gep, %if.then ]
+ store i32 %value, i32* %phi, align 4
+ ret void
+}
+
+; Phi selects between two geps with same base, different nonconstant offsets.
+; Expected: the pointer phi is gone and an i32 phi over the index values
+; (%off2 from %if.else, %off1 from %if.then) appears instead.
+define void @test_phi_twogep_nonconst_offset(i32* %ptr, i32 %value, i32 %off1, i32 %off2) {
+; CHECK-LABEL: @test_phi_twogep_nonconst_offset
+; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+; CHECK: phi i32 [ %off2, %if.else ], [ %off1, %if.then ]
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off1
+ br label %if.end
+
+if.else:
+ %gep2 = getelementptr inbounds i32, i32* %ptr, i32 %off2
+ br label %if.end
+
+if.end:
+ %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+ store i32 %value, i32* %phi, align 4
+ ret void
+}
+
+; Phi selects between two geps with different base, same constant offset.
+; Expected: the phi over the geps is gone and a pointer phi over the two bases
+; (%ptr2 from %if.else, %ptr1 from %if.then) appears instead.
+define void @test_phi_twogep_base(i32* %ptr1, i32* %ptr2, i32 %value) {
+; CHECK-LABEL: @test_phi_twogep_base
+; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+; CHECK: phi i32* [ %ptr2, %if.else ], [ %ptr1, %if.then ]
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %gep1 = getelementptr inbounds i32, i32* %ptr1, i32 1
+ br label %if.end
+
+if.else:
+ %gep2 = getelementptr inbounds i32, i32* %ptr2, i32 1
+ br label %if.end
+
+if.end:
+ %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+ store i32 %value, i32* %phi, align 4
+ ret void
+}
+
+; Phi selects between two geps with different base global variables, same
+; constant offset. Expected: the phi over the geps is gone and a pointer phi
+; over the globals (@gv2 from %if.else, @gv1 from %if.then) appears instead.
+define void @test_phi_twogep_base_gv(i32 %value) {
+; CHECK-LABEL: @test_phi_twogep_base_gv
+; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+; CHECK: phi i32* [ @gv2, %if.else ], [ @gv1, %if.then ]
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %gep1 = getelementptr inbounds i32, i32* @gv1, i32 1
+ br label %if.end
+
+if.else:
+ %gep2 = getelementptr inbounds i32, i32* @gv2, i32 1
+ br label %if.end
+
+if.end:
+ %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+ store i32 %value, i32* %phi, align 4
+ ret void
+}
+
+; Select between ptr and gep with ptr as base and constant offset.
+; Expected: the pointer select is gone and an i32 select of the byte offsets
+; (0 when %cmp is true, 4 otherwise) appears instead.
+define void @test_select_onegep_offset(i32* %ptr, i32 %value) {
+; CHECK-LABEL: @test_select_onegep_offset
+; CHECK-NOT: select i1 %cmp, i32* %ptr, i32* %gep
+; CHECK: select i1 %cmp, i32 0, i32 4
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ %gep = getelementptr inbounds i32, i32* %ptr, i32 1
+ %select = select i1 %cmp, i32* %ptr, i32* %gep
+ store i32 %value, i32* %select, align 4
+ ret void
+}
+
+; Select between two geps with same base, different constant offsets.
+; Expected: the pointer select is gone and an i32 select of the byte offsets
+; (4 when %cmp is true, 8 otherwise) appears instead.
+define void @test_select_twogep_offset(i32* %ptr, i32 %value) {
+; CHECK-LABEL: @test_select_twogep_offset
+; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2
+; CHECK: select i1 %cmp, i32 4, i32 8
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1
+ %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2
+ %select = select i1 %cmp, i32* %gep1, i32* %gep2
+ store i32 %value, i32* %select, align 4
+ ret void
+}
+
+; Select between ptr and gep with ptr as base and nonconstant offset.
+; Expected: the pointer select is gone and an i32 select over the index values
+; (0 when %cmp is true, %off otherwise) appears instead.
+define void @test_select_onegep_nonconst_offset(i32* %ptr, i32 %value, i32 %off) {
+; CHECK-LABEL: @test_select_onegep_nonconst_offset
+; CHECK-NOT: select i1 %cmp, i32* %ptr, i32* %gep
+; CHECK: select i1 %cmp, i32 0, i32 %off
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ %gep = getelementptr inbounds i32, i32* %ptr, i32 %off
+ %select = select i1 %cmp, i32* %ptr, i32* %gep
+ store i32 %value, i32* %select, align 4
+ ret void
+}
+
+; Select between two geps with same base, different nonconstant offsets.
+; Expected: the pointer select is gone and an i32 select over the index values
+; (%off1 when %cmp is true, %off2 otherwise) appears instead.
+define void @test_select_twogep_nonconst_offset(i32* %ptr, i32 %value, i32 %off1, i32 %off2) {
+; CHECK-LABEL: @test_select_twogep_nonconst_offset
+; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2
+; CHECK: select i1 %cmp, i32 %off1, i32 %off2
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off1
+ %gep2 = getelementptr inbounds i32, i32* %ptr, i32 %off2
+ %select = select i1 %cmp, i32* %gep1, i32* %gep2
+ store i32 %value, i32* %select, align 4
+ ret void
+}
+
+; Select between two geps with different base, same constant offset.
+; Expected: the select over the geps is gone and a pointer select over the two
+; bases (%ptr1 when %cmp is true, %ptr2 otherwise) appears instead.
+define void @test_select_twogep_base(i32* %ptr1, i32* %ptr2, i32 %value) {
+; CHECK-LABEL: @test_select_twogep_base
+; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2
+; CHECK: select i1 %cmp, i32* %ptr1, i32* %ptr2
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ %gep1 = getelementptr inbounds i32, i32* %ptr1, i32 1
+ %gep2 = getelementptr inbounds i32, i32* %ptr2, i32 1
+ %select = select i1 %cmp, i32* %gep1, i32* %gep2
+ store i32 %value, i32* %select, align 4
+ ret void
+}
+
+; Select between two geps with different base global variables, same constant
+; offset. Expected: the select over the geps is gone and a pointer select over
+; the globals (@gv1 when %cmp is true, @gv2 otherwise) appears instead.
+define void @test_select_twogep_base_gv(i32 %value) {
+; CHECK-LABEL: @test_select_twogep_base_gv
+; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2
+; CHECK: select i1 %cmp, i32* @gv1, i32* @gv2
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ %gep1 = getelementptr inbounds i32, i32* @gv1, i32 1
+ %gep2 = getelementptr inbounds i32, i32* @gv2, i32 1
+ %select = select i1 %cmp, i32* %gep1, i32* %gep2
+ store i32 %value, i32* %select, align 4
+ ret void
+}
+
+; The pointer phi lives in %if1.end but is only stored through in %if2.end.
+; The replacement i32 phi is expected in the block of the original phi
+; (%if1.end), not in the block of the store that uses the address.
+; CHECK-LABEL: @test_phi_different_block
+; CHECK-LABEL: if1.end
+; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if1.then ]
+; CHECK: phi i32 [ 4, %if1.then ], [ 0, %entry ]
+define void @test_phi_different_block(i32* %ptr, i32 %value1, i32 %value2) {
+entry:
+ %cmp1 = icmp sgt i32 %value1, 0
+ br i1 %cmp1, label %if1.then, label %if1.end
+
+if1.then:
+ %gep = getelementptr inbounds i32, i32* %ptr, i32 1
+ br label %if1.end
+
+if1.end:
+ %phi = phi i32* [ %ptr, %entry ], [ %gep, %if1.then ]
+ %cmp2 = icmp sgt i32 %value2, 0
+ br i1 %cmp2, label %if2.then, label %if2.end
+
+if2.then:
+ store i32 %value1, i32* %ptr, align 4
+ br label %if2.end
+
+if2.end:
+ store i32 %value2, i32* %phi, align 4
+ ret void
+}
+
+; A phi with three incoming values should be optimised: the pointer phi over
+; three geps of %ptr is expected to become one i32 phi of the byte offsets
+; 12, 8 and 4.
+; CHECK-LABEL: @test_phi_threegep
+; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else.then ], [ %gep3, %if.else.else ]
+; CHECK: phi i32 [ 12, %if.else.else ], [ 8, %if.else.then ], [ 4, %if.then ]
+define void @test_phi_threegep(i32* %ptr, i32 %value1, i32 %value2) {
+entry:
+ %cmp1 = icmp sgt i32 %value1, 0
+ br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+ %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1
+ br label %if.end
+
+if.else:
+ %cmp2 = icmp sgt i32 %value2, 0
+ br i1 %cmp2, label %if.else.then, label %if.else.else
+
+if.else.then:
+ %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2
+ br label %if.end
+
+if.else.else:
+ %gep3 = getelementptr inbounds i32, i32* %ptr, i32 3
+ br label %if.end
+
+if.end:
+ %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else.then ], [ %gep3, %if.else.else ]
+ store i32 %value1, i32* %phi, align 4
+ ret void
+}
+
+; A phi with two incoming values but three geps due to nesting should be
+; optimised: the inner pointer phi (%gep4 in %if.else.end) is expected to become
+; an i32 phi of 12 and 8, and the outer phi an i32 phi of that value and 4.
+; CHECK-LABEL: @test_phi_threegep_nested
+; CHECK: %[[PHI:[a-z0-9_]+]] = phi i32 [ 12, %if.else.else ], [ 8, %if.else.then ]
+; CHECK: phi i32 [ %[[PHI]], %if.else.end ], [ 4, %if.then ]
+define void @test_phi_threegep_nested(i32* %ptr, i32 %value1, i32 %value2) {
+entry:
+ %cmp1 = icmp sgt i32 %value1, 0
+ br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+ %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1
+ br label %if.end
+
+if.else:
+ %cmp2 = icmp sgt i32 %value2, 0
+ br i1 %cmp2, label %if.else.then, label %if.else.else
+
+if.else.then:
+ %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2
+ br label %if.else.end
+
+if.else.else:
+ %gep3 = getelementptr inbounds i32, i32* %ptr, i32 3
+ br label %if.else.end
+
+if.else.end:
+ %gep4 = phi i32* [ %gep2, %if.else.then ], [ %gep3, %if.else.else ]
+ store i32 %value2, i32* %ptr, align 4
+ br label %if.end
+
+if.end:
+ %phi = phi i32* [ %gep1, %if.then ], [ %gep4, %if.else.end ]
+ store i32 %value1, i32* %phi, align 4
+ ret void
+}
+
+; A nested select is expected to be optimised into two i32 selects over byte
+; offsets: the inner one picks 4 or 8 on %cmp2, the outer one picks 4 or the
+; inner result on %cmp1.
+; CHECK-LABEL: @test_nested_select
+; CHECK: %[[SELECT:[a-z0-9_]+]] = select i1 %cmp2, i32 4, i32 8
+; CHECK: select i1 %cmp1, i32 4, i32 %[[SELECT]]
+define void @test_nested_select(i32* %ptr, i32 %value1, i32 %value2) {
+entry:
+ %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1
+ %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2
+ %cmp1 = icmp sgt i32 %value1, 0
+ %cmp2 = icmp sgt i32 %value2, 0
+ %select1 = select i1 %cmp2, i32* %gep1, i32* %gep2
+ %select2 = select i1 %cmp1, i32* %gep1, i32* %select1
+ store i32 %value1, i32* %select2, align 4
+ ret void
+}
+
+; Scaling the offset by a different amount is expected not to be optimised:
+; %gep1 indexes i32 elements and %gep2 indexes i16 elements with the same %off,
+; so the pointer select is expected to remain in the output.
+; CHECK-LABEL: @test_select_different_scale
+; CHECK: select i1 %cmp, i32* %gep1, i32* %castgep
+define void @test_select_different_scale(i32* %ptr, i32 %value, i32 %off) {
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ %castptr = bitcast i32* %ptr to i16*
+ %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off
+ %gep2 = getelementptr inbounds i16, i16* %castptr, i32 %off
+ %castgep = bitcast i16* %gep2 to i32*
+ %select = select i1 %cmp, i32* %gep1, i32* %castgep
+ store i32 %value, i32* %select, align 4
+ ret void
+}
+
+; A select between two values is already the best we can do: no gep is
+; involved, so the pointer select is expected to appear unchanged in the output.
+; CHECK-LABEL: @test_select_trivial
+; CHECK: select i1 %cmp, i32* %ptr1, i32* %ptr2
+define void @test_select_trivial(i32* %ptr1, i32* %ptr2, i32 %value) {
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ %select = select i1 %cmp, i32* %ptr1, i32* %ptr2
+ store i32 %value, i32* %select, align 4
+ ret void
+}
+
+; A select between two global variables is already the best we can do: no gep
+; is involved, so the pointer select is expected to appear unchanged in the
+; output.
+; CHECK-LABEL: @test_select_trivial_gv
+; CHECK: select i1 %cmp, i32* @gv1, i32* @gv2
+define void @test_select_trivial_gv(i32 %value) {
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ %select = select i1 %cmp, i32* @gv1, i32* @gv2
+ store i32 %value, i32* %select, align 4
+ ret void
+}
+
+; Same for a select between a value and global variable: the pointer select of
+; %ptr and @gv2 is expected to appear unchanged in the output.
+; CHECK-LABEL: @test_select_trivial_ptr_gv
+; CHECK: select i1 %cmp, i32* %ptr, i32* @gv2
+define void @test_select_trivial_ptr_gv(i32* %ptr, i32 %value) {
+entry:
+ %cmp = icmp sgt i32 %value, 0
+ %select = select i1 %cmp, i32* %ptr, i32* @gv2
+ store i32 %value, i32* %select, align 4
+ ret void
+}
+
+; Same for a select between a global variable and null, though the test needs to
+; be a little more complicated to avoid dereferencing a potential null pointer:
+; the select result is only loaded from and stored to in %if.then, which is
+; entered when %cmp.i is true (the case in which the select yields @gv1).
+; CHECK-LABEL: @test_select_trivial_gv_null
+; CHECK: select i1 %cmp.i, i32* @gv1, i32* null
+define void @test_select_trivial_gv_null(){
+entry:
+ %gv1_val = load i32, i32* @gv1, align 4
+ %cmp.i = icmp eq i32 %gv1_val, 0
+ %spec.select.i = select i1 %cmp.i, i32* @gv1, i32* null
+ br i1 %cmp.i, label %if.then, label %if.end
+
+if.then:
+ %val = load i32, i32* %spec.select.i, align 4
+ %inc = add nsw i32 %val, 1
+ store i32 %inc, i32* %spec.select.i, align 4
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+; Same for a select between a value and null: the select result is only loaded
+; from and stored to in %if.then, which is entered when %cmp.i is true (the case
+; in which the select yields %ptr). The pointer select is expected to remain.
+; CHECK-LABEL: @test_select_trivial_ptr_null
+; CHECK: select i1 %cmp.i, i32* %ptr, i32* null
+define void @test_select_trivial_ptr_null(i32* %ptr){
+entry:
+ %gv1_val = load i32, i32* %ptr, align 4
+ %cmp.i = icmp eq i32 %gv1_val, 0
+ %spec.select.i = select i1 %cmp.i, i32* %ptr, i32* null
+ br i1 %cmp.i, label %if.then, label %if.end
+
+if.then:
+ %val = load i32, i32* %spec.select.i, align 4
+ %inc = add nsw i32 %val, 1
+ store i32 %inc, i32* %spec.select.i, align 4
+ br label %if.end
+
+if.end:
+ ret void
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll
new file mode 100644
index 00000000000..9dd0b373aa2
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll
@@ -0,0 +1,232 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=armv7-apple-darwin < %s -codegenprepare -S | FileCheck -check-prefix=NEON %s
+; RUN: opt -mtriple=armv6-unknown-linux < %s -codegenprepare -S | FileCheck -check-prefix=NONEON %s
+
+; %za is a zext computed in entry and used by an add in if.then and a sub in
+; if.else, each of which also takes a zext of %b. With the NEON prefix
+; (armv7-apple-darwin run) a copy of the zext of %a is expected in each
+; successor next to its user and none in entry; with the NONEON prefix
+; (armv6-unknown-linux run) the zext is expected to stay in entry.
+define <8 x i16> @sink_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
+; NEON-LABEL: @sink_zext(
+; NEON-NEXT: entry:
+; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON: if.then:
+; NEON-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16>
+; NEON-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
+; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
+; NEON-NEXT: ret <8 x i16> [[RES_1]]
+; NEON: if.else:
+; NEON-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+; NEON-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
+; NEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
+; NEON-NEXT: ret <8 x i16> [[RES_2]]
+;
+; NONEON-LABEL: @sink_zext(
+; NONEON-NEXT: entry:
+; NONEON-NEXT: [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
+; NONEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON: if.then:
+; NONEON-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16>
+; NONEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[ZB_1]]
+; NONEON-NEXT: ret <8 x i16> [[RES_1]]
+; NONEON: if.else:
+; NONEON-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+; NONEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[ZA]], [[ZB_2]]
+; NONEON-NEXT: ret <8 x i16> [[RES_2]]
+;
+entry:
+ %za = zext <8 x i8> %a to <8 x i16>
+ br i1 %c, label %if.then, label %if.else
+
+if.then:
+ %zb.1 = zext <8 x i8> %b to <8 x i16>
+ %res.1 = add <8 x i16> %za, %zb.1
+ ret <8 x i16> %res.1
+
+if.else:
+ %zb.2 = zext <8 x i8> %b to <8 x i16>
+ %res.2 = sub <8 x i16> %za, %zb.2
+ ret <8 x i16> %res.2
+}
+
+; Signed counterpart of the zext sinking case: %za is a sext computed in entry
+; and used by an add in if.then and a sub in if.else, each of which also takes
+; a sext of %b. With the NEON prefix a copy of the sext of %a is expected in
+; each successor and none in entry; with the NONEON prefix it is expected to
+; stay in entry.
+define <8 x i16> @sink_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
+; NEON-LABEL: @sink_sext(
+; NEON-NEXT: entry:
+; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON: if.then:
+; NEON-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
+; NEON-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
+; NEON-NEXT: ret <8 x i16> [[RES_1]]
+; NEON: if.else:
+; NEON-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+; NEON-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
+; NEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
+; NEON-NEXT: ret <8 x i16> [[RES_2]]
+;
+; NONEON-LABEL: @sink_sext(
+; NONEON-NEXT: entry:
+; NONEON-NEXT: [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; NONEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON: if.then:
+; NONEON-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
+; NONEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[ZB_1]]
+; NONEON-NEXT: ret <8 x i16> [[RES_1]]
+; NONEON: if.else:
+; NONEON-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+; NONEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[ZA]], [[ZB_2]]
+; NONEON-NEXT: ret <8 x i16> [[RES_2]]
+;
+entry:
+ %za = sext <8 x i8> %a to <8 x i16>
+ br i1 %c, label %if.then, label %if.else
+
+if.then:
+ %zb.1 = sext <8 x i8> %b to <8 x i16>
+ %res.1 = add <8 x i16> %za, %zb.1
+ ret <8 x i16> %res.1
+
+if.else:
+ %zb.2 = sext <8 x i8> %b to <8 x i16>
+ %res.2 = sub <8 x i16> %za, %zb.2
+ ret <8 x i16> %res.2
+}
+
+; Negative case: the other operand of the add is the plain <8 x i16> argument
+; %b rather than an extend, and if.else does not use %za at all. Both prefixes
+; expect the zext of %a to stay in entry.
+define <8 x i16> @do_not_sink_nonfree_zext(<8 x i8> %a, <8 x i16> %b, i1 %c) {
+;
+; NEON-LABEL: @do_not_sink_nonfree_zext(
+; NEON-NEXT: entry:
+; NEON-NEXT: [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
+; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON: if.then:
+; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]]
+; NEON-NEXT: ret <8 x i16> [[RES_1]]
+; NEON: if.else:
+; NEON-NEXT: ret <8 x i16> [[B]]
+;
+; NONEON-LABEL: @do_not_sink_nonfree_zext(
+; NONEON-NEXT: entry:
+; NONEON-NEXT: [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
+; NONEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON: if.then:
+; NONEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]]
+; NONEON-NEXT: ret <8 x i16> [[RES_1]]
+; NONEON: if.else:
+; NONEON-NEXT: ret <8 x i16> [[B]]
+;
+entry:
+ %za = zext <8 x i8> %a to <8 x i16>
+ br i1 %c, label %if.then, label %if.else
+
+if.then:
+ %res.1 = add <8 x i16> %za, %b
+ ret <8 x i16> %res.1
+
+if.else:
+ ret <8 x i16> %b
+}
+
+; Negative case, signed counterpart of the nonfree zext test: the other operand
+; of the add is the plain <8 x i16> argument %b rather than an extend, and
+; if.else does not use %za at all. Both prefixes expect the sext of %a to stay
+; in entry.
+define <8 x i16> @do_not_sink_nonfree_sext(<8 x i8> %a, <8 x i16> %b, i1 %c) {
+;
+; NEON-LABEL: @do_not_sink_nonfree_sext(
+; NEON-NEXT: entry:
+; NEON-NEXT: [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON: if.then:
+; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]]
+; NEON-NEXT: ret <8 x i16> [[RES_1]]
+; NEON: if.else:
+; NEON-NEXT: ret <8 x i16> [[B]]
+;
+; NONEON-LABEL: @do_not_sink_nonfree_sext(
+; NONEON-NEXT: entry:
+; NONEON-NEXT: [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; NONEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON: if.then:
+; NONEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]]
+; NONEON-NEXT: ret <8 x i16> [[RES_1]]
+; NONEON: if.else:
+; NONEON-NEXT: ret <8 x i16> [[B]]
+;
+entry:
+ %za = sext <8 x i8> %a to <8 x i16>
+ br i1 %c, label %if.then, label %if.else
+
+if.then:
+ %res.1 = add <8 x i16> %za, %b
+ ret <8 x i16> %res.1
+
+if.else:
+ ret <8 x i16> %b
+}
+
+declare void @user1(<8 x i16>)
+
+; Exts can be sunk.
+; %z1 is used only by the add in if.then; %z3 is used by the call to @user1 in
+; entry and by the sub in if.else. With the NEON prefix the zext of %s1 is
+; expected in if.then instead of entry, and a second sext of %s3 is expected in
+; if.else while the original %z3 stays in entry for the call. With the NONEON
+; prefix both extends are expected to stay in entry.
+define <8 x i16> @sink_shufflevector_ext_subadd_multiuse(<16 x i8> %a, <16 x i8> %b) {
+; NEON-LABEL: @sink_shufflevector_ext_subadd_multiuse(
+; NEON-NEXT: entry:
+; NEON-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT: [[Z3:%.*]] = sext <8 x i8> [[S3]] to <8 x i16>
+; NEON-NEXT: call void @user1(<8 x i16> [[Z3]])
+; NEON-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON: if.then:
+; NEON-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16>
+; NEON-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[S1]] to <8 x i16>
+; NEON-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP0]], [[Z2]]
+; NEON-NEXT: ret <8 x i16> [[RES1]]
+; NEON: if.else:
+; NEON-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16>
+; NEON-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[S3]] to <8 x i16>
+; NEON-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP1]], [[Z4]]
+; NEON-NEXT: ret <8 x i16> [[RES2]]
+;
+; NONEON-LABEL: @sink_shufflevector_ext_subadd_multiuse(
+; NONEON-NEXT: entry:
+; NONEON-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NONEON-NEXT: [[Z1:%.*]] = zext <8 x i8> [[S1]] to <8 x i16>
+; NONEON-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NONEON-NEXT: [[Z3:%.*]] = sext <8 x i8> [[S3]] to <8 x i16>
+; NONEON-NEXT: call void @user1(<8 x i16> [[Z3]])
+; NONEON-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON: if.then:
+; NONEON-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NONEON-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16>
+; NONEON-NEXT: [[RES1:%.*]] = add <8 x i16> [[Z1]], [[Z2]]
+; NONEON-NEXT: ret <8 x i16> [[RES1]]
+; NONEON: if.else:
+; NONEON-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NONEON-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16>
+; NONEON-NEXT: [[RES2:%.*]] = sub <8 x i16> [[Z3]], [[Z4]]
+; NONEON-NEXT: ret <8 x i16> [[RES2]]
+;
+entry:
+ %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %z1 = zext <8 x i8> %s1 to <8 x i16>
+ %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %z3 = sext <8 x i8> %s3 to <8 x i16>
+ call void @user1(<8 x i16> %z3)
+ br i1 undef, label %if.then, label %if.else
+
+if.then:
+ %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %z2 = zext <8 x i8> %s2 to <8 x i16>
+ %res1 = add <8 x i16> %z1, %z2
+ ret <8 x i16> %res1
+
+if.else:
+ %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %z4 = sext <8 x i8> %s4 to <8 x i16>
+ %res2 = sub <8 x i16> %z3, %z4
+ ret <8 x i16> %res2
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/splitgep.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/splitgep.ll
new file mode 100644
index 00000000000..b2edb852385
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/splitgep.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -codegenprepare %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-arm-none-eabi"
+
+; Check that we have deterministic output
+;
+; The loop below addresses elements 20000 and 20001 of two base pointers:
+; %s (loaded from %sp in the entry block) and the argument %t. The check
+; directives expect the entry block to contain, in exactly this order and
+; with exactly these value names:
+;   1. an i8* bitcast of %t and a byte GEP at offset 80000 (%splitgep1),
+;   2. the load of %s,
+;   3. an i8* bitcast of %s and a byte GEP at offset 80000 (%splitgep).
+; 80000 is element index 20000 multiplied by the 4-byte size of i32.
+define void @test([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = bitcast [65536 x i32]* %t to i8*
+; CHECK-NEXT: %splitgep1 = getelementptr i8, i8* %0, i32 80000
+; CHECK-NEXT: %s = load [65536 x i32]*, [65536 x i32]** %sp
+; CHECK-NEXT: %1 = bitcast [65536 x i32]* %s to i8*
+; CHECK-NEXT: %splitgep = getelementptr i8, i8* %1, i32 80000
+entry:
+ %s = load [65536 x i32]*, [65536 x i32]** %sp
+ br label %while_cond
+
+; Loop header: %phi counts up from 0 and the loop exits once %phi >= %n
+; (signed compare). The four GEPs are computed here but only used by the
+; stores in %while_body.
+while_cond:
+ %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+ %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000
+ %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001
+ %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000
+ %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001
+ %cmp = icmp slt i32 %phi, %n
+ br i1 %cmp, label %while_body, label %while_end
+
+; Loop body: store %i (= %phi + 1) to element 20000 and %phi to element 20001
+; of each array. %j has no users in this function.
+while_body:
+ %i = add i32 %phi, 1
+ %j = add i32 %phi, 2
+ store i32 %i, i32* %gep0
+ store i32 %phi, i32* %gep1
+ store i32 %i, i32* %gep2
+ store i32 %phi, i32* %gep3
+ br label %while_cond
+
+while_end:
+ ret void
+}
+
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll
new file mode 100644
index 00000000000..09658ae75ac
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll
@@ -0,0 +1,77 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target triple = "armv8m.main-none-eabi"
+
+; External functions with no body in this file. Every test function below
+; tail-calls @f0 from %bb0 and @f1 from %bb1.
+declare i8* @f0()
+declare i8* @f1()
+
+; Baseline case: neither the function's return value nor the calls carry any
+; return attributes. %bb0 and %bb1 each make a tail call and branch to the
+; shared %return block, whose phi selects the call result to return.
+; The check directives expect each tail call in the output to be followed
+; immediately by its own ret, rather than by a branch to %return.
+define i8* @tail_dup() {
+; CHECK-LABEL: tail_dup
+; CHECK: tail call i8* @f0()
+; CHECK-NEXT: ret i8*
+; CHECK: tail call i8* @f1()
+; CHECK-NEXT: ret i8*
+bb0:
+ %tmp0 = tail call i8* @f0()
+ br label %return
+; Nothing in this function branches to %bb1; it only feeds the phi below.
+bb1:
+ %tmp1 = tail call i8* @f1()
+ br label %return
+return:
+ %retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
+ ret i8* %retval
+}
+
+; Here the function's return value is marked nonnull, while the two calls
+; carry no return attributes. The body is otherwise two tail calls feeding a
+; phi in a shared %return block.
+; The check directives expect each tail call in the output to be followed
+; immediately by its own ret, so the nonnull mismatch between caller and
+; callees is expected not to prevent that.
+define nonnull i8* @nonnull_dup() {
+; CHECK-LABEL: nonnull_dup
+; CHECK: tail call i8* @f0()
+; CHECK-NEXT: ret i8*
+; CHECK: tail call i8* @f1()
+; CHECK-NEXT: ret i8*
+bb0:
+ %tmp0 = tail call i8* @f0()
+ br label %return
+bb1:
+ %tmp1 = tail call i8* @f1()
+ br label %return
+return:
+ %retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
+ ret i8* %retval
+}
+
+; Here the two call sites mark their results noalias, while the function's
+; own return value carries no attributes. The body is otherwise two tail
+; calls feeding a phi in a shared %return block.
+; The check directives expect each tail call to keep its noalias marker and
+; to be followed immediately by its own ret.
+define i8* @noalias_dup() {
+; CHECK-LABEL: noalias_dup
+; CHECK: tail call noalias i8* @f0()
+; CHECK-NEXT: ret i8*
+; CHECK: tail call noalias i8* @f1()
+; CHECK-NEXT: ret i8*
+bb0:
+ %tmp0 = tail call noalias i8* @f0()
+ br label %return
+bb1:
+ %tmp1 = tail call noalias i8* @f1()
+ br label %return
+return:
+ %retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
+ ret i8* %retval
+}
+
+; Use inreg as a way of testing that attributes (other than nonnull and
+; noalias) disable the tailcall duplication in cgp.
+
+; Negative case: the function's return value is marked inreg, while the two
+; calls carry no return attributes. The body is otherwise two tail calls
+; feeding a phi in a shared %return block.
+; The check directives expect each tail call to still be followed by its
+; original `br label %return`, i.e. no ret is placed directly after the calls.
+define inreg i8* @inreg_nodup() {
+; CHECK-LABEL: inreg_nodup
+; CHECK: tail call i8* @f0()
+; CHECK-NEXT: br label %return
+; CHECK: tail call i8* @f1()
+; CHECK-NEXT: br label %return
+bb0:
+ %tmp0 = tail call i8* @f0()
+ br label %return
+bb1:
+ %tmp1 = tail call i8* @f1()
+ br label %return
+return:
+ %retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
+ ret i8* %retval
+}
OpenPOWER on IntegriCloud