summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
authorDale Johannesen <dalej@apple.com>2010-06-18 19:00:18 +0000
committerDale Johannesen <dalej@apple.com>2010-06-18 19:00:18 +0000
commitc1570dda5c4fd8792fb159a0ec77ceee6348887a (patch)
tree896b7cbefa8b205ce5fcf2aa4f0db2ff04f1a105 /llvm/test/CodeGen
parente5457c275d0b5594995af70f8bf900c7fe435c26 (diff)
downloadbcm5719-llvm-c1570dda5c4fd8792fb159a0ec77ceee6348887a.tar.gz
bcm5719-llvm-c1570dda5c4fd8792fb159a0ec77ceee6348887a.zip
Enable tail calls on ARM by default, with some
basic tests. This has been well tested on Darwin but not elsewhere. It should work provided the linker correctly resolves B.W <label in other function>, which it has not seen before, at least from llvm-based compilers. I'm leaving the arm-tail-calls switch in until I see whether there are any problems because of that; it might need to be disabled for some environments. llvm-svn: 106299
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/ARM/call-tc.ll36
-rw-r--r--llvm/test/CodeGen/ARM/ifcvt6-tc.ll23
-rw-r--r--llvm/test/CodeGen/ARM/insn-sched1-tc.ll11
-rw-r--r--llvm/test/CodeGen/ARM/ldm-tc.ll37
-rw-r--r--llvm/test/CodeGen/Thumb2/thumb2-call-tc.ll27
-rw-r--r--llvm/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll86
6 files changed, 220 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/call-tc.ll b/llvm/test/CodeGen/ARM/call-tc.ll
new file mode 100644
index 00000000000..8103fab2092
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/call-tc.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECKV4
+; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
+; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
+
+@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
+
+declare void @g(i32, i32, i32, i32)
+
+define void @f() { ; Direct (non-tail) call to @g with four i32 constants; the ELF PIC configuration expects "PLT" in the output.
+; CHECKELF: PLT
+ call void @g( i32 1, i32 2, i32 3, i32 4 )
+ ret void
+}
+
+define void @g.upgrd.1() { ; Loads a function pointer from @t and tail-calls it; the v4 and v5t configurations both expect "bx r0 @ TAILCALL".
+; CHECKV4: bx r0 @ TAILCALL
+; CHECKV5: bx r0 @ TAILCALL
+ %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
+ %tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
+ ret void
+}
+
+define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind { ; Calls through a function pointer loaded from memory and returns its result (the call is not marked "tail"); the v4 configuration expects a bx through some register after the m_231b label.
+; CHECKV4: m_231b
+; CHECKV4: bx r{{.*}}
+BB0:
+ %5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1]
+ %t35 = volatile load i32* %5 ; <i32> [#uses=1]
+ %6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1]
+ %7 = getelementptr i32** %6, i32 86 ; <i32**> [#uses=1]
+ %8 = load i32** %7 ; <i32*> [#uses=1]
+ %9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1]
+ %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1]
+ ret i32* %10
+}
diff --git a/llvm/test/CodeGen/ARM/ifcvt6-tc.ll b/llvm/test/CodeGen/ARM/ifcvt6-tc.ll
new file mode 100644
index 00000000000..5b28804f380
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/ifcvt6-tc.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
+; RUN: grep cmpne | count 1
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
+; RUN: grep bhi | count 1
+; Here, the tail call wins over eliminating branches. It is one fewer instruction
+; and removes all stack accesses, so it seems like a win.
+
+define void @foo(i32 %X, i32 %Y) { ; Tail-calls @bar when %X < 4 (unsigned) or %Y == 0; otherwise just returns.
+entry:
+ %tmp1 = icmp ult i32 %X, 4 ; <i1> [#uses=1]
+ %tmp4 = icmp eq i32 %Y, 0 ; <i1> [#uses=1]
+ %tmp7 = or i1 %tmp4, %tmp1 ; <i1> [#uses=1]
+ br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
+
+cond_true: ; preds = %entry
+ %tmp10 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/CodeGen/ARM/insn-sched1-tc.ll b/llvm/test/CodeGen/ARM/insn-sched1-tc.ll
new file mode 100644
index 00000000000..c457c8c5a55
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/insn-sched1-tc.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
+; RUN: grep mov | count 2
+
+define i32 @test(i32 %x) { ; Truncates %x to i16, tail-calls @f(1, truncated value) and returns its result.
+ %tmp = trunc i32 %x to i16 ; <i16> [#uses=1]
+ %tmp2 = tail call i32 @f( i32 1, i16 %tmp ) ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+declare i32 @f(i32, i16)
diff --git a/llvm/test/CodeGen/ARM/ldm-tc.ll b/llvm/test/CodeGen/ARM/ldm-tc.ll
new file mode 100644
index 00000000000..3819192429e
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/ldm-tc.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
+
+@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
+
+define i32 @t1() { ; Loads elements 0 and 1 of @X and tail-calls @f1 with them; an ldmia is expected in the output.
+; CHECK: t1:
+; CHECK: ldmia
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
+
+define i32 @t2() { ; Loads elements 2, 3 and 4 of @X and tail-calls @f2 with them; an ldmia is expected in the output.
+; CHECK: t2:
+; CHECK: ldmia
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
+ %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+define i32 @t3() { ; Loads elements 1, 2 and 3 of @X and tail-calls @f2; an ldmib and then a "b.w _f2" tail call are expected in the output.
+; CHECK: t3:
+; CHECK: ldmib
+; CHECK: b.w _f2 @ TAILCALL
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+declare i32 @f1(i32, i32)
+
+declare i32 @f2(i32, i32, i32)
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-call-tc.ll b/llvm/test/CodeGen/Thumb2/thumb2-call-tc.ll
new file mode 100644
index 00000000000..d31ae0cc455
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/thumb2-call-tc.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX
+
+@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
+
+declare void @g(i32, i32, i32, i32)
+
+define void @f() { ; Direct (non-tail) call to @g with four i32 constants; the Darwin target expects "blx _g" and the Linux target expects "bl g".
+; DARWIN: f:
+; DARWIN: blx _g
+
+; LINUX: f:
+; LINUX: bl g
+ call void @g( i32 1, i32 2, i32 3, i32 4 )
+ ret void
+}
+
+define void @h() { ; Loads a function pointer from @t and tail-calls it; both targets expect "bx r0 @ TAILCALL".
+; DARWIN: h:
+; DARWIN: bx r0 @ TAILCALL
+
+; LINUX: h:
+; LINUX: bx r0 @ TAILCALL
+ %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
+ %tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
+ ret void
+}
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
new file mode 100644
index 00000000000..c0244154771
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; Returns %a + 1 + %b when %c is 1 or 7, otherwise %b + %a (%d is unused); "it ne" and then cmpne are expected in the output.
+; CHECK: t1:
+; CHECK: it ne
+; CHECK: cmpne
+ switch i32 %c, label %cond_next [
+ i32 1, label %cond_true
+ i32 7, label %cond_true
+ ]
+
+cond_true:
+ %tmp12 = add i32 %a, 1
+ %tmp1518 = add i32 %tmp12, %b
+ ret i32 %tmp1518
+
+cond_next:
+ %tmp15 = add i32 %b, %a
+ ret i32 %tmp15
+}
+
+; FIXME: Check the number of unconditional branches once branch folding has been added after ifcvt.
+define i32 @t2(i32 %a, i32 %b) nounwind { ; Loop that repeatedly subtracts one running value from the other (chosen by a signed compare) until an equality test succeeds; looks like a subtraction-based GCD -- TODO confirm. "ite gt", subgt and suble are expected in the output.
+entry:
+; CHECK: t2:
+; CHECK: ite gt
+; CHECK: subgt
+; CHECK: suble
+ %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp1434, label %bb17, label %bb.outer
+
+bb.outer: ; preds = %cond_false, %entry
+ %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5]
+ %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %cond_true, %bb.outer
+ %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
+ %tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1]
+ %tmp.40 = mul i32 %indvar, %tmp. ; <i32> [#uses=1]
+ %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6]
+ %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1]
+ br i1 %tmp3, label %cond_true, label %cond_false
+
+cond_true: ; preds = %bb
+ %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2]
+ %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br i1 %tmp1437, label %bb17, label %bb
+
+cond_false: ; preds = %bb
+ %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2]
+ %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; <i1> [#uses=1]
+ br i1 %tmp14, label %bb17, label %bb.outer
+
+bb17: ; preds = %cond_false, %cond_true, %entry
+ %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
+ ret i32 %a_addr.026.1
+}
+
+@x = external global i32* ; <i32**> [#uses=1]
+
+define void @foo(i32 %a) nounwind { ; Stores %a through the pointer loaded from the external global @x.
+entry:
+ %tmp = load i32** @x ; <i32*> [#uses=1]
+ store i32 %a, i32* %tmp
+ ret void
+}
+
+; The tail call prevents use of ifcvt in this one. It seems like a win, though.
+define void @t3(i32 %a, i32 %b) nounwind { ; Tail-calls @foo(%b) when %a > 10 (signed); otherwise just returns. The output must contain "b.w _foo @ TAILCALL" and no "it lt" / poplt before it.
+entry:
+; CHECK: t3:
+; CHECK-NOT: it lt
+; CHECK-NOT: poplt
+; CHECK: b.w _foo @ TAILCALL
+ %tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
+ br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
+
+cond_true: ; preds = %entry
+ tail call void @foo( i32 %b )
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
OpenPOWER on IntegriCloud