summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
author: Joel Galenson <jgalenson@google.com> 2018-01-17 19:19:05 +0000
committer: Joel Galenson <jgalenson@google.com> 2018-01-17 19:19:05 +0000
commit: fe7fa40869b5b85cf7a99e7ecd13468ddfd955b1 (patch)
tree: f7d9eb23b8ec28150dc98079420c55c31878385a /llvm/test/CodeGen
parent: 727f153b6fe28918a28c5b984d79274a10e8f0f1 (diff)
download: bcm5719-llvm-fe7fa40869b5b85cf7a99e7ecd13468ddfd955b1.tar.gz
download: bcm5719-llvm-fe7fa40869b5b85cf7a99e7ecd13468ddfd955b1.zip
[ARM] Optimize {s,u}{add,sub}.with.overflow.
The ARM backend contains code that tries to optimize compares by replacing them with an existing instruction that sets the flags the same way. This change allows it to replace a "cmp" with an "adds", generalizing the code that replaces "cmp" with "sub". It also heuristically disables sinking of instructions that could potentially be used to replace compares (currently only if they're next to each other).

Differential revision: https://reviews.llvm.org/D38378

llvm-svn: 322737
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- llvm/test/CodeGen/ARM/intrinsics-overflow.ll 15
-rw-r--r-- llvm/test/CodeGen/ARM/su-addsub-overflow.ll 63
2 files changed, 54 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll
index af555d2240c..5f78b13c18d 100644
--- a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll
+++ b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll
@@ -33,10 +33,10 @@ define i32 @sadd_overflow(i32 %a, i32 %b) #0 {
; CHECK-LABEL: sadd_overflow:
- ; ARM: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
- ; ARM: mov r[[R1]], #1
- ; ARM: cmp r[[R2]], r[[R0]]
- ; ARM: movvc r[[R1]], #0
+ ; ARM: adds r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
+ ; ARM: mov r[[R0]], #1
+ ; ARM: movvc r[[R0]], #0
+ ; ARM: mov pc, lr
; THUMBV6: mov r[[R2:[0-9]+]], r[[R0:[0-9]+]]
; THUMBV6: adds r[[R3:[0-9]+]], r[[R2]], r[[R1:[0-9]+]]
@@ -47,11 +47,10 @@ define i32 @sadd_overflow(i32 %a, i32 %b) #0 {
; THUMBV6: mov r[[R0]], r[[R1]]
; THUMBV6: .L[[LABEL]]:
- ; THUMBV7: movs r[[R1]], #1
- ; THUMBV7: cmp r[[R2]], r[[R0]]
+ ; THUMBV7: adds r[[R2:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+ ; THUMBV7: mov.w r[[R0:[0-9]+]], #1
; THUMBV7: it vc
- ; THUMBV7: movvc r[[R1]], #0
- ; THUMBV7: mov r[[R0]], r[[R1]]
+ ; THUMBV7: movvc r[[R0]], #0
}
define i32 @usub_overflow(i32 %a, i32 %b) #0 {
diff --git a/llvm/test/CodeGen/ARM/su-addsub-overflow.ll b/llvm/test/CodeGen/ARM/su-addsub-overflow.ll
index eef53128203..04e59e05b6d 100644
--- a/llvm/test/CodeGen/ARM/su-addsub-overflow.ll
+++ b/llvm/test/CodeGen/ARM/su-addsub-overflow.ll
@@ -2,9 +2,7 @@
define i32 @sadd(i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: sadd:
-; CHECK: mov r[[R0:[0-9]+]], r0
-; CHECK-NEXT: add r[[R1:[0-9]+]], r[[R0]], r1
-; CHECK-NEXT: cmp r[[R1]], r[[R0]]
+; CHECK: adds r0, r0, r1
; CHECK-NEXT: movvc pc, lr
entry:
%0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
@@ -23,10 +21,8 @@ cont:
define i32 @uadd(i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: uadd:
-; CHECK: mov r[[R0:[0-9]+]], r0
-; CHECK-NEXT: adds r[[R1:[0-9]+]], r[[R0]], r1
-; CHECK-NEXT: cmp r[[R1]], r[[R0]]
-; CHECK-NEXT: movhs pc, lr
+; CHECK: adds r0, r0, r1
+; CHECK-NEXT: movlo pc, lr
entry:
%0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
%1 = extractvalue { i32, i1 } %0, 1
@@ -44,8 +40,7 @@ cont:
define i32 @ssub(i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: ssub:
-; CHECK: cmp r0, r1
-; CHECK-NEXT: subvc r0, r0, r1
+; CHECK: subs r0, r0, r1
; CHECK-NEXT: movvc pc, lr
entry:
%0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
@@ -64,9 +59,7 @@ cont:
define i32 @usub(i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: usub:
-; CHECK: mov r[[R0:[0-9]+]], r0
-; CHECK-NEXT: subs r[[R1:[0-9]+]], r[[R0]], r1
-; CHECK-NEXT: cmp r[[R0]], r1
+; CHECK: subs r0, r0, r1
; CHECK-NEXT: movhs pc, lr
entry:
%0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
@@ -87,11 +80,9 @@ define void @sum(i32* %a, i32* %b, i32 %n) local_unnamed_addr #0 {
; CHECK-LABEL: sum:
; CHECK: ldr [[R0:r[0-9]+]],
; CHECK-NEXT: ldr [[R1:r[0-9]+|lr]],
-; CHECK-NEXT: add [[R2:r[0-9]+]], [[R1]], [[R0]]
-; CHECK-NEXT: cmp [[R2]], [[R1]]
+; CHECK-NEXT: adds [[R2:r[0-9]+]], [[R1]], [[R0]]
; CHECK-NEXT: strvc [[R2]],
-; CHECK-NEXT: addvc
-; CHECK-NEXT: cmpvc
+; CHECK-NEXT: addsvc
; CHECK-NEXT: bvs
entry:
%cmp7 = icmp eq i32 %n, 0
@@ -128,6 +119,46 @@ cont2:
}
+define void @extern_loop(i32 %n) local_unnamed_addr #0 {
+; Do not replace the compare around the clobbering call.
+; CHECK: add {{r[0-9]+}}, {{r[0-9]+}}, #1
+; CHECK-NEXT: bl external_fn
+; CHECK: cmp
+entry:
+ %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %n, i32 1)
+ %1 = extractvalue { i32, i1 } %0, 1
+ br i1 %1, label %trap, label %cont.lr.ph
+
+cont.lr.ph:
+ %2 = extractvalue { i32, i1 } %0, 0
+ %cmp5 = icmp sgt i32 %2, 0
+ br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+trap:
+ tail call void @llvm.trap() #2
+ unreachable
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %i.046 = phi i32 [ %5, %cont1 ], [ 0, %for.body.preheader ]
+ tail call void bitcast (void (...)* @external_fn to void ()*)() #4
+ %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.046, i32 1)
+ %4 = extractvalue { i32, i1 } %3, 1
+ br i1 %4, label %trap, label %cont1
+
+cont1:
+ %5 = extractvalue { i32, i1 } %3, 0
+ %cmp = icmp slt i32 %5, %2
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+declare void @external_fn(...) local_unnamed_addr #0
+
declare void @llvm.trap() #2
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
OpenPOWER on IntegriCloud