diff options
author | Joel Galenson <jgalenson@google.com> | 2018-01-17 19:19:05 +0000 |
---|---|---|
committer | Joel Galenson <jgalenson@google.com> | 2018-01-17 19:19:05 +0000 |
commit | fe7fa40869b5b85cf7a99e7ecd13468ddfd955b1 (patch) | |
tree | f7d9eb23b8ec28150dc98079420c55c31878385a /llvm/test | |
parent | 727f153b6fe28918a28c5b984d79274a10e8f0f1 (diff) | |
download | bcm5719-llvm-fe7fa40869b5b85cf7a99e7ecd13468ddfd955b1.tar.gz bcm5719-llvm-fe7fa40869b5b85cf7a99e7ecd13468ddfd955b1.zip |
[ARM] Optimize {s,u}{add,sub}.with.overflow.
The ARM backend contains code that tries to optimize compares by replacing them with an existing instruction that sets the flags the same way. This patch allows it to replace a "cmp" with an "adds", generalizing the code that replaces "cmp" with "sub". It also heuristically disables sinking of instructions that could potentially be used to replace compares (currently only if they're next to each other).
Differential revision: https://reviews.llvm.org/D38378
llvm-svn: 322737
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/ARM/intrinsics-overflow.ll | 15 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/su-addsub-overflow.ll | 63 |
2 files changed, 54 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll index af555d2240c..5f78b13c18d 100644 --- a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll +++ b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll @@ -33,10 +33,10 @@ define i32 @sadd_overflow(i32 %a, i32 %b) #0 { ; CHECK-LABEL: sadd_overflow: - ; ARM: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]] - ; ARM: mov r[[R1]], #1 - ; ARM: cmp r[[R2]], r[[R0]] - ; ARM: movvc r[[R1]], #0 + ; ARM: adds r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]] + ; ARM: mov r[[R0]], #1 + ; ARM: movvc r[[R0]], #0 + ; ARM: mov pc, lr ; THUMBV6: mov r[[R2:[0-9]+]], r[[R0:[0-9]+]] ; THUMBV6: adds r[[R3:[0-9]+]], r[[R2]], r[[R1:[0-9]+]] @@ -47,11 +47,10 @@ define i32 @sadd_overflow(i32 %a, i32 %b) #0 { ; THUMBV6: mov r[[R0]], r[[R1]] ; THUMBV6: .L[[LABEL]]: - ; THUMBV7: movs r[[R1]], #1 - ; THUMBV7: cmp r[[R2]], r[[R0]] + ; THUMBV7: adds r[[R2:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] + ; THUMBV7: mov.w r[[R0:[0-9]+]], #1 ; THUMBV7: it vc - ; THUMBV7: movvc r[[R1]], #0 - ; THUMBV7: mov r[[R0]], r[[R1]] + ; THUMBV7: movvc r[[R0]], #0 } define i32 @usub_overflow(i32 %a, i32 %b) #0 { diff --git a/llvm/test/CodeGen/ARM/su-addsub-overflow.ll b/llvm/test/CodeGen/ARM/su-addsub-overflow.ll index eef53128203..04e59e05b6d 100644 --- a/llvm/test/CodeGen/ARM/su-addsub-overflow.ll +++ b/llvm/test/CodeGen/ARM/su-addsub-overflow.ll @@ -2,9 +2,7 @@ define i32 @sadd(i32 %a, i32 %b) local_unnamed_addr #0 { ; CHECK-LABEL: sadd: -; CHECK: mov r[[R0:[0-9]+]], r0 -; CHECK-NEXT: add r[[R1:[0-9]+]], r[[R0]], r1 -; CHECK-NEXT: cmp r[[R1]], r[[R0]] +; CHECK: adds r0, r0, r1 ; CHECK-NEXT: movvc pc, lr entry: %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) @@ -23,10 +21,8 @@ cont: define i32 @uadd(i32 %a, i32 %b) local_unnamed_addr #0 { ; CHECK-LABEL: uadd: -; CHECK: mov r[[R0:[0-9]+]], r0 -; CHECK-NEXT: adds r[[R1:[0-9]+]], r[[R0]], r1 -; CHECK-NEXT: cmp r[[R1]], r[[R0]] -; CHECK-NEXT: movhs pc, lr +; 
CHECK: adds r0, r0, r1 +; CHECK-NEXT: movlo pc, lr entry: %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) %1 = extractvalue { i32, i1 } %0, 1 @@ -44,8 +40,7 @@ cont: define i32 @ssub(i32 %a, i32 %b) local_unnamed_addr #0 { ; CHECK-LABEL: ssub: -; CHECK: cmp r0, r1 -; CHECK-NEXT: subvc r0, r0, r1 +; CHECK: subs r0, r0, r1 ; CHECK-NEXT: movvc pc, lr entry: %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) @@ -64,9 +59,7 @@ cont: define i32 @usub(i32 %a, i32 %b) local_unnamed_addr #0 { ; CHECK-LABEL: usub: -; CHECK: mov r[[R0:[0-9]+]], r0 -; CHECK-NEXT: subs r[[R1:[0-9]+]], r[[R0]], r1 -; CHECK-NEXT: cmp r[[R0]], r1 +; CHECK: subs r0, r0, r1 ; CHECK-NEXT: movhs pc, lr entry: %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) @@ -87,11 +80,9 @@ define void @sum(i32* %a, i32* %b, i32 %n) local_unnamed_addr #0 { ; CHECK-LABEL: sum: ; CHECK: ldr [[R0:r[0-9]+]], ; CHECK-NEXT: ldr [[R1:r[0-9]+|lr]], -; CHECK-NEXT: add [[R2:r[0-9]+]], [[R1]], [[R0]] -; CHECK-NEXT: cmp [[R2]], [[R1]] +; CHECK-NEXT: adds [[R2:r[0-9]+]], [[R1]], [[R0]] ; CHECK-NEXT: strvc [[R2]], -; CHECK-NEXT: addvc -; CHECK-NEXT: cmpvc +; CHECK-NEXT: addsvc ; CHECK-NEXT: bvs entry: %cmp7 = icmp eq i32 %n, 0 @@ -128,6 +119,46 @@ cont2: } +define void @extern_loop(i32 %n) local_unnamed_addr #0 { +; Do not replace the compare around the clobbering call. 
+; CHECK: add {{r[0-9]+}}, {{r[0-9]+}}, #1 +; CHECK-NEXT: bl external_fn +; CHECK: cmp +entry: + %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %n, i32 1) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont.lr.ph + +cont.lr.ph: + %2 = extractvalue { i32, i1 } %0, 0 + %cmp5 = icmp sgt i32 %2, 0 + br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + br label %for.body + +trap: + tail call void @llvm.trap() #2 + unreachable + +for.cond.cleanup: + ret void + +for.body: + %i.046 = phi i32 [ %5, %cont1 ], [ 0, %for.body.preheader ] + tail call void bitcast (void (...)* @external_fn to void ()*)() #4 + %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.046, i32 1) + %4 = extractvalue { i32, i1 } %3, 1 + br i1 %4, label %trap, label %cont1 + +cont1: + %5 = extractvalue { i32, i1 } %3, 0 + %cmp = icmp slt i32 %5, %2 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + +declare void @external_fn(...) local_unnamed_addr #0 + declare void @llvm.trap() #2 declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 |